Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/attrd/commands.c b/attrd/commands.c
index cfb6533392..459a2f17aa 100644
--- a/attrd/commands.c
+++ b/attrd/commands.c
@@ -1,768 +1,770 @@
/*
* Copyright (C) 2013 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <glib.h>
#include <crm/msg_xml.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/cib/internal.h>
#include <internal.h>
#define ATTRD_PROTOCOL_VERSION "1"
int last_cib_op_done = 0;
char *peer_writer = NULL;
GHashTable *attributes = NULL;
typedef struct attribute_s {
char *uuid; /* TODO: Remove if at all possible */
char *id;
char *set;
GHashTable *values;
int update;
int timeout_ms;
bool changed;
bool unknown_peer_uuids;
mainloop_timer_t *timer;
char *user;
} attribute_t;
typedef struct attribute_value_s {
uint32_t nodeid;
gboolean is_remote;
char *nodename;
char *current;
char *requested;
char *stored;
} attribute_value_t;
void write_attribute(attribute_t *a);
void write_or_elect_attribute(attribute_t *a);
void attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter);
void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
void attrd_peer_remove(const char *host, const char *source);
static gboolean
send_attrd_message(crm_node_t * node, xmlNode * data)
{
crm_xml_add(data, F_TYPE, T_ATTRD);
crm_xml_add(data, F_ATTRD_IGNORE_LOCALLY, "atomic-version"); /* Tell older versions to ignore our messages */
crm_xml_add(data, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION);
crm_xml_add_int(data, F_ATTRD_WRITER, election_state(writer));
return send_cluster_message(node, crm_msg_attrd, data, TRUE);
}
static gboolean
attribute_timer_cb(gpointer data)
{
attribute_t *a = data;
crm_trace("Dampen interval expired for %s in state %d", a->id, election_state(writer));
write_or_elect_attribute(a);
return FALSE;
}
static void
free_attribute_value(gpointer data)
{
attribute_value_t *v = data;
free(v->nodename);
free(v->current);
free(v->requested);
free(v->stored);
free(v);
}
void
free_attribute(gpointer data)
{
attribute_t *a = data;
if(a) {
free(a->id);
free(a->set);
free(a->uuid);
free(a->user);
mainloop_timer_del(a->timer);
g_hash_table_destroy(a->values);
free(a);
}
}
xmlNode *
build_attribute_xml(
xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
const char *peer, uint32_t peerid, const char *value)
{
xmlNode *xml = create_xml_node(parent, __FUNCTION__);
crm_xml_add(xml, F_ATTRD_ATTRIBUTE, name);
crm_xml_add(xml, F_ATTRD_SET, set);
crm_xml_add(xml, F_ATTRD_KEY, uuid);
crm_xml_add(xml, F_ATTRD_USER, user);
crm_xml_add(xml, F_ATTRD_HOST, peer);
crm_xml_add_int(xml, F_ATTRD_HOST_ID, peerid);
crm_xml_add(xml, F_ATTRD_VALUE, value);
crm_xml_add_int(xml, F_ATTRD_DAMPEN, timeout_ms/1000);
return xml;
}
static attribute_t *
create_attribute(xmlNode *xml)
{
int dampen = 0;
const char *value = crm_element_value(xml, F_ATTRD_DAMPEN);
attribute_t *a = calloc(1, sizeof(attribute_t));
a->id = crm_element_value_copy(xml, F_ATTRD_ATTRIBUTE);
a->set = crm_element_value_copy(xml, F_ATTRD_SET);
a->uuid = crm_element_value_copy(xml, F_ATTRD_KEY);
a->values = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute_value);
#if ENABLE_ACL
crm_trace("Performing all %s operations as user '%s'", a->id, a->user);
a->user = crm_element_value_copy(xml, F_ATTRD_USER);
#endif
if(value) {
dampen = crm_get_msec(value);
crm_trace("Created attribute %s with delay %dms (%s)", a->id, dampen, value);
} else {
crm_trace("Created attribute %s with no delay", a->id);
}
if(dampen > 0) {
a->timeout_ms = dampen;
a->timer = mainloop_timer_add(strdup(a->id), a->timeout_ms, FALSE, attribute_timer_cb, a);
}
g_hash_table_replace(attributes, a->id, a);
return a;
}
void
attrd_client_message(crm_client_t *client, xmlNode *xml)
{
bool broadcast = FALSE;
static int plus_plus_len = 5;
const char *op = crm_element_value(xml, F_ATTRD_TASK);
if(safe_str_eq(op, "peer-remove")) {
const char *host = crm_element_value(xml, F_ATTRD_HOST);
crm_info("Client %s is requesting all values for %s be removed", client->name, host);
if(host) {
broadcast = TRUE;
}
} else if(safe_str_eq(op, "update")) {
attribute_t *a = NULL;
attribute_value_t *v = NULL;
char *key = crm_element_value_copy(xml, F_ATTRD_KEY);
char *set = crm_element_value_copy(xml, F_ATTRD_SET);
char *host = crm_element_value_copy(xml, F_ATTRD_HOST);
const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
const char *value = crm_element_value(xml, F_ATTRD_VALUE);
a = g_hash_table_lookup(attributes, attr);
if(host == NULL) {
crm_trace("Inferring host");
host = strdup(attrd_cluster->uname);
crm_xml_add(xml, F_ATTRD_HOST, host);
crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid);
}
if (value) {
int offset = 1;
int int_value = 0;
int value_len = strlen(value);
if (value_len < (plus_plus_len + 2)
|| value[plus_plus_len] != '+'
|| (value[plus_plus_len + 1] != '+' && value[plus_plus_len + 1] != '=')) {
goto send;
}
if(a) {
v = g_hash_table_lookup(a->values, host);
}
if(v) {
int_value = char2score(v->current);
}
if (value[plus_plus_len + 1] != '+') {
const char *offset_s = value + (plus_plus_len + 2);
offset = char2score(offset_s);
}
int_value += offset;
if (int_value > INFINITY) {
int_value = INFINITY;
}
crm_info("Expanded %s=%s to %d", attr, value, int_value);
crm_xml_add_int(xml, F_ATTRD_VALUE, int_value);
}
send:
if(peer_writer == NULL && election_state(writer) != election_in_progress) {
crm_info("Starting an election to determine the writer");
election_vote(writer);
}
crm_info("Broadcasting %s[%s] = %s%s", attr, host, value, election_state(writer) == election_won?" (writer)":"");
broadcast = TRUE;
free(key);
free(set);
free(host);
}
if(broadcast) {
send_attrd_message(NULL, xml);
}
}
void
attrd_peer_message(crm_node_t *peer, xmlNode *xml)
{
int peer_state = 0;
const char *v = crm_element_value(xml, F_ATTRD_VERSION);
const char *op = crm_element_value(xml, F_ATTRD_TASK);
const char *election_op = crm_element_value(xml, F_CRM_TASK);
if(election_op) {
enum election_result rc = 0;
crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname);
rc = election_count_vote(writer, xml, TRUE);
switch(rc) {
case election_start:
free(peer_writer);
peer_writer = NULL;
election_vote(writer);
break;
case election_lost:
free(peer_writer);
peer_writer = strdup(peer->uname);
break;
default:
election_check(writer);
break;
}
return;
} else if(v == NULL) {
/* From the non-atomic version */
if(safe_str_eq(op, "update")) {
const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
crm_trace("Compatibility update of %s from %s", name, peer->uname);
attrd_peer_update(peer, xml, FALSE);
} else if(safe_str_eq(op, "flush")) {
const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
attribute_t *a = g_hash_table_lookup(attributes, name);
if(a) {
crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname);
write_or_elect_attribute(a);
}
} else if(safe_str_eq(op, "refresh")) {
GHashTableIter aIter;
attribute_t *a = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname);
write_or_elect_attribute(a);
}
}
}
crm_element_value_int(xml, F_ATTRD_WRITER, &peer_state);
if(election_state(writer) == election_won
&& peer_state == election_won
&& safe_str_neq(peer->uname, attrd_cluster->uname)) {
crm_notice("Detected another attribute writer: %s", peer->uname);
election_vote(writer);
} else if(peer_state == election_won) {
if(peer_writer == NULL) {
peer_writer = strdup(peer->uname);
crm_notice("Recorded attribute writer: %s", peer->uname);
} else if(safe_str_neq(peer->uname, peer_writer)) {
crm_notice("Recorded new attribute writer: %s (was %s)", peer->uname, peer_writer);
free(peer_writer);
peer_writer = strdup(peer->uname);
}
}
if(safe_str_eq(op, "update")) {
attrd_peer_update(peer, xml, FALSE);
} else if(safe_str_eq(op, "sync")) {
attrd_peer_sync(peer, xml);
} else if(safe_str_eq(op, "peer-remove")) {
const char *host = crm_element_value(xml, F_ATTRD_HOST);
attrd_peer_remove(host, peer->uname);
} else if(safe_str_eq(op, "sync-response")
&& safe_str_neq(peer->uname, attrd_cluster->uname)) {
xmlNode *child = NULL;
crm_notice("Processing %s from %s", op, peer->uname);
for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
attrd_peer_update(peer, child, TRUE);
}
}
}
void
attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(sync, F_ATTRD_TASK, "sync-response");
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, v->nodename, v->nodeid, v->current);
}
}
crm_debug("Syncing values to %s", peer?peer->uname:"everyone");
send_attrd_message(peer, sync);
free_xml(sync);
}
void
attrd_peer_remove(const char *host, const char *source)
{
attribute_t *a = NULL;
GHashTableIter aIter;
crm_notice("Removing all %s attributes for %s", host, source);
if(host == NULL) {
return;
}
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
if(g_hash_table_remove(a->values, host)) {
crm_debug("Removed %s[%s] for %s", a->id, host, source);
}
}
/* if this matches a remote peer, it will be removed from the cache */
crm_remote_peer_cache_remove(host);
}
void
attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter)
{
bool changed = FALSE;
attribute_value_t *v = NULL;
const char *host = crm_element_value(xml, F_ATTRD_HOST);
const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
const char *value = crm_element_value(xml, F_ATTRD_VALUE);
attribute_t *a = g_hash_table_lookup(attributes, attr);
if(a == NULL) {
a = create_attribute(xml);
}
v = g_hash_table_lookup(a->values, host);
if(v == NULL) {
crm_trace("Setting %s[%s] to %s from %s", attr, host, value, peer->uname);
v = calloc(1, sizeof(attribute_value_t));
if(value) {
v->current = strdup(value);
}
v->nodename = strdup(host);
crm_element_value_int(xml, F_ATTRD_IS_REMOTE, &v->is_remote);
g_hash_table_replace(a->values, v->nodename, v);
if (v->is_remote == TRUE) {
crm_remote_peer_cache_add(host);
}
changed = TRUE;
} else if(filter
&& safe_str_neq(v->current, value)
&& safe_str_eq(host, attrd_cluster->uname)) {
xmlNode *sync = create_xml_node(NULL, __FUNCTION__);
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
a->id, host, v->current, value, peer->uname);
crm_xml_add(sync, F_ATTRD_TASK, "sync-response");
v = g_hash_table_lookup(a->values, host);
build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, v->nodename, v->nodeid, v->current);
crm_xml_add_int(sync, F_ATTRD_WRITER, election_state(writer));
send_attrd_message(peer, sync);
free_xml(sync);
} else if(safe_str_neq(v->current, value)) {
crm_info("Setting %s[%s]: %s -> %s from %s", attr, host, v->current, value, peer->uname);
free(v->current);
if(value) {
v->current = strdup(value);
} else {
v->current = NULL;
}
changed = TRUE;
} else {
crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value);
}
a->changed |= changed;
/* this only involves cluster nodes. */
if(v->nodeid == 0 && (v->is_remote == FALSE)) {
if(crm_element_value_int(xml, F_ATTRD_HOST_ID, (int*)&v->nodeid) == 0) {
/* Create the name/id association */
crm_node_t *peer = crm_get_peer(v->nodeid, host);
crm_trace("We know %s's node id now: %s", peer->uname, peer->uuid);
if(election_state(writer) == election_won) {
write_attributes(FALSE, TRUE);
return;
}
}
}
if(changed) {
if(a->timer) {
crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id);
mainloop_timer_start(a->timer);
} else {
write_or_elect_attribute(a);
}
}
}
void
write_or_elect_attribute(attribute_t *a)
{
enum election_result rc = election_state(writer);
if(rc == election_won) {
write_attribute(a);
} else if(rc == election_in_progress) {
crm_trace("Election in progress to determine who will write out %s", a->id);
} else if(peer_writer == NULL) {
crm_info("Starting an election to determine who will write out %s", a->id);
election_vote(writer);
} else {
crm_trace("%s will write out %s, we are in state %d", peer_writer, a->id, rc);
}
}
gboolean
attrd_election_cb(gpointer user_data)
{
+ crm_trace("Election complete");
+
free(peer_writer);
peer_writer = strdup(attrd_cluster->uname);
/* Update the peers after an election */
attrd_peer_sync(NULL, NULL);
/* Update the CIB after an election */
write_attributes(TRUE, FALSE);
return FALSE;
}
void
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
{
if(election_state(writer) == election_won
&& kind == crm_status_nstate
&& safe_str_eq(peer->state, CRM_NODE_MEMBER)) {
attrd_peer_sync(peer, NULL);
} else if(kind == crm_status_nstate
&& safe_str_neq(peer->state, CRM_NODE_MEMBER)) {
attrd_peer_remove(peer->uname, __FUNCTION__);
if(peer_writer && safe_str_eq(peer->uname, peer_writer)) {
free(peer_writer);
peer_writer = NULL;
crm_notice("Lost attribute writer %s", peer->uname);
}
} else if(kind == crm_status_processes) {
if(is_set(peer->processes, crm_proc_cpg)) {
crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0);
} else {
crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0);
}
}
}
static void
attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
int level = LOG_ERR;
GHashTableIter iter;
const char *peer = NULL;
attribute_value_t *v = NULL;
char *name = user_data;
attribute_t *a = g_hash_table_lookup(attributes, name);
if(a == NULL) {
crm_info("Attribute %s no longer exists", name);
goto done;
}
a->update = 0;
if (rc == pcmk_ok && call_id < 0) {
rc = call_id;
}
switch (rc) {
case pcmk_ok:
level = LOG_INFO;
last_cib_op_done = call_id;
break;
case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */
case -ETIME: /* When an attr changes while there is a DC election */
case -ENXIO: /* When an attr changes while the CIB is syncing a
* newer config from a node that just came up
*/
level = LOG_WARNING;
break;
}
do_crm_log(level, "Update %d for %s: %s (%d)", call_id, name, pcmk_strerror(rc), rc);
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
crm_notice("Update %d for %s[%s]=%s: %s (%d)", call_id, a->id, peer, v->requested, pcmk_strerror(rc), rc);
if(rc == pcmk_ok) {
free(v->stored);
v->stored = v->requested;
v->requested = NULL;
} else {
free(v->requested);
v->requested = NULL;
a->changed = TRUE; /* Attempt write out again */
}
}
done:
free(name);
if(a && a->changed && election_state(writer) == election_won) {
write_attribute(a);
}
}
void
write_attributes(bool all, bool peer_discovered)
{
GHashTableIter iter;
attribute_t *a = NULL;
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
if (peer_discovered && a->unknown_peer_uuids) {
/* a new peer uuid has been discovered, try writing this attribute again. */
a->changed = TRUE;
}
if(all || a->changed) {
write_attribute(a);
} else {
crm_debug("Skipping unchanged attribute %s", a->id);
}
}
}
static void
build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value)
{
char *set = NULL;
char *uuid = NULL;
xmlNode *xml_obj = NULL;
if(a->set) {
set = g_strdup(a->set);
} else {
set = g_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, nodeid);
}
if(a->uuid) {
uuid = g_strdup(a->uuid);
} else {
int lpc;
uuid = g_strdup_printf("%s-%s", set, a->id);
/* Minimal attempt at sanitizing automatic IDs */
for (lpc = 0; uuid[lpc] != 0; lpc++) {
switch (uuid[lpc]) {
case ':':
uuid[lpc] = '.';
}
}
}
xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE);
crm_xml_add(xml_obj, XML_ATTR_ID, nodeid);
xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS);
crm_xml_add(xml_obj, XML_ATTR_ID, nodeid);
xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS);
crm_xml_add(xml_obj, XML_ATTR_ID, set);
xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR);
crm_xml_add(xml_obj, XML_ATTR_ID, uuid);
crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id);
if(value) {
crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value);
} else {
crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, "");
crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE);
}
g_free(uuid);
g_free(set);
}
void
write_attribute(attribute_t *a)
{
int updates = 0;
xmlNode *xml_top = NULL;
attribute_value_t *v = NULL;
GHashTableIter iter;
enum cib_call_options flags = cib_quorum_override;
if (a == NULL) {
return;
} else if (the_cib == NULL) {
crm_info("Write out of %s delayed: cib not connected", a->id);
return;
} else if(a->update && a->update < last_cib_op_done) {
crm_info("Write out of %s continuing: update %d considered lost", a->id, a->update);
} else if(a->update) {
crm_info("Write out of %s delayed: update %d in progress", a->id, a->update);
return;
} else if(mainloop_timer_running(a->timer)) {
crm_info("Write out of %s delayed: timer is running", a->id);
return;
}
a->changed = FALSE;
a->unknown_peer_uuids = FALSE;
xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) {
crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_REMOTE|CRM_GET_PEER_CLUSTER);
if(peer && peer->id && v->nodeid == 0) {
crm_trace("Updating value's nodeid");
v->nodeid = peer->id;
}
if (peer == NULL) {
/* If the user is trying to set an attribute on an unknown peer, ignore it. */
crm_notice("Update error (peer not found): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer);
} else if (peer->uuid == NULL) {
/* peer is found, but we don't know the uuid yet. Wait until we discover a new uuid before attempting to write */
a->unknown_peer_uuids = FALSE;
crm_notice("Update error (unknown peer uuid, retry will be attempted once uuid is discovered): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer);
} else {
crm_debug("Update: %s[%s]=%s (%s %u %u %s)", v->nodename, a->id, v->current, peer->uuid, peer->id, v->nodeid, peer->uname);
build_update_element(xml_top, a, peer->uuid, v->current);
updates++;
free(v->requested);
v->requested = NULL;
if(v->current) {
v->requested = strdup(v->current);
} else {
/* Older versions don't know about the cib_mixed_update flag
* Make sure it goes to the local cib which does
*/
flags |= cib_mixed_update|cib_scope_local;
}
}
}
if(updates) {
crm_log_xml_trace(xml_top, __FUNCTION__);
a->update = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL,
flags, a->user);
crm_notice("Sent update %d with %d changes for %s, id=%s, set=%s",
a->update, updates, a->id, a->uuid ? a->uuid : "<n/a>", a->set);
the_cib->cmds->register_callback(
the_cib, a->update, 120, FALSE, strdup(a->id), "attrd_cib_callback", attrd_cib_callback);
}
}
diff --git a/attrd/main.c b/attrd/main.c
index bc39cbbaf6..e36a22dd40 100644
--- a/attrd/main.c
+++ b/attrd/main.c
@@ -1,355 +1,355 @@
/*
* Copyright (C) 2013 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include <crm/attrd.h>
#include <internal.h>
cib_t *the_cib = NULL;
GMainLoop *mloop = NULL;
bool shutting_down = FALSE;
crm_cluster_t *attrd_cluster = NULL;
election_t *writer = NULL;
int attrd_error = pcmk_ok;
static void
attrd_shutdown(int nsig) {
shutting_down = TRUE;
crm_info("Shutting down");
if (mloop != NULL && g_main_is_running(mloop)) {
g_main_quit(mloop);
} else {
crm_exit(pcmk_ok);
}
}
static void
attrd_cpg_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = string2xml(data);
}
if (xml == NULL) {
crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data);
} else {
crm_node_t *peer = crm_get_peer(nodeid, from);
attrd_peer_message(peer, xml);
}
free_xml(xml);
free(data);
}
static void
attrd_cpg_destroy(gpointer unused)
{
if (shutting_down) {
crm_info("Corosync disconnection complete");
} else {
crm_crit("Lost connection to Corosync service!");
attrd_error = ECONNRESET;
attrd_shutdown(0);
}
}
static void
attrd_cib_replaced_cb(const char *event, xmlNode * msg)
{
crm_notice("Updating all attributes after %s event", event);
if(election_state(writer) == election_won) {
write_attributes(TRUE, FALSE);
}
}
static void
attrd_cib_destroy_cb(gpointer user_data)
{
cib_t *conn = user_data;
conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */
if (shutting_down) {
crm_info("Connection disconnection complete");
} else {
/* eventually this should trigger a reconnect, not a shutdown */
crm_err("Lost connection to CIB service!");
attrd_error = ECONNRESET;
attrd_shutdown(0);
}
return;
}
static cib_t *
attrd_cib_connect(int max_retry)
{
int rc = -ENOTCONN;
static int attempts = 0;
cib_t *connection = cib_new();
do {
if(attempts > 0) {
sleep(attempts);
}
attempts++;
crm_debug("CIB signon attempt %d", attempts);
rc = connection->cmds->signon(connection, T_ATTRD, cib_command);
} while(rc != pcmk_ok && attempts < max_retry);
if (rc != pcmk_ok) {
crm_err("Signon to CIB failed: %s (%d)", pcmk_strerror(rc), rc);
goto cleanup;
}
crm_info("Connected to the CIB after %d attempts", attempts);
rc = connection->cmds->set_connection_dnotify(connection, attrd_cib_destroy_cb);
if (rc != pcmk_ok) {
crm_err("Could not set disconnection callback");
goto cleanup;
}
rc = connection->cmds->add_notify_callback(connection, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb);
if(rc != pcmk_ok) {
crm_err("Could not set CIB notification callback");
goto cleanup;
}
return connection;
cleanup:
if(connection) {
connection->cmds->signoff(connection);
cib_delete(connection);
}
return NULL;
}
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (shutting_down) {
crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
return -EPERM;
}
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
attrd_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *xml = crm_ipcs_recv(client, data, size, &id, &flags);
crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
if (xml == NULL) {
crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c);
return 0;
}
#if ENABLE_ACL
determine_request_user(client->user, xml, F_ATTRD_USER);
#endif
crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c);
crm_log_xml_trace(xml, __PRETTY_FUNCTION__);
attrd_client_message(client, xml);
free_xml(xml);
return 0;
}
/* Error code means? */
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
crm_trace("Connection %p", c);
crm_client_destroy(client);
return 0;
}
static void
attrd_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = attrd_ipc_accept,
.connection_created = attrd_ipc_created,
.msg_process = attrd_ipc_dispatch,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
int
main(int argc, char **argv)
{
int rc = pcmk_ok;
int flag = 0;
int index = 0;
int argerr = 0;
qb_ipcs_service_t *ipcs = NULL;
mloop = g_main_new(FALSE);
crm_log_init(T_ATTRD, LOG_NOTICE, TRUE, FALSE, argc, argv, FALSE);
crm_set_options(NULL, "[options]", long_options,
"Daemon for aggregating and atomically storing node attribute updates into the CIB");
mainloop_add_signal(SIGTERM, attrd_shutdown);
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'h': /* Help message */
crm_help(flag, EX_OK);
break;
default:
++argerr;
break;
}
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_info("Starting up");
attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute);
attrd_cluster = malloc(sizeof(crm_cluster_t));
attrd_cluster->destroy = attrd_cpg_destroy;
attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch;
attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
crm_set_status_callback(attrd_peer_change_cb);
if (crm_cluster_connect(attrd_cluster) == FALSE) {
crm_err("Cluster connection failed");
rc = DAEMON_RESPAWN_STOP;
goto done;
}
crm_info("Cluster connection active");
- writer = election_init(T_ATTRD, attrd_cluster->uname, 120, attrd_election_cb);
+ writer = election_init(T_ATTRD, attrd_cluster->uname, 120000, attrd_election_cb);
attrd_ipc_server_init(&ipcs, &ipc_callbacks);
crm_info("Accepting attribute updates");
the_cib = attrd_cib_connect(10);
if (the_cib == NULL) {
rc = DAEMON_RESPAWN_STOP;
goto done;
}
crm_info("CIB connection active");
g_main_run(mloop);
done:
crm_notice("Cleaning up before exit");
election_fini(writer);
crm_client_disconnect_all(ipcs);
qb_ipcs_destroy(ipcs);
g_hash_table_destroy(attributes);
if (the_cib) {
the_cib->cmds->signoff(the_cib);
cib_delete(the_cib);
}
if(attrd_error) {
return crm_exit(attrd_error);
}
return crm_exit(rc);
}
diff --git a/include/crm/cluster/election.h b/include/crm/cluster/election.h
index caf4b286aa..b90b110666 100644
--- a/include/crm/cluster/election.h
+++ b/include/crm/cluster/election.h
@@ -1,51 +1,51 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_COMMON_ElECTION__H
# define CRM_COMMON_ElECTION__H
/**
* \file
* \brief Functions for conducting elections
* \ingroup core
*/
typedef struct election_s election_t;
enum election_result
{
election_start = 0,
election_in_progress,
election_lost,
election_won,
election_error,
};
void election_fini(election_t *e);
void election_reset(election_t *e);
-election_t *election_init(const char *name, const char *uname, guint period, GSourceFunc cb);
+election_t *election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb);
-void election_timeout_set_period(election_t *e, guint period);
+void election_timeout_set_period(election_t *e, guint period_ms);
void election_timeout_stop(election_t *e);
void election_vote(election_t *e);
bool election_check(election_t *e);
void election_remove(election_t *e, const char *uname);
enum election_result election_state(election_t *e);
enum election_result election_count_vote(election_t *e, xmlNode *vote, bool can_win);
#endif
diff --git a/lib/cluster/election.c b/lib/cluster/election.c
index f54199a9e2..1aa2c8a8c6 100644
--- a/lib/cluster/election.c
+++ b/lib/cluster/election.c
@@ -1,524 +1,530 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/crm.h>
#define STORM_INTERVAL 2 /* in seconds */
#define STORM_MULTIPLIER 5 /* multiplied by the number of nodes */
struct election_s
{
enum election_result state;
guint count;
char *name;
char *uname;
GSourceFunc cb;
GHashTable *voted;
- mainloop_timer_t *trigger; /* When to start */
mainloop_timer_t *timeout; /* When to stop if not everyone casts a vote */
};
-static gboolean election_timer_cb(gpointer user_data)
+static void election_complete(election_t *e)
{
- election_t *e = user_data;
-
crm_info("Election %s complete", e->name);
e->state = election_won;
if(e->cb) {
e->cb(e);
}
election_reset(e);
+}
+
+static gboolean election_timer_cb(gpointer user_data)
+{
+ election_t *e = user_data;
+
+ crm_info("Election %s %p timed out", e->name, e);
+ election_complete(e);
return FALSE;
}
enum election_result
election_state(election_t *e)
{
if(e) {
return e->state;
}
return election_error;
}
election_t *
-election_init(const char *name, const char *uname, guint period, GSourceFunc cb)
+election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
{
static guint count = 0;
election_t *e = calloc(1, sizeof(election_t));
if(e != NULL) {
if(name) {
e->name = g_strdup_printf("election-%s", name);
} else {
e->name = g_strdup_printf("election-%u", count++);
}
e->cb = cb;
e->uname = strdup(uname);
- e->timeout = mainloop_timer_add(e->name, period, FALSE, cb, &e);
- crm_trace("Created %s", e->name);
+ e->timeout = mainloop_timer_add(e->name, period_ms, FALSE, election_timer_cb, e);
+ crm_trace("Created %s %p", e->name, e);
}
return e;
}
void
election_remove(election_t *e, const char *uname)
{
if(e && uname && e->voted) {
g_hash_table_remove(e->voted, uname);
}
}
void
election_reset(election_t *e)
{
+ crm_trace("Resetting election %s", e->name);
if(e) {
mainloop_timer_stop(e->timeout);
}
if (e && e->voted) {
crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
g_hash_table_destroy(e->voted);
e->voted = NULL;
}
}
void
election_fini(election_t *e)
{
if(e) {
election_reset(e);
crm_trace("Destroying %s", e->name);
mainloop_timer_del(e->timeout);
free(e->uname);
free(e->name);
free(e);
}
}
static void
election_timeout_start(election_t *e)
{
if(e) {
mainloop_timer_start(e->timeout);
}
}
void
election_timeout_stop(election_t *e)
{
if(e) {
mainloop_timer_stop(e->timeout);
}
}
void
election_timeout_set_period(election_t *e, guint period)
{
if(e) {
mainloop_timer_set_period(e->timeout, period);
} else {
crm_err("No election defined");
}
}
static int
crm_uptime(struct timeval *output)
{
static time_t expires = 0;
static struct rusage info;
time_t tm_now = time(NULL);
if (expires < tm_now) {
int rc = getrusage(RUSAGE_SELF, &info);
output->tv_sec = 0;
output->tv_usec = 0;
if (rc < 0) {
crm_perror(LOG_ERR, "Could not calculate the current uptime");
expires = 0;
return -1;
}
crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
(long)info.ru_utime.tv_usec);
}
expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */
output->tv_sec = info.ru_utime.tv_sec;
output->tv_usec = info.ru_utime.tv_usec;
return 1;
}
static int
crm_compare_age(struct timeval your_age)
{
struct timeval our_age;
if (crm_uptime(&our_age) < 0) {
return -1;
}
/* We want these times to be "significantly" different */
if (our_age.tv_sec > your_age.tv_sec) {
crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
return 1;
} else if (our_age.tv_sec < your_age.tv_sec) {
crm_debug("Loose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
return -1;
} else if (our_age.tv_usec > your_age.tv_usec) {
crm_debug("Win: %ld.%ld vs %ld.%ld (usec)",
(long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
return 1;
} else if (our_age.tv_usec < your_age.tv_usec) {
crm_debug("Loose: %ld.%ld vs %ld.%ld (usec)",
(long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
return -1;
}
return 0;
}
void
election_vote(election_t *e)
{
struct timeval age;
xmlNode *vote = NULL;
crm_node_t *our_node = crm_get_peer(0, e->uname);
if(e == NULL) {
crm_trace("Not voting in election: not initialized");
return;
}
if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
crm_trace("Cannot vote yet: %p", our_node);
return;
}
e->state = election_in_progress;
vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
e->count++;
crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid);
crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count);
crm_uptime(&age);
crm_xml_add_int(vote, F_CRM_ELECTION_AGE_S, age.tv_sec);
crm_xml_add_int(vote, F_CRM_ELECTION_AGE_US, age.tv_usec);
send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
free_xml(vote);
crm_debug("Started election %d", e->count);
if (e->voted) {
g_hash_table_destroy(e->voted);
e->voted = NULL;
}
election_timeout_start(e);
return;
}
bool
election_check(election_t *e)
{
int voted_size = 0;
int num_members = crm_active_peers();
if(e == NULL) {
crm_trace("not initialized");
return FALSE;
}
if (e->voted) {
voted_size = g_hash_table_size(e->voted);
}
/* in the case of #voted > #members, it is better to
* wait for the timeout and give the cluster time to
* stabilize
*/
if (voted_size >= num_members) {
/* we won and everyone has voted */
election_timeout_stop(e);
if (voted_size > num_members) {
GHashTableIter gIter;
const crm_node_t *node;
char *key = NULL;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
if (crm_is_peer_active(node)) {
crm_err("member: %s proc=%.32x", node->uname, node->processes);
}
}
g_hash_table_iter_init(&gIter, e->voted);
while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
crm_err("voted: %s", key);
}
}
- election_timer_cb(e);
+ election_complete(e);
return TRUE;
} else {
crm_debug("Still waiting on %d non-votes (%d total)",
num_members - voted_size, num_members);
}
return FALSE;
}
#define loss_dampen 2 /* in seconds */
/* A_ELECTION_COUNT */
enum election_result
election_count_vote(election_t *e, xmlNode *vote, bool can_win)
{
int age = 0;
int election_id = -1;
int log_level = LOG_INFO;
gboolean use_born_on = FALSE;
gboolean done = FALSE;
gboolean we_loose = FALSE;
const char *op = NULL;
const char *from = NULL;
const char *reason = "unknown";
const char *election_owner = NULL;
crm_node_t *our_node = NULL, *your_node = NULL;
static int election_wins = 0;
xmlNode *novote = NULL;
time_t tm_now = time(NULL);
static time_t expires = 0;
static time_t last_election_loss = 0;
/* if the membership copy is NULL we REALLY shouldnt be voting
* the question is how we managed to get here.
*/
CRM_CHECK(vote != NULL, return election_error);
if(e == NULL) {
crm_info("Not voting in election: not initialized");
return election_lost;
} else if(crm_peer_cache == NULL) {
crm_info("Not voting in election: no peer cache");
return election_lost;
}
op = crm_element_value(vote, F_CRM_TASK);
from = crm_element_value(vote, F_CRM_HOST_FROM);
election_owner = crm_element_value(vote, F_CRM_ELECTION_OWNER);
crm_element_value_int(vote, F_CRM_ELECTION_ID, &election_id);
your_node = crm_get_peer(0, from);
our_node = crm_get_peer(0, e->uname);
if (e->voted == NULL) {
crm_debug("Created voted hash");
e->voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
}
if (is_heartbeat_cluster()) {
use_born_on = TRUE;
} else if (is_classic_ais_cluster()) {
use_born_on = TRUE;
}
if(can_win == FALSE) {
reason = "Not eligible";
we_loose = TRUE;
} else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
reason = "We are not part of the cluster";
log_level = LOG_ERR;
we_loose = TRUE;
} else if (election_id != e->count && crm_str_eq(our_node->uuid, election_owner, TRUE)) {
log_level = LOG_TRACE;
reason = "Superceeded";
done = TRUE;
} else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
/* Possibly we cached the message in the FSA queue at a point that it wasn't */
reason = "Peer is not part of our cluster";
log_level = LOG_WARNING;
done = TRUE;
} else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
char *op_copy = strdup(op);
char *uname_copy = strdup(from);
CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));
/* update the list of nodes that have voted */
g_hash_table_replace(e->voted, uname_copy, op_copy);
reason = "Recorded";
done = TRUE;
} else {
struct timeval your_age;
const char *your_version = crm_element_value(vote, F_CRM_VERSION);
int tv_sec = 0;
int tv_usec = 0;
crm_element_value_int(vote, F_CRM_ELECTION_AGE_S, &tv_sec);
crm_element_value_int(vote, F_CRM_ELECTION_AGE_US, &tv_usec);
your_age.tv_sec = tv_sec;
your_age.tv_usec = tv_usec;
age = crm_compare_age(your_age);
if(your_age.tv_sec == 0 && your_age.tv_usec == 0) {
crm_log_xml_trace(vote, "bad vote");
crm_write_blackbox(0, NULL);
}
if (crm_str_eq(from, e->uname, TRUE)) {
char *op_copy = strdup(op);
char *uname_copy = strdup(from);
CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));
/* update ourselves in the list of nodes that have voted */
g_hash_table_replace(e->voted, uname_copy, op_copy);
reason = "Recorded";
done = TRUE;
} else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
reason = "Version";
we_loose = TRUE;
} else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
reason = "Version";
} else if (age < 0) {
reason = "Uptime";
we_loose = TRUE;
} else if (age > 0) {
reason = "Uptime";
/* TODO: Check for y(our) born < 0 */
} else if (use_born_on && your_node->born < our_node->born) {
reason = "Born";
we_loose = TRUE;
} else if (use_born_on && your_node->born > our_node->born) {
reason = "Born";
} else if (e->uname == NULL) {
reason = "Unknown host name";
we_loose = TRUE;
} else if (strcasecmp(e->uname, from) > 0) {
reason = "Host name";
we_loose = TRUE;
} else {
reason = "Host name";
CRM_ASSERT(strcasecmp(e->uname, from) < 0);
/* cant happen...
* } else if(strcasecmp(e->uname, from) == 0) {
*
*/
}
}
if (expires < tm_now) {
election_wins = 0;
expires = tm_now + STORM_INTERVAL;
} else if (done == FALSE && we_loose == FALSE) {
int peers = 1 + g_hash_table_size(crm_peer_cache);
/* If every node has to vote down every other node, thats N*(N-1) total elections
* Allow some leway before _really_ complaining
*/
election_wins++;
if (election_wins > (peers * peers)) {
crm_warn("Election storm detected: %d elections in %d seconds", election_wins,
STORM_INTERVAL);
election_wins = 0;
expires = tm_now + STORM_INTERVAL;
crm_write_blackbox(0, NULL);
}
}
if (done) {
do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
election_id, e->count, election_owner, op, from, reason);
return e->state;
} else if(we_loose == FALSE) {
do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
election_id, election_owner, op, from, reason);
if (last_election_loss == 0
|| tm_now - last_election_loss > (time_t) loss_dampen) {
last_election_loss = 0;
election_timeout_stop(e);
/* Start a new election by voting down this, and other, peers */
e->state = election_start;
return e->state;
}
crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
election_id, loss_dampen, ctime(&last_election_loss));
}
novote = create_request(CRM_OP_NOVOTE, NULL, from,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
election_id, election_owner, op, from, reason);
election_timeout_stop(e);
crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
send_cluster_message(your_node, crm_msg_crmd, novote, TRUE);
free_xml(novote);
last_election_loss = tm_now;
e->state = election_lost;
return e->state;
}
diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c
index 18a67e67b2..7df6fa26b6 100644
--- a/lib/common/mainloop.c
+++ b/lib/common/mainloop.c
@@ -1,1105 +1,1105 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipcs.h>
struct mainloop_child_s {
pid_t pid;
char *desc;
unsigned timerid;
unsigned watchid;
gboolean timeout;
void *privatedata;
/* Called when a process dies */
void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
};
struct trigger_s {
GSource source;
gboolean running;
gboolean trigger;
void *user_data;
guint id;
};
static gboolean
crm_trigger_prepare(GSource * source, gint * timeout)
{
crm_trigger_t *trig = (crm_trigger_t *) source;
/* cluster-glue's FD and IPC related sources make use of
* g_source_add_poll() but do not set a timeout in their prepare
* functions
*
* This means mainloop's poll() will block until an event for one
* of these sources occurs - any /other/ type of source, such as
* this one or g_idle_*, that doesn't use g_source_add_poll() is
* S-O-L and wont be processed until there is something fd-based
* happens.
*
* Luckily the timeout we can set here affects all sources and
* puts an upper limit on how long poll() can take.
*
* So unconditionally set a small-ish timeout, not too small that
* we're in constant motion, which will act as an upper bound on
* how long the signal handling might be delayed for.
*/
*timeout = 500; /* Timeout in ms */
return trig->trigger;
}
static gboolean
crm_trigger_check(GSource * source)
{
crm_trigger_t *trig = (crm_trigger_t *) source;
return trig->trigger;
}
static gboolean
crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
{
int rc = TRUE;
crm_trigger_t *trig = (crm_trigger_t *) source;
if (trig->running) {
/* Wait until the existing job is complete before starting the next one */
return TRUE;
}
trig->trigger = FALSE;
if (callback) {
rc = callback(trig->user_data);
if (rc < 0) {
crm_trace("Trigger handler %p not yet complete", trig);
trig->running = TRUE;
rc = TRUE;
}
}
return rc;
}
static void
crm_trigger_finalize(GSource * source)
{
crm_trace("Trigger %p destroyed", source);
}
#if 0
struct _GSourceCopy
{
gpointer callback_data;
GSourceCallbackFuncs *callback_funcs;
const GSourceFuncs *source_funcs;
guint ref_count;
GMainContext *context;
gint priority;
guint flags;
guint source_id;
GSList *poll_fds;
GSource *prev;
GSource *next;
char *name;
void *priv;
};
static int
g_source_refcount(GSource * source)
{
/* Duplicating the contents of private header files is a necessary evil */
if (source) {
struct _GSourceCopy *evil = (struct _GSourceCopy*)source;
return evil->ref_count;
}
return 0;
}
#else
static int g_source_refcount(GSource * source)
{
return 0;
}
#endif
static GSourceFuncs crm_trigger_funcs = {
crm_trigger_prepare,
crm_trigger_check,
crm_trigger_dispatch,
crm_trigger_finalize,
};
static crm_trigger_t *
mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data),
gpointer userdata)
{
crm_trigger_t *trigger = NULL;
trigger = (crm_trigger_t *) source;
trigger->id = 0;
trigger->trigger = FALSE;
trigger->user_data = userdata;
if (dispatch) {
g_source_set_callback(source, dispatch, trigger, NULL);
}
g_source_set_priority(source, priority);
g_source_set_can_recurse(source, FALSE);
crm_trace("Setup %p with ref-count=%u", source, g_source_refcount(source));
trigger->id = g_source_attach(source, NULL);
crm_trace("Attached %p with ref-count=%u", source, g_source_refcount(source));
return trigger;
}
void
mainloop_trigger_complete(crm_trigger_t * trig)
{
crm_trace("Trigger handler %p complete", trig);
trig->running = FALSE;
}
/* If dispatch returns:
* -1: Job running but not complete
* 0: Remove the trigger from mainloop
* 1: Leave the trigger in mainloop
*/
crm_trigger_t *
mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata)
{
GSource *source = NULL;
CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource));
source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t));
CRM_ASSERT(source != NULL);
return mainloop_setup_trigger(source, priority, dispatch, userdata);
}
void
mainloop_set_trigger(crm_trigger_t * source)
{
if(source) {
source->trigger = TRUE;
}
}
gboolean
mainloop_destroy_trigger(crm_trigger_t * source)
{
GSource *gs = NULL;
if(source == NULL) {
return TRUE;
}
gs = (GSource *)source;
if(g_source_refcount(gs) > 2) {
crm_info("Trigger %p is still referenced %u times", gs, g_source_refcount(gs));
}
g_source_destroy(gs); /* Remove from mainloop, ref_count-- */
g_source_unref(gs); /* The caller no longer carries a reference to source
*
* At this point the source should be free'd,
* unless we're currently processing said
* source, in which case mainloop holds an
* additional reference and it will be free'd
* once our processing completes
*/
return TRUE;
}
typedef struct signal_s {
crm_trigger_t trigger; /* must be first */
void (*handler) (int sig);
int signal;
} crm_signal_t;
static crm_signal_t *crm_signals[NSIG];
static gboolean
crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
{
crm_signal_t *sig = (crm_signal_t *) source;
if(sig->signal != SIGCHLD) {
crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal));
}
sig->trigger.trigger = FALSE;
if (sig->handler) {
sig->handler(sig->signal);
}
return TRUE;
}
static void
mainloop_signal_handler(int sig)
{
if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) {
mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]);
}
}
static GSourceFuncs crm_signal_funcs = {
crm_trigger_prepare,
crm_trigger_check,
crm_signal_dispatch,
crm_trigger_finalize,
};
gboolean
crm_signal(int sig, void (*dispatch) (int sig))
{
sigset_t mask;
struct sigaction sa;
struct sigaction old;
if (sigemptyset(&mask) < 0) {
crm_perror(LOG_ERR, "Call to sigemptyset failed");
return FALSE;
}
memset(&sa, 0, sizeof(struct sigaction));
sa.sa_handler = dispatch;
sa.sa_flags = SA_RESTART;
sa.sa_mask = mask;
if (sigaction(sig, &sa, &old) < 0) {
crm_perror(LOG_ERR, "Could not install signal handler for signal %d", sig);
return FALSE;
}
return TRUE;
}
gboolean
mainloop_add_signal(int sig, void (*dispatch) (int sig))
{
GSource *source = NULL;
int priority = G_PRIORITY_HIGH - 1;
if (sig == SIGTERM) {
/* TERM is higher priority than other signals,
* signals are higher priority than other ipc.
* Yes, minus: smaller is "higher"
*/
priority--;
}
if (sig >= NSIG || sig < 0) {
crm_err("Signal %d is out of range", sig);
return FALSE;
} else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) {
crm_trace("Signal handler for %d is already installed", sig);
return TRUE;
} else if (crm_signals[sig] != NULL) {
crm_err("Different signal handler for %d is already installed", sig);
return FALSE;
}
CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource));
source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t));
crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL);
CRM_ASSERT(crm_signals[sig] != NULL);
crm_signals[sig]->handler = dispatch;
crm_signals[sig]->signal = sig;
if (crm_signal(sig, mainloop_signal_handler) == FALSE) {
crm_signal_t *tmp = crm_signals[sig];
crm_signals[sig] = NULL;
mainloop_destroy_trigger((crm_trigger_t *) tmp);
return FALSE;
}
#if 0
/* If we want signals to interrupt mainloop's poll(), instead of waiting for
* the timeout, then we should call siginterrupt() below
*
* For now, just enforce a low timeout
*/
if (siginterrupt(sig, 1) < 0) {
crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig);
}
#endif
return TRUE;
}
gboolean
mainloop_destroy_signal(int sig)
{
crm_signal_t *tmp = NULL;
if (sig >= NSIG || sig < 0) {
crm_err("Signal %d is out of range", sig);
return FALSE;
} else if (crm_signal(sig, NULL) == FALSE) {
crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig);
return FALSE;
} else if (crm_signals[sig] == NULL) {
return TRUE;
}
crm_trace("Destroying signal %d", sig);
tmp = crm_signals[sig];
crm_signals[sig] = NULL;
mainloop_destroy_trigger((crm_trigger_t *) tmp);
return TRUE;
}
static qb_array_t *gio_map = NULL;
void
mainloop_cleanup(void)
{
if(gio_map) {
qb_array_free(gio_map);
}
}
/*
* libqb...
*/
struct gio_to_qb_poll {
int32_t is_used;
guint source;
int32_t events;
void *data;
qb_ipcs_dispatch_fn_t fn;
enum qb_loop_priority p;
};
static gboolean
gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data)
{
struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
gint fd = g_io_channel_unix_get_fd(gio);
crm_trace("%p.%d %d", data, fd, condition);
if (condition & G_IO_NVAL) {
crm_trace("Marking failed adaptor %p unused", adaptor);
adaptor->is_used = QB_FALSE;
}
return (adaptor->fn(fd, condition, adaptor->data) == 0);
}
static void
gio_poll_destroy(gpointer data)
{
struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
adaptor->is_used = QB_FALSE;
adaptor->source = 0;
}
static int32_t
gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts,
void *data, qb_ipcs_dispatch_fn_t fn)
{
struct gio_to_qb_poll *adaptor;
GIOChannel *channel;
int32_t res = 0;
res = qb_array_index(gio_map, fd, (void **)&adaptor);
if (res < 0) {
crm_err("Array lookup failed for fd=%d: %d", fd, res);
return res;
}
crm_trace("Adding fd=%d to mainloop as adapater %p", fd, adaptor);
if (adaptor->is_used) {
crm_err("Adapter for descriptor %d is still in-use", fd);
return -EEXIST;
}
/* channel is created with ref_count = 1 */
channel = g_io_channel_unix_new(fd);
if (!channel) {
crm_err("No memory left to add fd=%d", fd);
return -ENOMEM;
}
/* Because unlike the poll() API, glib doesn't tell us about HUPs by default */
evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR);
adaptor->fn = fn;
adaptor->events = evts;
adaptor->data = data;
adaptor->p = p;
adaptor->is_used = QB_TRUE;
adaptor->source =
g_io_add_watch_full(channel, G_PRIORITY_DEFAULT, evts, gio_read_socket, adaptor,
gio_poll_destroy);
/* Now that mainloop now holds a reference to channel,
* thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
*
* This means that channel will be free'd by:
* g_main_context_dispatch()
* -> g_source_destroy_internal()
* -> g_source_callback_unref()
* shortly after gio_poll_destroy() completes
*/
g_io_channel_unref(channel);
crm_trace("Added to mainloop with gsource id=%d", adaptor->source);
if (adaptor->source > 0) {
return 0;
}
return -EINVAL;
}
static int32_t
gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts,
void *data, qb_ipcs_dispatch_fn_t fn)
{
return 0;
}
static int32_t
gio_poll_dispatch_del(int32_t fd)
{
struct gio_to_qb_poll *adaptor;
crm_trace("Looking for fd=%d", fd);
if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) {
crm_trace("Marking adaptor %p unused", adaptor);
if (adaptor->source) {
g_source_remove(adaptor->source);
adaptor->source = 0;
}
adaptor->is_used = QB_FALSE;
}
return 0;
}
struct qb_ipcs_poll_handlers gio_poll_funcs = {
.job_add = NULL,
.dispatch_add = gio_poll_dispatch_add,
.dispatch_mod = gio_poll_dispatch_mod,
.dispatch_del = gio_poll_dispatch_del,
};
static enum qb_ipc_type
pick_ipc_type(enum qb_ipc_type requested)
{
const char *env = getenv("PCMK_ipc_type");
if (env && strcmp("shared-mem", env) == 0) {
return QB_IPC_SHM;
} else if (env && strcmp("socket", env) == 0) {
return QB_IPC_SOCKET;
} else if (env && strcmp("posix", env) == 0) {
return QB_IPC_POSIX_MQ;
} else if (env && strcmp("sysv", env) == 0) {
return QB_IPC_SYSV_MQ;
} else if (requested == QB_IPC_NATIVE) {
/* We prefer shared memory because the server never blocks on
* send. If part of a message fits into the socket, libqb
* needs to block until the remainder can be sent also.
* Otherwise the client will wait forever for the remaining
* bytes.
*/
return QB_IPC_SHM;
}
return requested;
}
qb_ipcs_service_t *
mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
struct qb_ipcs_service_handlers * callbacks)
{
int rc = 0;
qb_ipcs_service_t *server = NULL;
if (gio_map == NULL) {
gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1);
}
crm_client_init();
server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks);
qb_ipcs_poll_handlers_set(server, &gio_poll_funcs);
rc = qb_ipcs_run(server);
if (rc < 0) {
crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc);
return NULL;
}
return server;
}
void
mainloop_del_ipc_server(qb_ipcs_service_t * server)
{
if (server) {
qb_ipcs_destroy(server);
}
}
struct mainloop_io_s {
char *name;
void *userdata;
int fd;
guint source;
crm_ipc_t *ipc;
GIOChannel *channel;
int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata);
int (*dispatch_fn_io) (gpointer userdata);
void (*destroy_fn) (gpointer userdata);
};
static gboolean
mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data)
{
gboolean keep = TRUE;
mainloop_io_t *client = data;
CRM_ASSERT(client->fd == g_io_channel_unix_get_fd(gio));
if (condition & G_IO_IN) {
if (client->ipc) {
long rc = 0;
int max = 10;
do {
rc = crm_ipc_read(client->ipc);
if (rc <= 0) {
crm_trace("Message acquisition from %s[%p] failed: %s (%ld)",
client->name, client, pcmk_strerror(rc), rc);
} else if (client->dispatch_fn_ipc) {
const char *buffer = crm_ipc_buffer(client->ipc);
crm_trace("New message from %s[%p] = %d", client->name, client, rc, condition);
if (client->dispatch_fn_ipc(buffer, rc, client->userdata) < 0) {
crm_trace("Connection to %s no longer required", client->name);
keep = FALSE;
}
}
} while (keep && rc > 0 && --max > 0);
} else {
crm_trace("New message from %s[%p] %u", client->name, client, condition);
if (client->dispatch_fn_io) {
if (client->dispatch_fn_io(client->userdata) < 0) {
crm_trace("Connection to %s no longer required", client->name);
keep = FALSE;
}
}
}
}
if (client->ipc && crm_ipc_connected(client->ipc) == FALSE) {
crm_err("Connection to %s[%p] closed (I/O condition=%d)", client->name, client, condition);
keep = FALSE;
} else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) {
crm_trace("The connection %s[%p] has been closed (I/O condition=%d)",
client->name, client, condition);
keep = FALSE;
} else if ((condition & G_IO_IN) == 0) {
/*
#define GLIB_SYSDEF_POLLIN =1
#define GLIB_SYSDEF_POLLPRI =2
#define GLIB_SYSDEF_POLLOUT =4
#define GLIB_SYSDEF_POLLERR =8
#define GLIB_SYSDEF_POLLHUP =16
#define GLIB_SYSDEF_POLLNVAL =32
typedef enum
{
G_IO_IN GLIB_SYSDEF_POLLIN,
G_IO_OUT GLIB_SYSDEF_POLLOUT,
G_IO_PRI GLIB_SYSDEF_POLLPRI,
G_IO_ERR GLIB_SYSDEF_POLLERR,
G_IO_HUP GLIB_SYSDEF_POLLHUP,
G_IO_NVAL GLIB_SYSDEF_POLLNVAL
} GIOCondition;
A bitwise combination representing a condition to watch for on an event source.
G_IO_IN There is data to read.
G_IO_OUT Data can be written (without blocking).
G_IO_PRI There is urgent data to read.
G_IO_ERR Error condition.
G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets).
G_IO_NVAL Invalid request. The file descriptor is not open.
*/
crm_err("Strange condition: %d", condition);
}
/* keep == FALSE results in mainloop_gio_destroy() being called
* just before the source is removed from mainloop
*/
return keep;
}
static void
mainloop_gio_destroy(gpointer c)
{
mainloop_io_t *client = c;
char *c_name = strdup(client->name);
/* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c
* client->channel will still have ref_count > 0... should be == 1
*/
crm_trace("Destroying client %s[%p]", c_name, c);
if (client->ipc) {
crm_ipc_close(client->ipc);
}
if (client->destroy_fn) {
void (*destroy_fn) (gpointer userdata) = client->destroy_fn;
client->destroy_fn = NULL;
destroy_fn(client->userdata);
}
if (client->ipc) {
crm_ipc_t *ipc = client->ipc;
client->ipc = NULL;
crm_ipc_destroy(ipc);
}
crm_trace("Destroyed client %s[%p]", c_name, c);
free(client->name); client->name = NULL;
free(client);
free(c_name);
}
mainloop_io_t *
mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata,
struct ipc_client_callbacks *callbacks)
{
mainloop_io_t *client = NULL;
crm_ipc_t *conn = crm_ipc_new(name, max_size);
if (conn && crm_ipc_connect(conn)) {
int32_t fd = crm_ipc_get_fd(conn);
client = mainloop_add_fd(name, priority, fd, userdata, NULL);
client->ipc = conn;
client->destroy_fn = callbacks->destroy;
client->dispatch_fn_ipc = callbacks->dispatch;
}
if (conn && client == NULL) {
crm_trace("Connection to %s failed", name);
crm_ipc_close(conn);
crm_ipc_destroy(conn);
}
return client;
}
void
mainloop_del_ipc_client(mainloop_io_t * client)
{
mainloop_del_fd(client);
}
crm_ipc_t *
mainloop_get_ipc_client(mainloop_io_t * client)
{
if (client) {
return client->ipc;
}
return NULL;
}
mainloop_io_t *
mainloop_add_fd(const char *name, int priority, int fd, void *userdata,
struct mainloop_fd_callbacks * callbacks)
{
mainloop_io_t *client = NULL;
if (fd > 0) {
client = calloc(1, sizeof(mainloop_io_t));
client->name = strdup(name);
client->userdata = userdata;
if (callbacks) {
client->destroy_fn = callbacks->destroy;
client->dispatch_fn_io = callbacks->dispatch;
}
client->fd = fd;
client->channel = g_io_channel_unix_new(fd);
client->source =
g_io_add_watch_full(client->channel, priority,
(G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback,
client, mainloop_gio_destroy);
/* Now that mainloop now holds a reference to channel,
* thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
*
* This means that channel will be free'd by:
* g_main_context_dispatch() or g_source_remove()
* -> g_source_destroy_internal()
* -> g_source_callback_unref()
* shortly after mainloop_gio_destroy() completes
*/
g_io_channel_unref(client->channel);
crm_trace("Added connection %d for %s[%p].%d", client->source, client->name, client, fd);
}
return client;
}
void
mainloop_del_fd(mainloop_io_t * client)
{
if (client != NULL) {
crm_trace("Removing client %s[%p]", client->name, client);
if (client->source) {
/* Results in mainloop_gio_destroy() being called just
* before the source is removed from mainloop
*/
g_source_remove(client->source);
}
}
}
pid_t
mainloop_child_pid(mainloop_child_t * child)
{
return child->pid;
}
const char *
mainloop_child_name(mainloop_child_t * child)
{
return child->desc;
}
int
mainloop_child_timeout(mainloop_child_t * child)
{
return child->timeout;
}
void *
mainloop_child_userdata(mainloop_child_t * child)
{
return child->privatedata;
}
void
mainloop_clear_child_userdata(mainloop_child_t * child)
{
child->privatedata = NULL;
}
static gboolean
child_timeout_callback(gpointer p)
{
mainloop_child_t *child = p;
child->timerid = 0;
if (child->timeout) {
crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid);
return FALSE;
}
child->timeout = TRUE;
crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid);
if (kill(child->pid, SIGKILL) < 0) {
if (errno == ESRCH) {
/* Nothing left to do */
return FALSE;
}
crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid);
}
child->timerid = g_timeout_add(5000, child_timeout_callback, child);
return FALSE;
}
static GListPtr child_list = NULL;
static void
child_death_dispatch(int signal)
{
GListPtr iter = child_list;
while(iter) {
int rc = 0;
int core = 0;
int signo = 0;
int status = 0;
int exitcode = 0;
GListPtr saved = NULL;
mainloop_child_t *child = iter->data;
rc = waitpid(child->pid, &status, WNOHANG);
if(rc == 0) {
iter = iter->next;
continue;
} else if(rc != child->pid) {
signo = signal;
exitcode = 1;
status = 1;
crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid);
} else {
crm_trace("Managed process %d exited: %p", child->pid, child);
if (WIFEXITED(status)) {
exitcode = WEXITSTATUS(status);
crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode);
} else if (WIFSIGNALED(status)) {
signo = WTERMSIG(status);
crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo);
}
#ifdef WCOREDUMP
if (WCOREDUMP(status)) {
core = 1;
crm_err("Managed process %d (%s) dumped core", child->pid, child->desc);
}
#endif
}
if (child->callback) {
child->callback(child, child->pid, core, signo, exitcode);
}
crm_trace("Removing process entry %p for %d", child, child->pid);
saved = iter;
iter = iter->next;
child_list = g_list_remove_link(child_list, saved);
g_list_free(saved);
if (child->timerid != 0) {
crm_trace("Removing timer %d", child->timerid);
g_source_remove(child->timerid);
child->timerid = 0;
}
free(child->desc);
free(child);
}
}
/* Create/Log a new tracked process
* To track a process group, use -pid
*/
void
mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata,
void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode))
{
static bool need_init = TRUE;
mainloop_child_t *child = g_new(mainloop_child_t, 1);
child->pid = pid;
child->timerid = 0;
child->timeout = FALSE;
child->privatedata = privatedata;
child->callback = callback;
if(desc) {
child->desc = strdup(desc);
}
if (timeout) {
child->timerid = g_timeout_add(timeout, child_timeout_callback, child);
}
child_list = g_list_append(child_list, child);
if(need_init) {
need_init = FALSE;
/* Do NOT use g_child_watch_add() and friends, they rely on pthreads */
mainloop_add_signal(SIGCHLD, child_death_dispatch);
/* In case they terminated before the signal handler was installed */
child_death_dispatch(SIGCHLD);
}
}
struct mainloop_timer_s {
guint id;
guint period_ms;
bool repeat;
char *name;
GSourceFunc cb;
void *userdata;
};
struct mainloop_timer_s mainloop;
static gboolean mainloop_timer_cb(gpointer user_data)
{
int id = 0;
bool repeat = FALSE;
struct mainloop_timer_s *t = user_data;
CRM_ASSERT(t != NULL);
id = t->id;
t->id = 0; /* Ensure its unset during callbacks so that
* mainloop_timer_running() works as expected
*/
if(t->cb) {
crm_trace("Invoking callbacks for timer %s", t->name);
repeat = t->repeat;
if(t->cb(t->userdata) == FALSE) {
crm_trace("Timer %s complete", t->name);
repeat = FALSE;
}
}
if(repeat) {
/* Restore if repeating */
t->id = id;
}
return repeat;
}
bool mainloop_timer_running(mainloop_timer_t *t)
{
if(t && t->id != 0) {
return TRUE;
}
return FALSE;
}
void mainloop_timer_start(mainloop_timer_t *t)
{
mainloop_timer_stop(t);
if(t && t->period_ms > 0) {
crm_trace("Starting timer %s", t->name);
t->id = g_timeout_add(t->period_ms, mainloop_timer_cb, t);
}
}
void mainloop_timer_stop(mainloop_timer_t *t)
{
if(t && t->id != 0) {
crm_trace("Stopping timer %s", t->name);
g_source_remove(t->id);
t->id = 0;
}
}
guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms)
{
guint last = 0;
if(t) {
last = t->period_ms;
t->period_ms = period_ms;
}
if(t && t->id != 0 && last != t->period_ms) {
mainloop_timer_start(t);
}
return last;
}
mainloop_timer_t *
mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata)
{
mainloop_timer_t *t = calloc(1, sizeof(mainloop_timer_t));
if(t) {
if(name) {
t->name = g_strdup_printf("%s-%u-%d", name, period_ms, repeat);
} else {
t->name = g_strdup_printf("%p-%u-%d", t, period_ms, repeat);
}
t->id = 0;
t->period_ms = period_ms;
t->repeat = repeat;
t->cb = cb;
t->userdata = userdata;
- crm_trace("Created timer %s", t->name);
+ crm_trace("Created timer %s with %p %p", t->name, userdata, t->userdata);
}
return t;
}
void
mainloop_timer_del(mainloop_timer_t *t)
{
if(t) {
crm_trace("Destroying timer %s", t->name);
mainloop_timer_stop(t);
free(t->name);
free(t);
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:09 PM (21 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1981178
Default Alt Text
(81 KB)

Event Timeline