Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/crmd/pengine.c b/crmd/pengine.c
index 5546d7e3f2..2f3eba85b3 100644
--- a/crmd/pengine.c
+++ b/crmd/pengine.c
@@ -1,295 +1,298 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crmd_fsa.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h> /* for access */
#include <sys/types.h> /* for calls to open */
#include <sys/stat.h> /* for calls to open */
#include <fcntl.h> /* for calls to open */
#include <pwd.h> /* for getpwuid */
#include <grp.h> /* for initgroups */
#include <sys/time.h> /* for getrlimit */
#include <sys/resource.h> /* for getrlimit */
#include <errno.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crm/cib.h>
#include <crmd.h>
struct crm_subsystem_s *pe_subsystem = NULL;
void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
static void
save_cib_contents(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *id = user_data;
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
CRM_CHECK(id != NULL, return);
if (rc == pcmk_ok) {
int len = 15;
char *filename = NULL;
len += strlen(id);
len += strlen(PE_STATE_DIR);
filename = calloc(1, len);
CRM_CHECK(filename != NULL, return);
sprintf(filename, PE_STATE_DIR "/pe-core-%s.bz2", id);
if (write_xml_file(output, filename, TRUE) < 0) {
crm_err("Could not save CIB contents after PE crash to %s", filename);
} else {
crm_notice("Saved CIB contents after PE crash to %s", filename);
}
free(filename);
}
free(id);
}
static void
pe_ipc_destroy(gpointer user_data)
{
clear_bit(fsa_input_register, pe_subsystem->flag_connected);
if (is_set(fsa_input_register, pe_subsystem->flag_required)) {
int rc = pcmk_ok;
char *uuid_str = crm_generate_uuid();
crm_crit("Connection to the Policy Engine failed (pid=%d, uuid=%s)",
pe_subsystem->pid, uuid_str);
/*
*The PE died...
*
* Save the current CIB so that we have a chance of
* figuring out what killed it.
*
* Delay raising the I_ERROR until the query below completes or
* 5s is up, whichever comes first.
*
*/
rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);
} else {
crm_info("Connection to the Policy Engine released");
}
pe_subsystem->pid = -1;
pe_subsystem->source = NULL;
pe_subsystem->client = NULL;
mainloop_set_trigger(fsa_source);
return;
}
static int
pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
xmlNode *msg = string2xml(buffer);
if (msg) {
route_message(C_IPC_MESSAGE, msg);
}
free_xml(msg);
return 0;
}
/* A_PE_START, A_PE_STOP, A_TE_RESTART */
void
do_pe_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
struct crm_subsystem_s *this_subsys = pe_subsystem;
long long stop_actions = A_PE_STOP;
long long start_actions = A_PE_START;
static struct ipc_client_callbacks pe_callbacks = {
.dispatch = pe_ipc_dispatch,
.destroy = pe_ipc_destroy
};
if (action & stop_actions) {
clear_bit(fsa_input_register, pe_subsystem->flag_required);
mainloop_del_ipc_client(pe_subsystem->source);
pe_subsystem->source = NULL;
clear_bit(fsa_input_register, pe_subsystem->flag_connected);
}
if ((action & start_actions) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) {
if (cur_state != S_STOPPING) {
set_bit(fsa_input_register, pe_subsystem->flag_required);
pe_subsystem->source =
mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, G_PRIORITY_DEFAULT,
5 * 1024 * 1024 /* 5Mb */ , NULL, &pe_callbacks);
if (pe_subsystem->source == NULL) {
crm_warn("Setup of client connection failed, not adding channel to mainloop");
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
return;
}
/* if (is_openais_cluster()) { */
/* pe_subsystem->pid = pe_subsystem->ipc->farside_pid; */
/* } */
set_bit(fsa_input_register, pe_subsystem->flag_connected);
} else {
crm_info("Ignoring request to start %s while shutting down", this_subsys->name);
}
}
}
int fsa_pe_query = 0;
char *fsa_pe_ref = NULL;
/* A_PE_INVOKE */
void
do_pe_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (AM_I_DC == FALSE) {
crm_err("Not DC: No need to invoke the PE (anymore): %s", fsa_action2string(action));
return;
}
if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Cannot shut down gracefully without the PE");
register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
} else {
crm_info("Waiting for the PE to connect");
crmd_fsa_stall(FALSE);
register_fsa_action(A_PE_START);
}
return;
}
if (cur_state != S_POLICY_ENGINE) {
crm_notice("No need to invoke the PE in state %s", fsa_state2string(cur_state));
return;
}
if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
crm_err("Attempted to invoke the PE without a consistent copy of the CIB!");
/* start the join from scratch */
register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
return;
}
fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
fsa_state2string(fsa_state));
/* Make sure any queued calculations are discarded */
free(fsa_pe_ref);
fsa_pe_ref = NULL;
fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
}
void
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
int sent;
xmlNode *cmd = NULL;
if (rc != pcmk_ok) {
crm_err("Cant retrieve the CIB: %s (call %d)", pcmk_strerror(rc), call_id);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
return;
} else if (call_id != fsa_pe_query) {
crm_trace("Skipping superceeded CIB query: %d (current=%d)", call_id, fsa_pe_query);
return;
} else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
crm_debug("No need to invoke the PE anymore");
return;
} else if (fsa_state != S_POLICY_ENGINE) {
crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state));
return;
} else if (last_peer_update != 0) {
crm_debug("Re-asking for the CIB: peer update %d still pending", last_peer_update);
sleep(1);
register_fsa_action(A_PE_INVOKE);
return;
} else if (fsa_state != S_POLICY_ENGINE) {
crm_err("Invoking PE in state: %s", fsa_state2string(fsa_state));
return;
}
CRM_LOG_ASSERT(output != NULL);
+ /* refresh our remote-node cache when the pengine is invoked */
+ crm_remote_peer_cache_refresh(output);
+
crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
if (ever_had_quorum && crm_have_quorum == FALSE) {
crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
}
cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL);
free(fsa_pe_ref);
fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE);
sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem->source), cmd, 0, 0, NULL);
if (sent <= 0) {
crm_err("Could not contact the pengine: %d", sent);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
}
crm_debug("Invoking the PE: query=%d, ref=%s, seq=%llu, quorate=%d",
fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
free_xml(cmd);
}
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 54fae0435f..239af6335e 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -1,474 +1,465 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crm/fencing/internal.h>
crm_trigger_t *stonith_reconnect = NULL;
GListPtr stonith_cleanup_list = NULL;
static gboolean
fail_incompletable_stonith(crm_graph_t * graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (synapse->confirmed) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
if (action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): STONITHd terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("STONITHd failure resulted in un-runnable actions");
abort_transition(INFINITY, tg_restart, "Stonith failure", last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
{
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
/* cbchan will be garbage at this point, arrange for it to be reset */
if(stonith_api) {
stonith_api->state = stonith_disconnected;
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
#if SUPPORT_CMAN
# include <libfenced.h>
#endif
char *te_client_id = NULL;
#ifdef HAVE_SYS_REBOOT_H
# include <unistd.h>
# include <sys/reboot.h>
#endif
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
if(te_client_id == NULL) {
te_client_id = g_strdup_printf("%s.%d", crm_system_name, getpid());
}
if (st_event == NULL) {
crm_err("Notify data not found");
return;
}
if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
crm_crit("We were alegedly just fenced by %s for %s with %s!", st_event->executioner,
st_event->origin, st_event->device); /* Dumps blackbox if enabled */
qb_log_fini(); /* Try to get the above log message to disk - somehow */
/* Get out ASAP and do not come back up.
*
* Triggering a reboot is also not the worst idea either since
* the rest of the cluster thinks we're safely down
*/
#ifdef RB_HALT_SYSTEM
reboot(RB_HALT_SYSTEM);
#endif
/*
* If reboot() fails or is not supported, coming back up will
* probably lead to a situation where the other nodes set our
* status to 'lost' because of the fencing callback and will
* discard subsequent election votes with:
*
* Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
*
* So just stay dead, something is seriously messed up anyway.
*
*/
exit(100); /* None of our wrappers since we already called qb_log_fini() */
return;
}
if (st_event->result == pcmk_ok &&
safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
st_fail_count_reset(st_event->target);
}
crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->action,
st_event->executioner ? st_event->executioner : "<anyone>",
st_event->origin, pcmk_strerror(st_event->result), st_event->id,
st_event->client_origin ? st_event->client_origin : "<unknown>");
#if SUPPORT_CMAN
if (st_event->result == pcmk_ok && is_cman_cluster()) {
int local_rc = 0;
char *target_copy = strdup(st_event->target);
/* In case fenced hasn't noticed yet
*
* Any fencing that has been inititated will be completed by way of the fence_pcmk redirect
*/
local_rc = fenced_external(target_copy);
if (local_rc != 0) {
crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
local_rc);
} else {
crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
}
free(target_copy);
}
#endif
if (st_event->result == pcmk_ok) {
crm_node_t *peer = crm_get_peer(0, st_event->target);
const char *uuid = crm_peer_uuid(peer);
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
if(AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
* that invoked stonith to fence someone
*/
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
}
/* Assume it was our leader if we dont currently have one */
} else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
crm_notice("Target %s our leader %s (recorded: %s)",
fsa_our_dc ? "was" : "may have been", st_event->target,
fsa_our_dc ? fsa_our_dc : "<unset>");
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (we_are_executioner) {
send_stonith_update(NULL, st_event->target, uuid);
}
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));
}
/* Everyone records them as safely down */
crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL);
crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0);
crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
crm_update_peer_join(__FUNCTION__, peer, crm_join_none);
}
}
gboolean
te_connect_stonith(gpointer user_data)
{
int lpc = 0;
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Still connected");
return TRUE;
}
for (lpc = 0; lpc < 30; lpc++) {
crm_debug("Attempting connection to fencing daemon...");
sleep(1);
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc == pcmk_ok) {
break;
}
if (user_data != NULL) {
crm_err("Sign-in failed: triggered a retry");
mainloop_set_trigger(stonith_reconnect);
return TRUE;
}
crm_err("Sign-in failed: pausing and trying again in 2s...");
sleep(1);
}
CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
tengine_stonith_notify);
crm_trace("Connected");
return TRUE;
}
gboolean
stop_te_timer(crm_action_timer_t * timer)
{
const char *timer_desc = "action timer";
if (timer == NULL) {
return FALSE;
}
if (timer->reason == timeout_abort) {
timer_desc = "global timer";
crm_trace("Stopping %s", timer_desc);
}
if (timer->source_id != 0) {
crm_trace("Stopping %s", timer_desc);
g_source_remove(timer->source_id);
timer->source_id = 0;
} else {
crm_trace("%s was already stopped", timer_desc);
return FALSE;
}
return TRUE;
}
gboolean
te_graph_trigger(gpointer user_data)
{
enum transition_status graph_rc = -1;
if (transition_graph == NULL) {
crm_debug("Nothing to do");
return TRUE;
}
crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
return TRUE;
break;
default:
break;
}
if (transition_graph->complete == FALSE) {
graph_rc = run_graph(transition_graph);
print_graph(LOG_DEBUG_3, transition_graph);
if (graph_rc == transition_active) {
crm_trace("Transition not yet complete");
return TRUE;
} else if (graph_rc == transition_pending) {
crm_trace("Transition not yet complete - no actions fired");
return TRUE;
}
if (graph_rc != transition_complete) {
crm_warn("Transition failed: %s", transition_status(graph_rc));
print_graph(LOG_NOTICE, transition_graph);
}
}
crm_debug("Transition %d is now complete", transition_graph->id);
transition_graph->complete = TRUE;
notify_crmd(transition_graph);
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
crm_trace("%s:%d - Triggered graph processing", fn, line);
mainloop_set_trigger(transition_trigger);
}
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
const char *abort_text, xmlNode * reason, const char *fn, int line)
{
const char *magic = NULL;
CRM_CHECK(transition_graph != NULL, return);
if (reason) {
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
const char *uname = "";
xmlNode *search = reason;
xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2);
magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
while(search) {
const char *kind = TYPE(search);
if (safe_str_eq(XML_CIB_TAG_STATE, kind)
|| safe_str_eq(XML_CIB_TAG_NODE, kind)) {
- if (crm_is_true(crm_element_value(search, XML_NODE_IS_REMOTE))) {
- /* Remote node uname and uuids are the same.
- * We also don't want them to be present in the
- * peer cache, so we shouldn't look them up with
- * crm_peer_uname()
- */
- uname = ID(search);
- } else {
- uname = crm_peer_uname(ID(search));
- }
+ uname = crm_peer_uname(ID(search));
break;
}
search = search->parent;
}
if (diff) {
cib_diff_version_details(diff,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) {
crm_info
("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s",
fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), NAME(reason),
VALUE(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch,
diff_add_updates, abort_text);
} else {
crm_info
("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s",
fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason),
magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates,
abort_text);
}
} else {
crm_info
("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s) : %s",
fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason),
magic ? magic : "NA", abort_text);
}
} else {
crm_info("%s:%d - Triggered transition abort (complete=%d) : %s",
fn, line, transition_graph->complete, abort_text);
}
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
crm_info("Abort suppressed: state=%s (complete=%d)",
fsa_state2string(fsa_state), transition_graph->complete);
return;
default:
break;
}
if (magic == NULL && reason != NULL) {
crm_log_xml_debug(reason, "Cause");
}
/* Make sure any queued calculations are discarded ASAP */
free(fsa_pe_ref);
fsa_pe_ref = NULL;
if (transition_graph->complete) {
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
}
return;
}
update_abort_priority(transition_graph, abort_priority, abort_action, abort_text);
mainloop_set_trigger(transition_trigger);
}
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index e645abb8ea..219d4dfbe9 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,203 +1,222 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_COMMON_CLUSTER__H
# define CRM_COMMON_CLUSTER__H
# include <crm/common/xml.h>
# include <crm/common/util.h>
# if SUPPORT_HEARTBEAT
# include <heartbeat/hb_api.h>
# include <ocf/oc_event.h>
# endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
extern gboolean crm_have_quorum;
extern GHashTable *crm_peer_cache;
+extern GHashTable *crm_remote_peer_cache;
extern unsigned long long crm_peer_seq;
# ifndef CRM_SERVICE
# define CRM_SERVICE PCMK_SERVICE_ID
# endif
/* *INDENT-OFF* */
#define CRM_NODE_LOST "lost"
#define CRM_NODE_MEMBER "member"
#define CRM_NODE_ACTIVE CRM_NODE_MEMBER
#define CRM_NODE_EVICTED "evicted"
enum crm_join_phase
{
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
/* *INDENT-ON* */
+enum crm_node_flags
+{
+ crm_remote_node = 0x0001,
+};
typedef struct crm_peer_node_s {
uint32_t id; /* Only used by corosync derivatives */
uint64_t born; /* Only used by heartbeat and the legacy plugin */
uint64_t last_seen;
- uint64_t flags; /* Unused, but might be a good place to specify 'remote' */
+ uint64_t flags; /* Specified by crm_node_flags enum */
int32_t votes; /* Only used by the legacy plugin */
uint32_t processes;
enum crm_join_phase join;
char *uname;
char *uuid;
char *state;
char *expected;
char *addr; /* Only used by the legacy plugin */
char *version; /* Unused */
} crm_node_t;
void crm_peer_init(void);
void crm_peer_destroy(void);
typedef struct crm_cluster_s {
char *uuid;
char *uname;
uint32_t nodeid;
void (*destroy) (gpointer);
# if SUPPORT_HEARTBEAT
ll_cluster_t *hb_conn;
void (*hb_dispatch) (HA_Message * msg, void *private);
# endif
# if SUPPORT_COROSYNC
struct cpg_name group;
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
} crm_cluster_t;
gboolean crm_cluster_connect(crm_cluster_t * cluster);
void crm_cluster_disconnect(crm_cluster_t * cluster);
/* *INDENT-OFF* */
enum crm_ais_msg_class {
crm_class_cluster = 0,
crm_class_members = 1,
crm_class_notify = 2,
crm_class_nodeid = 3,
crm_class_rmpeer = 4,
crm_class_quorum = 5,
};
/* order here matters - its used to index into the crm_children array */
enum crm_ais_msg_types {
crm_msg_none = 0,
crm_msg_ais = 1,
crm_msg_lrmd = 2,
crm_msg_cib = 3,
crm_msg_crmd = 4,
crm_msg_attrd = 5,
crm_msg_stonithd = 6,
crm_msg_te = 7,
crm_msg_pe = 8,
crm_msg_stonith_ng = 9,
};
+
+/* used with crm_get_peer_full */
+enum crm_get_peer_flags {
+ CRM_GET_PEER_CLUSTER = 0x0001,
+ CRM_GET_PEER_REMOTE = 0x0002,
+};
/* *INDENT-ON* */
gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service,
xmlNode * data, gboolean ordered);
+
+/* Initialize and refresh the remote peer cache from a cib config */
+void crm_remote_peer_cache_refresh(xmlNode *cib);
+
+/* allows filtering of remote and cluster nodes using crm_get_peer_flags */
+crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);
+
+/* only searches cluster nodes */
crm_node_t *crm_get_peer(unsigned int id, const char *uname);
guint crm_active_peers(void);
gboolean crm_is_peer_active(const crm_node_t * node);
guint reap_crm_member(uint32_t id, const char *name);
int crm_terminate_member(int nodeid, const char *uname, void *unused);
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection);
# if SUPPORT_HEARTBEAT
gboolean crm_is_heartbeat_peer_active(const crm_node_t * node);
# endif
# if SUPPORT_COROSYNC
extern int ais_fd_sync;
uint32_t get_local_nodeid(cpg_handle_t handle);
gboolean cluster_connect_cpg(crm_cluster_t *cluster);
void cluster_disconnect_cpg(crm_cluster_t * cluster);
void pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries);
gboolean crm_is_corosync_peer_active(const crm_node_t * node);
gboolean send_cluster_text(int class, const char *data, gboolean local,
crm_node_t * node, enum crm_ais_msg_types dest);
# endif
const char *crm_peer_uuid(crm_node_t *node);
const char *crm_peer_uname(const char *uuid);
void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node);
enum crm_status_type {
crm_status_uname,
crm_status_nstate,
crm_status_processes,
};
enum crm_ais_msg_types text2msg_type(const char *text);
void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *));
/* *INDENT-OFF* */
enum cluster_type_e
{
pcmk_cluster_unknown = 0x0001,
pcmk_cluster_invalid = 0x0002,
pcmk_cluster_heartbeat = 0x0004,
pcmk_cluster_classic_ais = 0x0010,
pcmk_cluster_corosync = 0x0020,
pcmk_cluster_cman = 0x0040,
};
/* *INDENT-ON* */
enum cluster_type_e get_cluster_type(void);
const char *name_for_cluster_type(enum cluster_type_e type);
gboolean is_corosync_cluster(void);
gboolean is_cman_cluster(void);
gboolean is_openais_cluster(void);
gboolean is_classic_ais_cluster(void);
gboolean is_heartbeat_cluster(void);
const char *get_local_node_name(void);
char *get_node_name(uint32_t nodeid);
# if SUPPORT_COROSYNC
char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *msg,
uint32_t *kind, const char **from);
# endif
#endif
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 5820c8df2a..5b743f953a 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,632 +1,637 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <dlfcn.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
CRM_TRACE_INIT_DATA(cluster);
#if SUPPORT_HEARTBEAT
void *hb_library = NULL;
#endif
static char *
get_heartbeat_uuid(const char *uname)
{
char *uuid_calc = NULL;
#if SUPPORT_HEARTBEAT
cl_uuid_t uuid_raw;
const char *unknown = "00000000-0000-0000-0000-000000000000";
if (heartbeat_cluster == NULL) {
crm_warn("No connection to heartbeat, using uuid=uname");
return NULL;
} else if(uname == NULL) {
return NULL;
}
if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) ==
HA_FAIL) {
crm_err("get_uuid_by_name() call failed for host %s", uname);
free(uuid_calc);
return NULL;
}
uuid_calc = calloc(1, 50);
cl_uuid_unparse(&uuid_raw, uuid_calc);
if (safe_str_eq(uuid_calc, unknown)) {
crm_warn("Could not calculate UUID for %s", uname);
free(uuid_calc);
return NULL;
}
#endif
return uuid_calc;
}
static gboolean
uname_is_uuid(void)
{
static const char *uuid_pref = NULL;
if (uuid_pref == NULL) {
uuid_pref = getenv("PCMK_uname_is_uuid");
}
if (uuid_pref == NULL) {
/* true is legacy mode */
uuid_pref = "false";
}
return crm_is_true(uuid_pref);
}
int
get_corosync_id(int id, const char *uuid)
{
if (id == 0 && !uname_is_uuid() && is_corosync_cluster()) {
id = crm_atoi(uuid, "0");
}
return id;
}
char *
get_corosync_uuid(crm_node_t *node)
{
if(node == NULL) {
return NULL;
} else if (!uname_is_uuid() && is_corosync_cluster()) {
if (node->id > 0) {
int len = 32;
char *buffer = NULL;
buffer = calloc(1, (len + 1));
if (buffer != NULL) {
snprintf(buffer, len, "%u", node->id);
}
return buffer;
} else {
crm_info("Node %s is not yet known by corosync", node->uname);
}
} else if (node->uname != NULL) {
return strdup(node->uname);
}
return NULL;
}
const char *
crm_peer_uuid(crm_node_t *peer)
{
char *uuid = NULL;
enum cluster_type_e type = get_cluster_type();
/* avoid blocking heartbeat calls where possible */
if(peer == NULL) {
return NULL;
} else if (peer->uuid) {
return peer->uuid;
}
switch (type) {
case pcmk_cluster_corosync:
uuid = get_corosync_uuid(peer);
break;
case pcmk_cluster_cman:
case pcmk_cluster_classic_ais:
if (peer->uname) {
uuid = strdup(peer->uname);
}
break;
case pcmk_cluster_heartbeat:
uuid = get_heartbeat_uuid(peer->uname);
break;
case pcmk_cluster_unknown:
case pcmk_cluster_invalid:
crm_err("Unsupported cluster type");
break;
}
peer->uuid = uuid;
return peer->uuid;
}
gboolean
crm_cluster_connect(crm_cluster_t * cluster)
{
enum cluster_type_e type = get_cluster_type();
crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type));
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
crm_peer_init();
return init_cs_connection(cluster);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
int rv;
/* coverity[var_deref_op] False positive */
if (cluster->hb_conn == NULL) {
/* No object passed in, create a new one. */
ll_cluster_t *(*new_cluster) (const char *llctype) =
find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
cluster->hb_conn = (*new_cluster) ("heartbeat");
/* dlclose(handle); */
} else {
/* Object passed in. Disconnect first, then reconnect below. */
cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
}
/* make sure we are disconnected first with the old object, if any. */
if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) {
heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE);
}
CRM_ASSERT(cluster->hb_conn != NULL);
heartbeat_cluster = cluster->hb_conn;
rv = register_heartbeat_conn(cluster);
if (rv) {
/* we'll benefit from a bigger queue length on heartbeat side.
* Otherwise, if peers send messages faster than we can consume
* them right now, heartbeat messaging layer will kick us out once
* it's (small) default queue fills up :(
* If we fail to adjust the sendq length, that's not yet fatal, though.
*/
if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) {
crm_warn("Cannot set sendq length: %s",
heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster));
}
}
return rv;
}
#endif
crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
return FALSE;
}
void
crm_cluster_disconnect(crm_cluster_t * cluster)
{
enum cluster_type_e type = get_cluster_type();
const char *type_str = name_for_cluster_type(type);
crm_info("Disconnecting from cluster infrastructure: %s", type_str);
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
crm_peer_destroy();
terminate_cs_connection(cluster);
crm_info("Disconnected from %s", type_str);
return;
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
if (cluster == NULL) {
crm_info("No cluster connection");
return;
} else if (cluster->hb_conn) {
cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
cluster->hb_conn = NULL;
crm_info("Disconnected from %s", type_str);
return;
} else {
crm_info("No %s connection", type_str);
return;
}
}
#endif
crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
}
gboolean
send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode * data,
gboolean ordered)
{
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
return send_cluster_message_cs(data, FALSE, node, service);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
return send_ha_message(heartbeat_cluster, data, node ? node->uname : NULL, ordered);
}
#endif
return FALSE;
}
const char *
get_local_node_name(void)
{
static char *name = NULL;
if(name) {
return name;
}
name = get_node_name(0);
return name;
}
char *
get_node_name(uint32_t nodeid)
{
char *name = NULL;
enum cluster_type_e stack = get_cluster_type();
switch (stack) {
case pcmk_cluster_heartbeat:
break;
#if SUPPORT_PLUGIN
case pcmk_cluster_classic_ais:
name = classic_node_name(nodeid);
break;
#else
# if SUPPORT_COROSYNC
case pcmk_cluster_corosync:
name = corosync_node_name(0, nodeid);
break;
# endif
#endif
#if SUPPORT_CMAN
case pcmk_cluster_cman:
name = cman_node_name(nodeid);
break;
#endif
default:
crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack);
}
if(name == NULL && nodeid == 0) {
struct utsname res;
int rc = uname(&res);
if (rc == 0) {
crm_notice("Defaulting to uname -n for the local %s node name",
name_for_cluster_type(stack));
name = strdup(res.nodename);
}
if (name == NULL) {
crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack));
crm_exit(DAEMON_RESPAWN_STOP);
}
}
if (name == NULL) {
crm_notice("Could not obtain a node name for %s nodeid %u",
name_for_cluster_type(stack), nodeid);
}
return name;
}
/* Only used by update_failcount() in te_utils.c */
const char *
crm_peer_uname(const char *uuid)
{
GHashTableIter iter;
crm_node_t *node = NULL;
CRM_CHECK(uuid != NULL, return NULL);
+ /* remote nodes have the same uname and uuid */
+ if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
+ return uuid;
+ }
+
/* avoid blocking calls where possible */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uuid && strcasecmp(node->uuid, uuid) == 0) {
if(node->uname) {
return node->uname;
}
break;
}
}
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
if (uname_is_uuid() == FALSE && is_corosync_cluster()) {
uint32_t id = crm_int_helper(uuid, NULL);
node = crm_get_peer(id, NULL);
} else {
node = crm_get_peer(0, uuid);
}
if (node) {
crm_info("Setting uuid for node %s[%u] to '%s'", node->uname, node->id, uuid);
node->uuid = strdup(uuid);
if(node->uname) {
return node->uname;
}
}
return NULL;
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
if (heartbeat_cluster != NULL) {
cl_uuid_t uuid_raw;
char *uuid_copy = strdup(uuid);
char *uname = malloc(MAX_NAME);
cl_uuid_parse(uuid_copy, &uuid_raw);
if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname,
MAX_NAME) == HA_FAIL) {
crm_err("Could not calculate uname for %s", uuid);
} else {
node = crm_get_peer(0, uname);
}
free(uuid_copy);
free(uname);
}
if (node) {
crm_info("Setting uuid for node %s to '%s'", node->uname, uuid);
node->uuid = strdup(uuid);
if(node->uname) {
return node->uname;
}
}
return NULL;
}
#endif
return NULL;
}
void
set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
{
const char *uuid_calc = crm_peer_uuid(node);
crm_xml_add(xml, attr, uuid_calc);
return;
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
switch (type) {
case pcmk_cluster_classic_ais:
return "classic openais (with plugin)";
case pcmk_cluster_cman:
return "cman";
case pcmk_cluster_corosync:
return "corosync";
case pcmk_cluster_heartbeat:
return "heartbeat";
case pcmk_cluster_unknown:
return "unknown";
case pcmk_cluster_invalid:
return "invalid";
}
crm_err("Invalid cluster type: %d", type);
return "invalid";
}
/* Do not expose these two */
int set_cluster_type(enum cluster_type_e type);
static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
int
set_cluster_type(enum cluster_type_e type)
{
if (cluster_type == pcmk_cluster_unknown) {
crm_info("Cluster type set to: %s", name_for_cluster_type(type));
cluster_type = type;
return 0;
} else if (cluster_type == type) {
return 0;
} else if (pcmk_cluster_unknown == type) {
cluster_type = type;
return 0;
}
crm_err("Cluster type already set to %s, ignoring %s",
name_for_cluster_type(cluster_type), name_for_cluster_type(type));
return -1;
}
enum cluster_type_e
get_cluster_type(void)
{
bool detected = FALSE;
const char *cluster = NULL;
/* Return the previous calculation, if any */
if (cluster_type != pcmk_cluster_unknown) {
return cluster_type;
}
cluster = getenv("HA_cluster_type");
#if SUPPORT_HEARTBEAT
/* If nothing is defined in the environment, try heartbeat (if supported) */
if(cluster == NULL) {
ll_cluster_t *hb;
ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(
&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
hb = (*new_cluster) ("heartbeat");
crm_debug("Testing with Heartbeat");
if (hb->llc_ops->signon(hb, crm_system_name) == HA_OK) {
hb->llc_ops->signoff(hb, FALSE);
cluster_type = pcmk_cluster_heartbeat;
detected = TRUE;
goto done;
}
}
#endif
#if SUPPORT_COROSYNC
/* If nothing is defined in the environment, try corosync (if supported) */
if(cluster == NULL) {
crm_debug("Testing with Corosync");
cluster_type = find_corosync_variant();
if (cluster_type != pcmk_cluster_unknown) {
detected = TRUE;
goto done;
}
}
#endif
/* Something was defined in the environment, test it against what we support */
crm_info("Verifying cluster type: '%s'", cluster?cluster:"-unspecified-");
if (cluster == NULL) {
#if SUPPORT_HEARTBEAT
} else if (safe_str_eq(cluster, "heartbeat")) {
cluster_type = pcmk_cluster_heartbeat;
#endif
#if SUPPORT_COROSYNC
} else if (safe_str_eq(cluster, "openais")
|| safe_str_eq(cluster, "classic openais (with plugin)")) {
cluster_type = pcmk_cluster_classic_ais;
} else if (safe_str_eq(cluster, "corosync")) {
cluster_type = pcmk_cluster_corosync;
#endif
#if SUPPORT_CMAN
} else if (safe_str_eq(cluster, "cman")) {
cluster_type = pcmk_cluster_cman;
#endif
} else {
cluster_type = pcmk_cluster_invalid;
goto done; /* Keep the compiler happy when no stacks are supported */
}
done:
if (cluster_type == pcmk_cluster_unknown) {
crm_notice("Could not determin the current cluster type");
} else if (cluster_type == pcmk_cluster_invalid) {
crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.",
cluster);
crm_exit(DAEMON_RESPAWN_STOP);
} else {
crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type));
}
return cluster_type;
}
gboolean
is_cman_cluster(void)
{
return get_cluster_type() == pcmk_cluster_cman;
}
gboolean
is_corosync_cluster(void)
{
return get_cluster_type() == pcmk_cluster_corosync;
}
gboolean
is_classic_ais_cluster(void)
{
return get_cluster_type() == pcmk_cluster_classic_ais;
}
gboolean
is_openais_cluster(void)
{
enum cluster_type_e type = get_cluster_type();
if (type == pcmk_cluster_classic_ais) {
return TRUE;
} else if (type == pcmk_cluster_corosync) {
return TRUE;
} else if (type == pcmk_cluster_cman) {
return TRUE;
}
return FALSE;
}
gboolean
is_heartbeat_cluster(void)
{
return get_cluster_type() == pcmk_cluster_heartbeat;
}
gboolean
node_name_is_valid(const char *key, const char *name)
{
int octet;
if (name == NULL) {
crm_trace("%s is empty", key);
return FALSE;
} else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
crm_trace("%s contains an ipv4 address, ignoring: %s", key, name);
return FALSE;
} else if (strstr(name, ":") != NULL) {
crm_trace("%s contains an ipv6 address, ignoring: %s", key, name);
return FALSE;
}
crm_trace("%s is valid", key);
return TRUE;
}
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index bc1684e23b..e82cadc85e 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1,510 +1,573 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/param.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <glib.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/msg_xml.h>
#include <crm/stonith-ng.h>
GHashTable *crm_peer_cache = NULL;
+GHashTable *crm_remote_peer_cache = NULL;
unsigned long long crm_peer_seq = 0;
gboolean crm_have_quorum = FALSE;
+void crm_remote_peer_cache_refresh(xmlNode *cib)
+{
+ crm_node_t *node = NULL;
+ xmlXPathObjectPtr xpathObj = NULL;
+ const char *remote = NULL;
+ const char *xpath = NULL;
+ int max = 0;
+ int lpc = 0;
+
+ g_hash_table_remove_all(crm_remote_peer_cache);
+
+ xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR "[@name='remote-node']";
+ xpathObj = xpath_search(cib, xpath);
+ max = numXpathResults(xpathObj);
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *xml = getXpathResult(xpathObj, lpc);
+
+ CRM_CHECK(xml != NULL, continue);
+
+ remote = crm_element_value(xml, "value");
+ if (remote) {
+ crm_trace("added %s to remote cache", remote);
+ node = calloc(1, sizeof(crm_node_t));
+ node->flags = crm_remote_node;
+ CRM_ASSERT(node);
+ node->uname = strdup(remote);
+ node->uuid = strdup(remote);
+ g_hash_table_replace(crm_remote_peer_cache, node->uname, node);
+ }
+ }
+ freeXpathObject(xpathObj);
+}
+
gboolean
crm_is_peer_active(const crm_node_t * node)
{
if(node == NULL) {
return FALSE;
}
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
return crm_is_corosync_peer_active(node);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
return crm_is_heartbeat_peer_active(node);
}
#endif
crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
return FALSE;
}
static gboolean
crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
crm_node_t *search = user_data;
if (search == NULL) {
return FALSE;
} else if (search->id && node->id != search->id) {
return FALSE;
} else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
return FALSE;
} else if (crm_is_peer_active(value) == FALSE) {
crm_notice("Removing %s/%u from the membership list", node->uname, node->id);
return TRUE;
}
return FALSE;
}
guint
reap_crm_member(uint32_t id, const char *name)
{
int matches = 0;
crm_node_t search;
if (crm_peer_cache == NULL) {
crm_trace("Nothing to do, cache not initialized");
return 0;
}
search.id = id;
search.uname = strdup(name);
matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
if(matches) {
crm_notice("Purged %d peers with id=%u and/or uname=%s from the membership cache", matches, id, name);
} else {
crm_info("No peers with id=%u and/or uname=%s exist", id, name);
}
free(search.uname);
return matches;
}
static void
crm_count_peer(gpointer key, gpointer value, gpointer user_data)
{
guint *count = user_data;
crm_node_t *node = value;
if (crm_is_peer_active(node)) {
*count = *count + 1;
}
}
guint
crm_active_peers(void)
{
guint count = 0;
if (crm_peer_cache) {
g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
}
return count;
}
static void
destroy_crm_node(gpointer data)
{
crm_node_t *node = data;
crm_trace("Destroying entry for node %u", node->id);
free(node->addr);
free(node->uname);
free(node->state);
free(node->uuid);
free(node->expected);
free(node);
}
void
crm_peer_init(void)
{
if (crm_peer_cache == NULL) {
crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_crm_node);
}
+
+ if (crm_remote_peer_cache == NULL) {
+ crm_remote_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node);
+ }
}
void
crm_peer_destroy(void)
{
if (crm_peer_cache != NULL) {
crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
+
+ if (crm_remote_peer_cache != NULL) {
+ crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
+ g_hash_table_destroy(crm_remote_peer_cache);
+ crm_remote_peer_cache = NULL;
+ }
}
void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
crm_status_callback = dispatch;
}
static void crm_dump_peer_hash(int level, const char *caller)
{
GHashTableIter iter;
const char *id = NULL;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
}
}
static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
if(value == user_data) {
return TRUE;
}
return FALSE;
}
+crm_node_t *
+crm_get_peer_full(unsigned int id, const char *uname, int flags)
+{
+ crm_node_t *node = NULL;
+
+ CRM_ASSERT(id > 0 || uname != NULL);
+
+ crm_peer_init();
+
+ if (flags & CRM_GET_PEER_REMOTE) {
+ node = g_hash_table_lookup(crm_remote_peer_cache, uname);
+ }
+
+ if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
+ node = crm_get_peer(id, uname);
+ }
+ return node;
+}
+
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
crm_peer_init();
if (uname != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id > 0) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->id == id) {
crm_trace("ID match: %u = %p", node->id, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if(by_id == by_name) {
/* Nothing to do if they match (both NULL counts) */
crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
} else if(by_id == NULL && by_name) {
crm_trace("Only one: %p for %u/%s", by_name, id, uname);
if(id && by_name->id) {
crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
crm_crit("Node %u and %u share the same name '%s'",
id, by_name->id, uname);
node = NULL; /* Create a new one */
} else {
node = by_name;
}
} else if(by_name == NULL && by_id) {
crm_trace("Only one: %p for %u/%s", by_id, id, uname);
if(uname && by_id->uname) {
crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
uname, by_id->uname, id, uname);
}
} else if(uname && by_id->uname) {
crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id);
} else if(id && by_name->id) {
crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
} else {
/* Simple merge */
/* Only corosync based clusters use nodeid's
*
* The functions that call crm_update_peer_state() only know nodeid
* so 'by_id' is authorative when merging
*
* Same for crm_update_peer_proc()
*/
crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
crm_info("Merging %p into %p", by_name, by_id);
g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
}
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = calloc(1, sizeof(crm_node_t));
CRM_ASSERT(node);
crm_info("Created entry %s/%p for node %s/%u (%d total)",
uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
g_hash_table_replace(crm_peer_cache, uniqueid, node);
}
if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
crm_info("Node %u is now known as %s", id, uname);
}
if(id > 0 && node->id == 0) {
node->id = id;
}
if(uname && node->uname == NULL) {
int lpc, len = strlen(uname);
for (lpc = 0; lpc < len; lpc++) {
if (uname[lpc] >= 'A' && uname[lpc] <= 'Z') {
crm_warn("Node names with capitals are discouraged, consider changing '%s' to something else",
uname);
break;
}
}
node->uname = strdup(uname);
if (crm_status_callback) {
crm_status_callback(crm_status_uname, node, NULL);
}
}
if(node->uuid == NULL) {
const char *uuid = crm_peer_uuid(node);
if (uuid) {
crm_info("Node %u has uuid %s", id, uuid);
} else {
crm_info("Cannot obtain a UUID for node %d/%s", id, node->uname);
}
}
return node;
}
crm_node_t *
crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
uint32_t children, const char *uuid, const char *uname, const char *addr,
const char *state)
{
#if SUPPORT_PLUGIN
gboolean addr_changed = FALSE;
gboolean votes_changed = FALSE;
#endif
crm_node_t *node = NULL;
id = get_corosync_id(id, uuid);
node = crm_get_peer(id, uname);
CRM_ASSERT(node != NULL);
if (node->uuid == NULL) {
if (is_openais_cluster()) {
/* Yes, overrule whatever was passed in */
crm_peer_uuid(node);
} else if (uuid != NULL) {
node->uuid = strdup(uuid);
}
}
if (children > 0) {
crm_update_peer_proc(source, node, children, state);
}
if (state != NULL) {
crm_update_peer_state(source, node, state, seen);
}
#if SUPPORT_HEARTBEAT
if (born != 0) {
node->born = born;
}
#endif
#if SUPPORT_PLUGIN
/* These were only used by the plugin */
if (born != 0) {
node->born = born;
}
if (votes > 0 && node->votes != votes) {
votes_changed = TRUE;
node->votes = votes;
}
if (addr != NULL) {
if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
addr_changed = TRUE;
free(node->addr);
node->addr = strdup(addr);
}
}
if (addr_changed || votes_changed) {
crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
" proc=%.32x", source, node->uname, node->id, node->state,
node->addr, addr_changed ? " (new)" : "", node->votes,
votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
}
#endif
return node;
}
void
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
uint32_t last = 0;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
source, peer2text(flag), status); return);
last = node->processes;
if (status == NULL) {
node->processes = flag;
if (node->processes != last) {
changed = TRUE;
}
} else if (safe_str_eq(status, ONLINESTATUS)) {
if ((node->processes & flag) == 0) {
set_bit(node->processes, flag);
changed = TRUE;
}
#if SUPPORT_PLUGIN
} else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
if (flag > 0 && node->processes != flag) {
node->processes = flag;
changed = TRUE;
}
#endif
} else if (node->processes & flag) {
clear_bit(node->processes, flag);
changed = TRUE;
}
if (changed) {
if (status == NULL && flag <= crm_proc_none) {
crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
node->id);
} else {
crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
peer2text(flag), status);
}
if (crm_status_callback) {
crm_status_callback(crm_status_processes, node, &last);
}
} else {
crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
peer2text(flag), status);
}
}
void
crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
return);
last = node->expected;
if (expected != NULL && safe_str_neq(node->expected, expected)) {
node->expected = strdup(expected);
changed = TRUE;
}
if (changed) {
crm_info("%s: Node %s[%u] - expected state is now %s", source, node->uname, node->id,
expected);
free(last);
} else {
crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
node->id, expected);
}
}
void
crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state);
return);
last = node->state;
if (state != NULL && safe_str_neq(node->state, state)) {
node->state = strdup(state);
changed = TRUE;
}
if (membership != 0 && safe_str_eq(node->state, CRM_NODE_MEMBER)) {
node->last_seen = membership;
}
if (changed) {
crm_notice("%s: Node %s[%u] - state is now %s (was %s)", source, node->uname, node->id, state, last);
if (crm_status_callback) {
crm_status_callback(crm_status_nstate, node, last);
}
free(last);
} else {
crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id,
state);
}
}
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
/* Always use the synchronous, non-mainloop version */
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index 37161dc40c..7aa3349247 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -1,2010 +1,2035 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <glib.h>
#include <dirent.h>
#include <crm/crm.h>
#include <crm/lrmd.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipcs.h>
#include <crm/msg_xml.h>
#include <crm/stonith-ng.h>
#ifdef HAVE_GNUTLS_GNUTLS_H
# undef KEYFILE
# include <gnutls/gnutls.h>
#endif
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <netdb.h>
CRM_TRACE_INIT_DATA(lrmd);
static int lrmd_api_disconnect(lrmd_t * lrmd);
static int lrmd_api_is_connected(lrmd_t * lrmd);
/* IPC proxy functions */
int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg);
void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
#ifdef HAVE_GNUTLS_GNUTLS_H
# define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */
gnutls_psk_client_credentials_t psk_cred_s;
int lrmd_tls_set_key(gnutls_datum_t * key);
static void lrmd_tls_disconnect(lrmd_t * lrmd);
static int global_remote_msg_id = 0;
int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type);
static void lrmd_tls_connection_destroy(gpointer userdata);
#endif
typedef struct lrmd_private_s {
enum client_type type;
char *token;
mainloop_io_t *source;
/* IPC parameters */
crm_ipc_t *ipc;
crm_remote_t *remote;
/* Extra TLS parameters */
char *remote_nodename;
#ifdef HAVE_GNUTLS_GNUTLS_H
char *server;
int port;
gnutls_psk_client_credentials_t psk_cred_c;
int sock;
+ /* since tls requires a round trip across the network for a
+ * request/reply, there are times where we just want to be able
+ * to send a request from the client and not wait around (or even care
+ * about) what the reply is. */
+ int expected_late_replies;
GList *pending_notify;
crm_trigger_t *process_notify;
#endif
lrmd_event_callback callback;
/* Internal IPC proxy msg passing for remote guests */
void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg);
void *proxy_callback_userdata;
} lrmd_private_t;
static lrmd_list_t *
lrmd_list_add(lrmd_list_t * head, const char *value)
{
lrmd_list_t *p, *end;
p = calloc(1, sizeof(lrmd_list_t));
p->val = strdup(value);
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
lrmd_list_freeall(lrmd_list_t * head)
{
lrmd_list_t *p;
while (head) {
char *val = (char *)head->val;
p = head->next;
free(val);
free(head);
head = p;
}
}
lrmd_key_value_t *
lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value)
{
lrmd_key_value_t *p, *end;
p = calloc(1, sizeof(lrmd_key_value_t));
p->key = strdup(key);
p->value = strdup(value);
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
lrmd_key_value_freeall(lrmd_key_value_t * head)
{
lrmd_key_value_t *p;
while (head) {
p = head->next;
free(head->key);
free(head->value);
free(head);
head = p;
}
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, strdup(key), strdup(value));
}
lrmd_event_data_t *
lrmd_copy_event(lrmd_event_data_t * event)
{
lrmd_event_data_t *copy = NULL;
copy = calloc(1, sizeof(lrmd_event_data_t));
/* This will get all the int values.
* we just have to be careful not to leave any
* dangling pointers to strings. */
memcpy(copy, event, sizeof(lrmd_event_data_t));
copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL;
copy->op_type = event->op_type ? strdup(event->op_type) : NULL;
copy->user_data = event->user_data ? strdup(event->user_data) : NULL;
copy->output = event->output ? strdup(event->output) : NULL;
copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL;
if (event->params) {
copy->params = g_hash_table_new_full(crm_str_hash,
g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
if (copy->params != NULL) {
g_hash_table_foreach(event->params, dup_attr, copy->params);
}
}
return copy;
}
void
lrmd_free_event(lrmd_event_data_t * event)
{
if (!event) {
return;
}
/* free gives me grief if i try to cast */
free((char *)event->rsc_id);
free((char *)event->op_type);
free((char *)event->user_data);
free((char *)event->output);
free((char *)event->remote_nodename);
if (event->params) {
g_hash_table_destroy(event->params);
}
free(event);
}
static int
lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
{
const char *type;
const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION);
lrmd_private_t *native = lrmd->private;
lrmd_event_data_t event = { 0, };
if (proxy_session != NULL) {
/* this is proxy business */
lrmd_internal_proxy_dispatch(lrmd, msg);
return 1;
- }
-
- if (!native->callback) {
+ } else if (!native->callback) {
/* no callback set */
crm_trace("notify event received but client has not set callback");
return 1;
}
event.remote_nodename = native->remote_nodename;
type = crm_element_value(msg, F_LRMD_OPERATION);
crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id);
event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID);
if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) {
event.type = lrmd_event_register;
} else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) {
event.type = lrmd_event_unregister;
} else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) {
crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout);
crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval);
crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay);
crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc);
crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status);
crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted);
crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run);
crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange);
crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time);
crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time);
event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION);
event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
event.type = lrmd_event_exec_complete;
event.params = xml2list(msg);
} else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) {
event.type = lrmd_event_poke;
} else {
return 1;
}
crm_trace("op %s notify event received", type);
native->callback(&event);
if (event.params) {
g_hash_table_destroy(event.params);
}
return 1;
}
static int
lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
xmlNode *msg;
int rc;
if (!native->callback) {
/* no callback set */
return 1;
}
msg = string2xml(buffer);
rc = lrmd_dispatch_internal(lrmd, msg);
free_xml(msg);
return rc;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_free_xml(gpointer userdata)
{
free_xml((xmlNode *) userdata);
}
static int
lrmd_tls_connected(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
if (native->remote->tls_session) {
return TRUE;
}
return FALSE;
}
static int
lrmd_tls_dispatch(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
xmlNode *xml = NULL;
int rc = 0;
int disconnected = 0;
if (lrmd_tls_connected(lrmd) == FALSE) {
crm_trace("tls dispatch triggered after disconnect");
return 0;
}
crm_trace("tls_dispatch triggered");
/* First check if there are any pending notifies to process that came
* while we were waiting for replies earlier. */
if (native->pending_notify) {
GList *iter = NULL;
crm_trace("Processing pending notifies");
for (iter = native->pending_notify; iter; iter = iter->next) {
lrmd_dispatch_internal(lrmd, iter->data);
}
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
/* Next read the current buffer and see if there are any messages to handle. */
rc = crm_remote_ready(native->remote, 0);
if (rc == 0) {
/* nothing to read, see if any full messages are already in buffer. */
xml = crm_remote_parse_buffer(native->remote);
} else if (rc < 0) {
disconnected = 1;
} else {
crm_remote_recv(native->remote, -1, &disconnected);
xml = crm_remote_parse_buffer(native->remote);
}
while (xml) {
- lrmd_dispatch_internal(lrmd, xml);
+ const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
+ if (safe_str_eq(msg_type, "notify")) {
+ lrmd_dispatch_internal(lrmd, xml);
+ } else if (safe_str_eq(msg_type, "reply")) {
+ if (native->expected_late_replies > 0) {
+ native->expected_late_replies--;
+ } else {
+ int reply_id = 0;
+ crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
+ /* if this happens, we want to know about it */
+ crm_err("Got outdated reply %d", reply_id);
+ }
+ }
free_xml(xml);
xml = crm_remote_parse_buffer(native->remote);
}
if (disconnected) {
crm_info("Server disconnected while reading remote server msg.");
lrmd_tls_disconnect(lrmd);
return 0;
}
return 1;
}
#endif
/* Not used with mainloop */
int
lrmd_poll(lrmd_t * lrmd, int timeout)
{
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
return crm_ipc_ready(native->ipc);
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
if (native->pending_notify) {
return 1;
} else if (native->remote->buffer
&& strstr(native->remote->buffer, REMOTE_MSG_TERMINATOR)) {
return 1;
}
return crm_remote_ready(native->remote, 0);
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return 0;
}
/* Not used with mainloop */
bool
lrmd_dispatch(lrmd_t * lrmd)
{
lrmd_private_t *private = NULL;
CRM_ASSERT(lrmd != NULL);
private = lrmd->private;
switch (private->type) {
case CRM_CLIENT_IPC:
while (crm_ipc_ready(private->ipc)) {
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
lrmd_ipc_dispatch(msg, strlen(msg), lrmd);
}
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
lrmd_tls_dispatch(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", private->type);
}
if (lrmd_api_is_connected(lrmd) == FALSE) {
crm_err("Connection closed");
return FALSE;
}
return TRUE;
}
static xmlNode *
lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options)
{
xmlNode *op_msg = create_xml_node(NULL, "lrmd_command");
CRM_CHECK(op_msg != NULL, return NULL);
CRM_CHECK(token != NULL, return NULL);
crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command");
crm_xml_add(op_msg, F_TYPE, T_LRMD);
crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token);
crm_xml_add(op_msg, F_LRMD_OPERATION, op);
crm_trace("Sending call options: %.8lx, %d", (long)options, options);
crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options);
if (data != NULL) {
add_message_xml(op_msg, F_LRMD_CALLDATA, data);
}
return op_msg;
}
static void
lrmd_ipc_connection_destroy(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
crm_info("IPC connection destroyed");
/* Prevent these from being cleaned up in lrmd_api_disconnect() */
native->ipc = NULL;
native->source = NULL;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.type = lrmd_event_disconnect;
event.remote_nodename = native->remote_nodename;
native->callback(&event);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tls_connection_destroy(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
crm_info("TLS connection destroyed");
if (native->remote->tls_session) {
gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
}
if (native->psk_cred_c) {
gnutls_psk_free_client_credentials(native->psk_cred_c);
}
if (native->sock) {
close(native->sock);
}
if (native->process_notify) {
mainloop_destroy_trigger(native->process_notify);
native->process_notify = NULL;
}
if (native->pending_notify) {
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
free(native->remote->buffer);
native->remote->buffer = NULL;
native->source = 0;
native->sock = 0;
native->psk_cred_c = NULL;
native->remote->tls_session = NULL;
native->sock = 0;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.remote_nodename = native->remote_nodename;
event.type = lrmd_event_disconnect;
native->callback(&event);
}
return;
}
int
lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type)
{
int rc = -1;
crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id);
crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type);
rc = crm_remote_send(session, msg);
if (rc < 0) {
crm_err("Failed to send remote lrmd tls msg, rc = %d", rc);
return rc;
}
return rc;
}
static xmlNode *
lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected)
{
lrmd_private_t *native = lrmd->private;
xmlNode *xml = NULL;
time_t start = time(NULL);
const char *msg_type = NULL;
int reply_id = 0;
int remaining_timeout = 0;
/* A timeout of 0 here makes no sense. We have to wait a period of time
* for the response to come back. If -1 or 0, default to 10 seconds. */
if (total_timeout <= 0) {
total_timeout = 10000;
}
while (!xml) {
xml = crm_remote_parse_buffer(native->remote);
if (!xml) {
/* read some more off the tls buffer if we still have time left. */
if (remaining_timeout) {
remaining_timeout = remaining_timeout - ((time(NULL) - start) * 1000);
} else {
remaining_timeout = total_timeout;
}
if (remaining_timeout <= 0) {
return NULL;
}
crm_remote_recv(native->remote, remaining_timeout, disconnected);
xml = crm_remote_parse_buffer(native->remote);
if (!xml || *disconnected) {
return NULL;
}
}
CRM_ASSERT(xml != NULL);
crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id);
msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
if (!msg_type) {
crm_err("Empty msg type received while waiting for reply");
free_xml(xml);
xml = NULL;
} else if (safe_str_eq(msg_type, "notify")) {
/* got a notify while waiting for reply, trigger the notify to be processed later */
crm_info("queueing notify");
native->pending_notify = g_list_append(native->pending_notify, xml);
if (native->process_notify) {
crm_info("notify trigger set.");
mainloop_set_trigger(native->process_notify);
}
xml = NULL;
} else if (safe_str_neq(msg_type, "reply")) {
/* msg isn't a reply, make some noise */
crm_err("Expected a reply, got %s", msg_type);
free_xml(xml);
xml = NULL;
} else if (reply_id != expected_reply_id) {
- crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+ if (native->expected_late_replies > 0) {
+ native->expected_late_replies--;
+ } else {
+ crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+ }
free_xml(xml);
xml = NULL;
}
}
if (native->remote->buffer && native->process_notify) {
mainloop_set_trigger(native->process_notify);
}
return xml;
}
static int
lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg)
{
int rc = 0;
lrmd_private_t *native = lrmd->private;
global_remote_msg_id++;
if (global_remote_msg_id <= 0) {
global_remote_msg_id = 1;
}
rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request");
if (rc <= 0) {
crm_err("Remote lrmd send failed, disconnecting");
lrmd_tls_disconnect(lrmd);
return -ENOTCONN;
}
return pcmk_ok;
}
static int
lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
int rc = 0;
int disconnected = 0;
xmlNode *xml = NULL;
if (lrmd_tls_connected(lrmd) == FALSE) {
return -1;
}
rc = lrmd_tls_send(lrmd, msg);
if (rc < 0) {
return rc;
}
xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected);
if (disconnected) {
crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ",
global_remote_msg_id);
lrmd_tls_disconnect(lrmd);
rc = -ENOTCONN;
} else if (!xml) {
crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ",
global_remote_msg_id, timeout);
rc = -ECOMM;
}
if (reply) {
*reply = xml;
} else {
free_xml(xml);
}
return rc;
}
#endif
static int
lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
int rc = -1;
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static int
lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
{
int rc = -1;
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_none, 0, NULL);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_send(lrmd, msg);
+ if (rc == pcmk_ok) {
+ /* we don't want to wait around for the reply, but
+ * since the request/reply protocol needs to behave the same
+ * as libqb, a reply will eventually come later anyway. */
+ native->expected_late_replies++;
+ }
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static int
lrmd_api_is_connected(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
return crm_ipc_connected(native->ipc);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
return lrmd_tls_connected(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return 0;
}
static int
lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout, /* ms. defaults to 1000 if set to 0 */
enum lrmd_call_options options, gboolean expect_reply)
{ /* TODO we need to reduce usage of this boolean */
int rc = pcmk_ok;
int reply_id = -1;
lrmd_private_t *native = lrmd->private;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
if (!lrmd_api_is_connected(lrmd)) {
return -ENOTCONN;
}
if (op == NULL) {
crm_err("No operation specified");
return -EINVAL;
}
CRM_CHECK(native->token != NULL,;
);
crm_trace("sending %s op to lrmd", op);
op_msg = lrmd_create_op(native->token, op, data, options);
if (op_msg == NULL) {
return -EINVAL;
}
crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout);
if (expect_reply) {
rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply);
} else {
rc = lrmd_send_xml_no_reply(lrmd, op_msg);
goto done;
}
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
}
rc = pcmk_ok;
crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id);
crm_trace("%s op reply received", op);
if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) {
rc = -ENOMSG;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
if (output_data) {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
done:
if (lrmd_api_is_connected(lrmd) == FALSE) {
crm_err("LRMD disconnected");
}
free_xml(op_msg);
free_xml(op_reply);
return rc;
}
static int
lrmd_api_poke_connection(lrmd_t * lrmd)
{
int rc;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, FALSE);
free_xml(data);
return rc;
}
static int
lrmd_handshake(lrmd_t * lrmd, const char *name)
{
int rc = pcmk_ok;
lrmd_private_t *native = lrmd->private;
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "lrmd_command");
crm_xml_add(hello, F_TYPE, T_LRMD);
crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(hello, F_LRMD_CLIENTNAME, name);
crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
/* advertise that we are a proxy provider */
if (native->proxy_callback) {
crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true");
}
rc = lrmd_send_xml(lrmd, hello, -1, &reply);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_err("Did not receive registration reply");
rc = -EPROTO;
} else {
const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION);
const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID);
crm_element_value_int(reply, F_LRMD_RC, &rc);
if (rc == -EPROTO) {
crm_err("LRMD protocol mismatch client version %s, server version %s",
LRMD_PROTOCOL_VERSION, crm_element_value(reply, F_LRMD_PROTOCOL_VERSION));
crm_log_xml_err(reply, "Protocol Error");
} else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
crm_err("Invalid registration message: %s", msg_type);
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
crm_err("No registration token provided");
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else {
crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
if (rc != pcmk_ok) {
lrmd_api_disconnect(lrmd);
}
return rc;
}
static int
lrmd_ipc_connect(lrmd_t * lrmd, int *fd)
{
int rc = pcmk_ok;
lrmd_private_t *native = lrmd->private;
static struct ipc_client_callbacks lrmd_callbacks = {
.dispatch = lrmd_ipc_dispatch,
.destroy = lrmd_ipc_connection_destroy
};
crm_info("Connecting to lrmd");
if (fd) {
/* No mainloop */
native->ipc = crm_ipc_new("lrmd", 0);
if (native->ipc && crm_ipc_connect(native->ipc)) {
*fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
rc = -ENOTCONN;
}
} else {
native->source = mainloop_add_ipc_client("lrmd", G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
crm_debug("Could not connect to the LRMD API");
rc = -ENOTCONN;
}
return rc;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static int
set_key(gnutls_datum_t * key, const char *location)
{
FILE *stream;
int read_len = 256;
int cur_len = 0;
int buf_len = read_len;
static char *key_cache = NULL;
static size_t key_cache_len = 0;
static time_t key_cache_updated;
if (location == NULL) {
return -1;
}
if (key_cache) {
time_t now = time(NULL);
if ((now - key_cache_updated) < 60) {
key->data = gnutls_malloc(key_cache_len + 1);
key->size = key_cache_len;
memcpy(key->data, key_cache, key_cache_len);
crm_debug("using cached LRMD key");
return 0;
} else {
key_cache_len = 0;
key_cache_updated = 0;
free(key_cache);
key_cache = NULL;
crm_debug("clearing lrmd key cache");
}
}
stream = fopen(location, "r");
if (!stream) {
return -1;
}
key->data = gnutls_malloc(read_len);
while (!feof(stream)) {
int next;
if (cur_len == buf_len) {
buf_len = cur_len + read_len;
key->data = gnutls_realloc(key->data, buf_len);
}
next = fgetc(stream);
if (next == EOF && feof(stream)) {
break;
}
key->data[cur_len] = next;
cur_len++;
}
fclose(stream);
key->size = cur_len;
if (!cur_len) {
gnutls_free(key->data);
key->data = 0;
return -1;
}
if (!key_cache) {
key_cache = calloc(1, key->size + 1);
memcpy(key_cache, key->data, key->size);
key_cache_len = key->size;
key_cache_updated = time(NULL);
}
return 0;
}
int
lrmd_tls_set_key(gnutls_datum_t * key)
{
int rc = 0;
const char *specific_location = getenv("PCMK_authkey_location");
if (set_key(key, specific_location) == 0) {
crm_debug("Using custom authkey location %s", specific_location);
return 0;
}
if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) {
rc = set_key(key, ALT_REMOTE_KEY_LOCATION);
}
if (rc) {
crm_err("No lrmd remote key found");
return -1;
}
return rc;
}
static void
lrmd_gnutls_global_init(void)
{
static int gnutls_init = 0;
if (!gnutls_init) {
gnutls_global_init();
}
gnutls_init = 1;
}
#endif
static void
report_async_connection_result(lrmd_t * lrmd, int rc)
{
lrmd_private_t *native = lrmd->private;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.type = lrmd_event_connect;
event.remote_nodename = native->remote_nodename;
event.connection_rc = rc;
native->callback(&event);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tcp_connect_cb(void *userdata, int sock)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
char name[256] = { 0, };
static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
.dispatch = lrmd_tls_dispatch,
.destroy = lrmd_tls_connection_destroy,
};
int rc = sock;
gnutls_datum_t psk_key = { NULL, 0 };
if (rc < 0) {
lrmd_tls_connection_destroy(lrmd);
crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port);
report_async_connection_result(lrmd, rc);
return;
}
/* TODO continue with tls stuff now that tcp connect passed. make this async as well soon
* to avoid all blocking code in the client. */
native->sock = sock;
if (lrmd_tls_set_key(&psk_key) != 0) {
lrmd_tls_connection_destroy(lrmd);
return;
}
gnutls_psk_allocate_client_credentials(&native->psk_cred_c);
gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW);
gnutls_free(psk_key.data);
native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c);
if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server,
native->port);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = NULL;
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, -1);
return;
}
crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server,
native->port);
snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port);
native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
native->source =
mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks);
rc = lrmd_handshake(lrmd, name);
report_async_connection_result(lrmd, rc);
return;
}
static int
lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ )
{
int rc = 0;
lrmd_private_t *native = lrmd->private;
lrmd_gnutls_global_init();
rc = crm_remote_tcp_connect_async(native->server, native->port, timeout, lrmd,
lrmd_tcp_connect_cb);
return rc;
}
static int
lrmd_tls_connect(lrmd_t * lrmd, int *fd)
{
static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
.dispatch = lrmd_tls_dispatch,
.destroy = lrmd_tls_connection_destroy,
};
lrmd_private_t *native = lrmd->private;
int sock;
gnutls_datum_t psk_key = { NULL, 0 };
lrmd_gnutls_global_init();
sock = crm_remote_tcp_connect(native->server, native->port);
if (sock < 0) {
crm_warn("Could not establish remote lrmd connection to %s", native->server);
lrmd_tls_connection_destroy(lrmd);
return -ENOTCONN;
}
native->sock = sock;
if (lrmd_tls_set_key(&psk_key) != 0) {
lrmd_tls_connection_destroy(lrmd);
return -1;
}
gnutls_psk_allocate_client_credentials(&native->psk_cred_c);
gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW);
gnutls_free(psk_key.data);
native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c);
if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
crm_err("Session creation for %s:%d failed", native->server, native->port);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = NULL;
lrmd_tls_connection_destroy(lrmd);
return -1;
}
crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server,
native->port);
if (fd) {
*fd = sock;
} else {
char name[256] = { 0, };
snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port);
native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
native->source =
mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks);
}
return pcmk_ok;
}
#endif
static int
lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd)
{
int rc = -ENOTCONN;
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = lrmd_ipc_connect(lrmd, fd);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_connect(lrmd, fd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
if (rc == pcmk_ok) {
rc = lrmd_handshake(lrmd, name);
}
return rc;
}
static int
lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout)
{
int rc = 0;
lrmd_private_t *native = lrmd->private;
if (!native->callback) {
crm_err("Async connect not possible, no lrmd client callback set.");
return -1;
}
switch (native->type) {
case CRM_CLIENT_IPC:
/* fake async connection with ipc. it should be fast
* enough that we gain very little from async */
rc = lrmd_api_connect(lrmd, name, NULL);
if (!rc) {
report_async_connection_result(lrmd, rc);
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_connect_async(lrmd, timeout);
if (rc) {
/* connection failed, report rc now */
report_async_connection_result(lrmd, rc);
}
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static void
lrmd_ipc_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tls_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
if (native->remote->tls_session) {
gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = 0;
}
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
} else if (native->sock) {
close(native->sock);
}
if (native->pending_notify) {
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
}
#endif
static int
lrmd_api_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
crm_info("Disconnecting from lrmd service");
switch (native->type) {
case CRM_CLIENT_IPC:
lrmd_ipc_disconnect(lrmd);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
lrmd_tls_disconnect(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
free(native->token);
native->token = NULL;
return 0;
}
static int
lrmd_api_register_rsc(lrmd_t * lrmd,
const char *rsc_id,
const char *class,
const char *provider, const char *type, enum lrmd_call_options options)
{
int rc = pcmk_ok;
xmlNode *data = NULL;
if (!class || !type || !rsc_id) {
return -EINVAL;
}
if (safe_str_eq(class, "ocf") && !provider) {
return -EINVAL;
}
data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add(data, F_LRMD_CLASS, class);
crm_xml_add(data, F_LRMD_PROVIDER, provider);
crm_xml_add(data, F_LRMD_TYPE, type);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE);
free_xml(data);
return rc;
}
static int
lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE);
free_xml(data);
return rc;
}
lrmd_rsc_info_t *
lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info)
{
lrmd_rsc_info_t *copy = NULL;
copy = calloc(1, sizeof(lrmd_rsc_info_t));
copy->id = strdup(rsc_info->id);
copy->type = strdup(rsc_info->type);
copy->class = strdup(rsc_info->class);
if (rsc_info->provider) {
copy->provider = strdup(rsc_info->provider);
}
return copy;
}
void
lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info)
{
if (!rsc_info) {
return;
}
free(rsc_info->id);
free(rsc_info->type);
free(rsc_info->class);
free(rsc_info->provider);
free(rsc_info);
}
static lrmd_rsc_info_t *
lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
lrmd_rsc_info_t *rsc_info = NULL;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
xmlNode *output = NULL;
const char *class = NULL;
const char *provider = NULL;
const char *type = NULL;
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE);
free_xml(data);
if (!output) {
return NULL;
}
class = crm_element_value(output, F_LRMD_CLASS);
provider = crm_element_value(output, F_LRMD_PROVIDER);
type = crm_element_value(output, F_LRMD_TYPE);
if (!class || !type) {
free_xml(output);
return NULL;
} else if (safe_str_eq(class, "ocf") && !provider) {
free_xml(output);
return NULL;
}
rsc_info = calloc(1, sizeof(lrmd_rsc_info_t));
rsc_info->id = strdup(rsc_id);
rsc_info->class = strdup(class);
if (provider) {
rsc_info->provider = strdup(provider);
}
rsc_info->type = strdup(type);
free_xml(output);
return rsc_info;
}
static void
lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback)
{
lrmd_private_t *native = lrmd->private;
native->callback = callback;
}
void
lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg))
{
lrmd_private_t *native = lrmd->private;
native->proxy_callback = callback;
native->proxy_callback_userdata = userdata;
}
void
lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg)
{
lrmd_private_t *native = lrmd->private;
if (native->proxy_callback) {
crm_log_xml_trace(msg, "PROXY_INBOUND");
native->proxy_callback(lrmd, native->proxy_callback_userdata, msg);
}
}
int
lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg)
{
if (lrmd == NULL) {
return -ENOTCONN;
}
crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD);
crm_log_xml_trace(msg, "PROXY_OUTBOUND");
return lrmd_send_xml_no_reply(lrmd, msg);
}
static int
stonith_get_metadata(const char *provider, const char *type, char **output)
{
int rc = pcmk_ok;
stonith_t *stonith_api = stonith_api_new();
if(stonith_api) {
stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0);
stonith_api->cmds->free(stonith_api);
}
if (*output == NULL) {
rc = -EIO;
}
return rc;
}
#define lsb_metadata_template \
"<?xml version='1.0'?>\n" \
"<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n" \
"<resource-agent name='%s' version='0.1'>\n" \
" <version>1.0</version>\n" \
" <longdesc lang='en'>\n" \
" %s\n" \
" </longdesc>\n" \
" <shortdesc lang='en'>%s</shortdesc>\n" \
" <parameters>\n" \
" </parameters>\n" \
" <actions>\n" \
" <action name='meta-data' timeout='5' />\n" \
" <action name='start' timeout='15' />\n" \
" <action name='stop' timeout='15' />\n" \
" <action name='status' timeout='15' />\n" \
" <action name='restart' timeout='15' />\n" \
" <action name='force-reload' timeout='15' />\n" \
" <action name='monitor' timeout='15' interval='15' />\n" \
" </actions>\n" \
" <special tag='LSB'>\n" \
" <Provides>%s</Provides>\n" \
" <Required-Start>%s</Required-Start>\n" \
" <Required-Stop>%s</Required-Stop>\n" \
" <Should-Start>%s</Should-Start>\n" \
" <Should-Stop>%s</Should-Stop>\n" \
" <Default-Start>%s</Default-Start>\n" \
" <Default-Stop>%s</Default-Stop>\n" \
" </special>\n" \
"</resource-agent>\n"
#define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO"
#define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO"
#define PROVIDES "# Provides:"
#define REQ_START "# Required-Start:"
#define REQ_STOP "# Required-Stop:"
#define SHLD_START "# Should-Start:"
#define SHLD_STOP "# Should-Stop:"
#define DFLT_START "# Default-Start:"
#define DFLT_STOP "# Default-Stop:"
#define SHORT_DSCR "# Short-Description:"
#define DESCRIPTION "# Description:"
#define lsb_meta_helper_free_value(m) \
if ((m) != NULL) { \
xmlFree(m); \
(m) = NULL; \
}
#define lsb_meta_helper_get_value(buffer, ptr, keyword) \
if (!ptr && !strncasecmp(buffer, keyword, strlen(keyword))) { \
(ptr) = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer+strlen(keyword)); \
continue; \
}
static int
lsb_get_metadata(const char *type, char **output)
{
char ra_pathname[PATH_MAX] = { 0, };
FILE *fp;
GString *meta_data = NULL;
char buffer[1024];
char *provides = NULL;
char *req_start = NULL;
char *req_stop = NULL;
char *shld_start = NULL;
char *shld_stop = NULL;
char *dflt_start = NULL;
char *dflt_stop = NULL;
char *s_dscrpt = NULL;
char *xml_l_dscrpt = NULL;
GString *l_dscrpt = NULL;
if(type[0] == '/') {
snprintf(ra_pathname, sizeof(ra_pathname), "%s", type);
} else {
snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type);
}
crm_trace("Looking into %s", ra_pathname);
if (!(fp = fopen(ra_pathname, "r"))) {
return -errno;
}
/* Enter into the lsb-compliant comment block */
while (fgets(buffer, sizeof(buffer), fp)) {
/* Now suppose each of the following eight arguments contain only one line */
lsb_meta_helper_get_value(buffer, provides, PROVIDES)
lsb_meta_helper_get_value(buffer, req_start, REQ_START)
lsb_meta_helper_get_value(buffer, req_stop, REQ_STOP)
lsb_meta_helper_get_value(buffer, shld_start, SHLD_START)
lsb_meta_helper_get_value(buffer, shld_stop, SHLD_STOP)
lsb_meta_helper_get_value(buffer, dflt_start, DFLT_START)
lsb_meta_helper_get_value(buffer, dflt_stop, DFLT_STOP)
lsb_meta_helper_get_value(buffer, s_dscrpt, SHORT_DSCR)
/* Long description may cross multiple lines */
if ((l_dscrpt == NULL) && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) {
l_dscrpt = g_string_new(buffer + strlen(DESCRIPTION));
/* Between # and keyword, more than one space, or a tab character,
* indicates the continuation line. Extracted from LSB init script standard */
while (fgets(buffer, sizeof(buffer), fp)) {
if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) {
buffer[0] = ' ';
l_dscrpt = g_string_append(l_dscrpt, buffer);
} else {
fputs(buffer, fp);
break; /* Long description ends */
}
}
continue;
}
if (l_dscrpt) {
xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(l_dscrpt->str));
}
if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) {
/* Get to the out border of LSB comment block */
break;
}
if (buffer[0] != '#') {
break; /* Out of comment block in the beginning */
}
}
fclose(fp);
meta_data = g_string_new("");
g_string_sprintf(meta_data, lsb_metadata_template, type,
(xml_l_dscrpt == NULL) ? type : xml_l_dscrpt,
(s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides,
(req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop,
(shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop,
(dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop);
lsb_meta_helper_free_value(xml_l_dscrpt);
lsb_meta_helper_free_value(s_dscrpt);
lsb_meta_helper_free_value(provides);
lsb_meta_helper_free_value(req_start);
lsb_meta_helper_free_value(req_stop);
lsb_meta_helper_free_value(shld_start);
lsb_meta_helper_free_value(shld_stop);
lsb_meta_helper_free_value(dflt_start);
lsb_meta_helper_free_value(dflt_stop);
if (l_dscrpt) {
g_string_free(l_dscrpt, TRUE);
}
*output = strdup(meta_data->str);
g_string_free(meta_data, TRUE);
crm_trace("Created fake metadata: %d", strlen(*output));
return pcmk_ok;
}
#if SUPPORT_NAGIOS
static int
nagios_get_metadata(const char *type, char **output)
{
int rc = pcmk_ok;
FILE *file_strm = NULL;
int start = 0, length = 0, read_len = 0;
char *metadata_file = NULL;
int len = 36;
len += strlen(NAGIOS_METADATA_DIR);
len += strlen(type);
metadata_file = calloc(1, len);
CRM_CHECK(metadata_file != NULL, return -ENOMEM);
sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type);
file_strm = fopen(metadata_file, "r");
if (file_strm == NULL) {
crm_err("Metadata file %s does not exist", metadata_file);
free(metadata_file);
return -EIO;
}
/* see how big the file is */
start = ftell(file_strm);
fseek(file_strm, 0L, SEEK_END);
length = ftell(file_strm);
fseek(file_strm, 0L, start);
CRM_ASSERT(length >= 0);
CRM_ASSERT(start == ftell(file_strm));
if (length <= 0) {
crm_info("%s was not valid", metadata_file);
free(*output);
*output = NULL;
rc = -EIO;
} else {
crm_trace("Reading %d bytes from file", length);
*output = calloc(1, (length + 1));
read_len = fread(*output, 1, length, file_strm);
if (read_len != length) {
crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len);
free(*output);
*output = NULL;
rc = -EIO;
}
}
fclose(file_strm);
free(metadata_file);
return rc;
}
#endif
static int
generic_get_metadata(const char *standard, const char *provider, const char *type, char **output)
{
svc_action_t *action = resources_action_create(type,
standard,
provider,
type,
"meta-data",
0,
5000,
NULL);
if (!(services_action_sync(action))) {
crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type);
services_action_free(action);
return -EIO;
}
if (!action->stdout_data) {
crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type);
services_action_free(action);
return -EIO;
}
*output = strdup(action->stdout_data);
services_action_free(action);
return pcmk_ok;
}
static int
lrmd_api_get_metadata(lrmd_t * lrmd,
const char *class,
const char *provider,
const char *type, char **output, enum lrmd_call_options options)
{
if (!class || !type) {
return -EINVAL;
}
if (safe_str_eq(class, "stonith")) {
return stonith_get_metadata(provider, type, output);
} else if (safe_str_eq(class, "lsb")) {
return lsb_get_metadata(type, output);
#if SUPPORT_NAGIOS
} else if (safe_str_eq(class, "nagios")) {
return nagios_get_metadata(type, output);
#endif
}
return generic_get_metadata(class, provider, type, output);
}
static int
lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
int timeout, /* ms */
int start_delay, /* ms */
enum lrmd_call_options options, lrmd_key_value_t * params)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
lrmd_key_value_t *tmp = NULL;
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add(data, F_LRMD_RSC_ACTION, action);
crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata);
crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval);
crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout);
crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay);
for (tmp = params; tmp; tmp = tmp->next) {
hash2field((gpointer) tmp->key, (gpointer) tmp->value, args);
}
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE);
free_xml(data);
lrmd_key_value_freeall(params);
return rc;
}
static int
lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ACTION, action);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE);
free_xml(data);
return rc;
}
static int
list_stonith_agents(lrmd_list_t ** resources)
{
int rc = 0;
stonith_t *stonith_api = stonith_api_new();
stonith_key_value_t *stonith_resources = NULL;
stonith_key_value_t *dIter = NULL;
if(stonith_api) {
stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0);
stonith_api->cmds->free(stonith_api);
}
for (dIter = stonith_resources; dIter; dIter = dIter->next) {
rc++;
if (resources) {
*resources = lrmd_list_add(*resources, dIter->value);
}
}
stonith_key_value_freeall(stonith_resources, 1, 0);
return rc;
}
static int
lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class,
const char *provider)
{
int rc = 0;
if (safe_str_eq(class, "stonith")) {
rc += list_stonith_agents(resources);
} else {
GListPtr gIter = NULL;
GList *agents = resources_list_agents(class, provider);
for (gIter = agents; gIter != NULL; gIter = gIter->next) {
*resources = lrmd_list_add(*resources, (const char *)gIter->data);
rc++;
}
g_list_free_full(agents, free);
if (!class) {
rc += list_stonith_agents(resources);
}
}
if (rc == 0) {
crm_notice("No agents found for class %s", class);
rc = -EPROTONOSUPPORT;
}
return rc;
}
static int
does_provider_have_agent(const char *agent, const char *provider, const char *class)
{
int found = 0;
GList *agents = NULL;
GListPtr gIter2 = NULL;
agents = resources_list_agents(class, provider);
for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) {
if (safe_str_eq(agent, gIter2->data)) {
found = 1;
}
}
g_list_free_full(agents, free);
return found;
}
static int
lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers)
{
int rc = pcmk_ok;
char *provider = NULL;
GList *ocf_providers = NULL;
GListPtr gIter = NULL;
ocf_providers = resources_list_providers("ocf");
for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) {
provider = gIter->data;
if (!agent || does_provider_have_agent(agent, provider, "ocf")) {
*providers = lrmd_list_add(*providers, (const char *)gIter->data);
rc++;
}
}
g_list_free_full(ocf_providers, free);
return rc;
}
static int
lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported)
{
int rc = 0;
GList *standards = NULL;
GListPtr gIter = NULL;
standards = resources_list_standards();
for (gIter = standards; gIter != NULL; gIter = gIter->next) {
*supported = lrmd_list_add(*supported, (const char *)gIter->data);
rc++;
}
if (list_stonith_agents(NULL) > 0) {
*supported = lrmd_list_add(*supported, "stonith");
rc++;
}
g_list_free_full(standards, free);
return rc;
}
lrmd_t *
lrmd_api_new(void)
{
lrmd_t *new_lrmd = NULL;
lrmd_private_t *pvt = NULL;
new_lrmd = calloc(1, sizeof(lrmd_t));
pvt = calloc(1, sizeof(lrmd_private_t));
pvt->remote = calloc(1, sizeof(crm_remote_t));
new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t));
pvt->type = CRM_CLIENT_IPC;
new_lrmd->private = pvt;
new_lrmd->cmds->connect = lrmd_api_connect;
new_lrmd->cmds->connect_async = lrmd_api_connect_async;
new_lrmd->cmds->is_connected = lrmd_api_is_connected;
new_lrmd->cmds->poke_connection = lrmd_api_poke_connection;
new_lrmd->cmds->disconnect = lrmd_api_disconnect;
new_lrmd->cmds->register_rsc = lrmd_api_register_rsc;
new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc;
new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info;
new_lrmd->cmds->set_callback = lrmd_api_set_callback;
new_lrmd->cmds->get_metadata = lrmd_api_get_metadata;
new_lrmd->cmds->exec = lrmd_api_exec;
new_lrmd->cmds->cancel = lrmd_api_cancel;
new_lrmd->cmds->list_agents = lrmd_api_list_agents;
new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers;
new_lrmd->cmds->list_standards = lrmd_api_list_standards;
return new_lrmd;
}
lrmd_t *
lrmd_remote_api_new(const char *nodename, const char *server, int port)
{
#ifdef HAVE_GNUTLS_GNUTLS_H
lrmd_t *new_lrmd = lrmd_api_new();
lrmd_private_t *native = new_lrmd->private;
if (!nodename && !server) {
lrmd_api_delete(new_lrmd);
return NULL;
}
native->type = CRM_CLIENT_TLS;
native->remote_nodename = nodename ? strdup(nodename) : strdup(server);
native->server = server ? strdup(server) : strdup(nodename);
native->port = port;
if (native->port == 0) {
const char *remote_port_str = getenv("PCMK_remote_port");
native->port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT;
}
return new_lrmd;
#else
crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created");
return NULL;
#endif
}
void
lrmd_api_delete(lrmd_t * lrmd)
{
if (!lrmd) {
return;
}
lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */
free(lrmd->cmds);
if (lrmd->private) {
lrmd_private_t *native = lrmd->private;
#ifdef HAVE_GNUTLS_GNUTLS_H
free(native->server);
#endif
free(native->remote_nodename);
free(native->remote);
}
free(lrmd->private);
free(lrmd);
}
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index 14079daaa6..2802caa3e2 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1,1321 +1,1322 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/msg_xml.h>
#include <lrmd_private.h>
#ifdef HAVE_SYS_TIMEB_H
# include <sys/timeb.h>
#endif
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
int interval;
int start_delay;
int timeout_orig;
int call_id;
int exec_rc;
int lrmd_op_status;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *output;
char *userdata_str;
#ifdef HAVE_SYS_TIMEB_H
/* Timestamp of when op first ran */
struct timeb t_first_run;
/* Timestamp of when op ran */
struct timeb t_run;
/* Timestamp of when op was queued */
struct timeb t_queue;
/* Timestamp of last rc change */
struct timeb t_rcchange;
#endif
int first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
char pid_str[32] = { 0, };
int log_level = LOG_INFO;
if (cmd->last_pid) {
snprintf(pid_str, 32, "%d", cmd->last_pid);
}
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
#ifdef HAVE_SYS_TIMEB_H
do_crm_log(log_level,
"finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms",
cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str,
cmd->exec_rc, exec_time, queue_time);
#else
do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d",
cmd->rsc_id,
cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc);
#endif
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode * msg, crm_client_t * client)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
cmd->call_opts = call_options;
cmd->client_id = strdup(client->id);
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
return cmd;
}
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static void
send_reply(crm_client_t * client, int rc, uint32_t id, int call_id)
{
int send_rc = 0;
xmlNode *reply = NULL;
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
send_rc = lrmd_server_send_reply(client, id, reply);
free_xml(reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
crm_client_t *client = value;
if (client == NULL) {
crm_err("Asked to send event to NULL client");
return;
} else if (client->name == NULL) {
crm_trace("Asked to send event to client with no name");
return;
}
if (lrmd_server_send_notify(client, update_msg) <= 0) {
crm_warn("Notification of client %s/%s failed", client->name, client->id);
}
}
#ifdef HAVE_SYS_TIMEB_H
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
int sec = difftime(now->time, old->time);
int ms = now->millitm - old->millitm;
if (old->time == 0) {
return 0;
}
return (sec * 1000) + ms;
}
#endif
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
int exec_time = 0;
int queue_time = 0;
xmlNode *notify = NULL;
#ifdef HAVE_SYS_TIMEB_H
struct timeb now = { 0, };
ftime(&now);
exec_time = time_diff_ms(&now, &cmd->t_run);
queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue);
#endif
log_finished(cmd, exec_time, queue_time);
/* if the first notify result for a cmd has already been sent earlier, and the
* the option to only send notifies on result changes is set. Check to see
* if the last result is the same as the new one. If so, suppress this update */
if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
if (cmd->last_notify_rc == cmd->exec_rc &&
cmd->last_notify_op_status == cmd->lrmd_op_status) {
/* only send changes */
return;
}
}
cmd->first_notify_sent = 1;
cmd->last_notify_rc = cmd->exec_rc;
cmd->last_notify_op_status = cmd->lrmd_op_status;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
#ifdef HAVE_SYS_TIMEB_H
crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2field((gpointer) key, (gpointer) value, args);
}
}
if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
crm_client_t *client = crm_client_get_by_id(cmd->client_id);
if (client) {
send_client_notify(client->id, client, notify);
}
} else {
g_hash_table_foreach(client_connections, send_client_notify, notify);
}
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
g_hash_table_foreach(client_connections, send_client_notify, notify);
free_xml(notify);
}
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
send_cmd_complete_notify(cmd);
/* crmd expects lrmd to automatically cancel recurring ops after rsc stops */
if (rsc && safe_str_eq(cmd->action, "stop")) {
cancel_all_recurring(rsc, NULL);
}
if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
free(cmd->output);
cmd->output = NULL;
}
}
static int
lsb2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
/* status has different return codes that everything else. */
if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) {
if (rc > PCMK_LSB_NOT_RUNNING) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
switch (rc) {
case PCMK_LSB_STATUS_OK:
return PCMK_OCF_OK;
case PCMK_LSB_STATUS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case PCMK_LSB_STATUS_VAR_PID:
case PCMK_LSB_STATUS_VAR_LOCK:
case PCMK_LSB_STATUS_NOT_RUNNING:
return PCMK_OCF_NOT_RUNNING;
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
static int
stonith2uniform_rc(const char *action, int rc)
{
if (rc == -ENODEV) {
if (safe_str_eq(action, "stop")) {
rc = PCMK_OCF_OK;
} else if (safe_str_eq(action, "start")) {
rc = PCMK_OCF_NOT_INSTALLED;
} else {
rc = PCMK_OCF_NOT_RUNNING;
}
} else if (rc != 0) {
rc = PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
#if SUPPORT_NAGIOS
static int
nagios2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
switch (rc) {
case NAGIOS_STATE_OK:
return PCMK_OCF_OK;
case NAGIOS_INSUFFICIENT_PRIV:
return PCMK_OCF_INSUFFICIENT_PRIV;
case NAGIOS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case NAGIOS_STATE_WARNING:
case NAGIOS_STATE_CRITICAL:
case NAGIOS_STATE_UNKNOWN:
case NAGIOS_STATE_DEPENDENT:
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
#endif
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, "ocf")) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, "stonith")) {
return stonith2uniform_rc(action, rc);
} else if (safe_str_eq(standard, "systemd")) {
return rc;
} else if (safe_str_eq(standard, "upstart")) {
return rc;
#if SUPPORT_NAGIOS
} else if (safe_str_eq(standard, "nagios")) {
return nagios2uniform_rc(action, rc);
#endif
} else {
return lsb2uniform_rc(action, rc);
}
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (rsc->call_opts & lrmd_opt_drop_recurring) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
if (!cmd) {
crm_err("LRMD action (%s) completed does not match any known operations.", action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->last_pid = action->pid;
cmd->exec_rc = get_uniform_rc(action->standard, cmd->action, action->rc);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (action->stdout_data) {
cmd->output = strdup(action->stdout_data);
}
#if SUPPORT_NAGIOS
if (rsc && safe_str_eq(rsc->class, "nagios")) {
if (safe_str_eq(cmd->action, "monitor") &&
cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) {
/* Successfully executed --version for the nagios plugin */
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
int time_sum = 0;
int timeout_left = 0;
int delay = cmd->timeout_orig / 10;
# ifdef HAVE_SYS_TIMEB_H
struct timeb now = { 0, };
ftime(&now);
time_sum = time_diff_ms(&now, &cmd->t_first_run);
timeout_left = cmd->timeout_orig - time_sum;
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
crm_notice
("%s %s failed (rc=%d): re-scheduling (time_sum=%dms, start_delay=%dms, timeout=%dms)",
cmd->rsc_id, cmd->action, cmd->exec_rc, time_sum, cmd->start_delay,
cmd->timeout);
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
free(cmd->output);
cmd->output = NULL;
rsc->active = NULL;
schedule_lrmd_cmd(rsc, cmd);
return;
}
# endif
}
}
#endif
cmd_finalize(cmd, rsc);
}
static void
stonith_action_complete(lrmd_cmd_t * cmd, int rc)
{
int recurring = cmd->interval;
lrmd_rsc_t *rsc = NULL;
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc);
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
recurring = 0;
/* do nothing */
} else if (rc) {
/* Attempt to map return codes to op status if possible */
switch (rc) {
case -EPROTONOSUPPORT:
cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED;
break;
case -ETIME:
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
break;
default:
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
} else {
/* command successful */
cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
if (safe_str_eq(cmd->action, "start") && rsc) {
rsc->stonith_started = 1;
}
}
if (recurring && rsc) {
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
stonith_action_complete(data->userdata, data->rc);
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (safe_str_eq(rsc->class, "stonith")) {
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
}
rsc->pending_ops = rsc->recurring_ops = NULL;
}
}
if (!cmd_list) {
return;
}
crm_err("STONITH connection failed, finalizing %d pending operations.",
g_list_length(cmd_list));
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
stonith_action_complete(cmd_iter->data, -ENOTCONN);
}
g_list_free(cmd_list);
}
static int
lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
int rc = 0;
int do_monitor = 0;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, -ENOTCONN);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
cmd_finalize(cmd, rsc);
return -EUNATCH;
}
if (safe_str_eq(cmd->action, "start")) {
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
/* Stonith automatically registers devices from the IPC when changes occur,
* but to avoid a possible race condition between stonith receiving the IPC update
* and the lrmd requesting that resource, the lrmd still registers the device as well.
* Stonith knows how to handle duplicate device registrations correctly. */
rc = stonith_api->cmds->register_device(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
rsc->provider, rsc->type, device_params);
stonith_key_value_freeall(device_params, 1, 1);
if (rc == 0) {
do_monitor = 1;
}
} else if (safe_str_eq(cmd->action, "stop")) {
rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id);
rsc->stonith_started = 0;
} else if (safe_str_eq(cmd->action, "monitor")) {
if (cmd->interval) {
do_monitor = 1;
} else {
rc = rsc->stonith_started ? 0 : -ENODEV;
}
}
if (!do_monitor) {
goto cleanup_stonith_exec;
}
rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000);
rc = stonith_api->cmds->register_callback(stonith_api,
rc,
0,
0,
cmd, "lrmd_stonith_callback", lrmd_stonith_callback);
/* don't cleanup yet, we will find out the result of the monitor later */
if (rc > 0) {
rsc->active = cmd;
return rc;
} else if (rc == 0) {
rc = -1;
}
cleanup_stonith_exec:
stonith_action_complete(cmd, rc);
return rc;
}
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
(safe_str_eq(rsc->class, "lsb") ||
safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) {
return "status";
}
return action;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, strdup(key), strdup(value));
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (safe_str_eq(rsc->class, "nagios") && safe_str_eq(cmd->action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
goto exec_done;
}
#endif
if (cmd->params) {
params_copy = g_hash_table_new_full(crm_str_hash,
g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
if (params_copy != NULL) {
g_hash_table_foreach(cmd->params, dup_attr, params_copy);
}
}
action = resources_action_create(rsc->rsc_id,
rsc->class,
rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval, cmd->timeout, params_copy);
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
/* 'cmd' may not be valid after this point if
* services_action_async() returned TRUE
*
* Upstart and systemd both synchronously determine monitor/status
* results and call action_complete (which may free 'cmd') if necessary.
*/
if (services_action_async(action, action_complete)) {
return TRUE;
}
cmd->exec_rc = action->rc;
if(action->status != PCMK_LRM_OP_DONE) {
cmd->lrmd_op_status = action->status;
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace
("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef HAVE_SYS_TIMEB_H
if (cmd->t_first_run.time == 0) {
ftime(&cmd->t_first_run);
}
ftime(&cmd->t_run);
#endif
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (safe_str_eq(rsc->class, "stonith")) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, "stonith");
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id, cmd->action, cmd->interval);
}
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static int
process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *reply = create_xml_node(NULL, "reply");
const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
if (safe_str_neq(protocol_version, LRMD_PROTOCOL_VERSION)) {
crm_xml_add_int(reply, F_LRMD_RC, -EPROTO);
crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
}
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
lrmd_server_send_reply(client, id, reply);
if (crm_is_true(is_ipc_provider)) {
/* this is a remote connection from a cluster nodes crmd */
#ifdef SUPPORT_REMOTE
ipc_proxy_add_provider(client);
#endif
}
free_xml(reply);
return pcmk_ok;
}
static int
process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) {
crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Added '%s' to the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
return rc;
}
static void
process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int send_rc = 0;
int call_id = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (!rsc_id) {
rc = -ENODEV;
goto get_rsc_done;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
goto get_rsc_done;
}
get_rsc_done:
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
send_rc = lrmd_server_send_reply(client, id, reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
free_xml(reply);
}
static int
process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation still in progress: %p", rsc->active);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client);
schedule_lrmd_cmd(rsc, cmd);
return cmd->call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, int interval)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then its either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (safe_str_eq(rsc->class, "stonith")) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith opereations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Notice a copy of each list is created when concat is called.
* This prevents odd behavior from occurring when the cmd_list
* is iterated through later on. It is possible the cancel_op
* function may end up modifying the recurring_ops and pending_ops
* lists. If we did not copy those lists, our cmd_list iteration
* could get messed up.*/
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval == 0) {
continue;
}
if (client_id && safe_str_neq(cmd->client_id, client_id)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval);
}
/* frees only the copied list data, not the cmds */
g_list_free(cmd_list);
}
static int
process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
int interval = 0;
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval);
}
void
process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) {
#ifdef SUPPORT_REMOTE
ipc_proxy_forward_client(client, request);
#endif
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
rc = process_lrmd_signon(client, id, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
process_lrmd_get_rsc_info(client, id, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
do_notify = 1;
+ do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown %s from %s", op, client->name);
crm_log_xml_warn(request, "UnknownOp");
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d, exit=%d",
op, client->id, rc, do_reply, do_notify, exit);
if (do_reply) {
send_reply(client, rc, id, call_id);
}
if (do_notify) {
send_generic_notify(rc, request);
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:25 PM (12 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1980863
Default Alt Text
(166 KB)

Event Timeline