Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624472
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
166 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/crmd/pengine.c b/crmd/pengine.c
index 5546d7e3f2..2f3eba85b3 100644
--- a/crmd/pengine.c
+++ b/crmd/pengine.c
@@ -1,295 +1,298 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crmd_fsa.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h> /* for access */
#include <sys/types.h> /* for calls to open */
#include <sys/stat.h> /* for calls to open */
#include <fcntl.h> /* for calls to open */
#include <pwd.h> /* for getpwuid */
#include <grp.h> /* for initgroups */
#include <sys/time.h> /* for getrlimit */
#include <sys/resource.h> /* for getrlimit */
#include <errno.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crm/cib.h>
#include <crmd.h>
struct crm_subsystem_s *pe_subsystem = NULL;
void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
static void
save_cib_contents(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *id = user_data;
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
CRM_CHECK(id != NULL, return);
if (rc == pcmk_ok) {
int len = 15;
char *filename = NULL;
len += strlen(id);
len += strlen(PE_STATE_DIR);
filename = calloc(1, len);
CRM_CHECK(filename != NULL, return);
sprintf(filename, PE_STATE_DIR "/pe-core-%s.bz2", id);
if (write_xml_file(output, filename, TRUE) < 0) {
crm_err("Could not save CIB contents after PE crash to %s", filename);
} else {
crm_notice("Saved CIB contents after PE crash to %s", filename);
}
free(filename);
}
free(id);
}
static void
pe_ipc_destroy(gpointer user_data)
{
clear_bit(fsa_input_register, pe_subsystem->flag_connected);
if (is_set(fsa_input_register, pe_subsystem->flag_required)) {
int rc = pcmk_ok;
char *uuid_str = crm_generate_uuid();
crm_crit("Connection to the Policy Engine failed (pid=%d, uuid=%s)",
pe_subsystem->pid, uuid_str);
/*
*The PE died...
*
* Save the current CIB so that we have a chance of
* figuring out what killed it.
*
* Delay raising the I_ERROR until the query below completes or
* 5s is up, whichever comes first.
*
*/
rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);
} else {
crm_info("Connection to the Policy Engine released");
}
pe_subsystem->pid = -1;
pe_subsystem->source = NULL;
pe_subsystem->client = NULL;
mainloop_set_trigger(fsa_source);
return;
}
static int
pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
xmlNode *msg = string2xml(buffer);
if (msg) {
route_message(C_IPC_MESSAGE, msg);
}
free_xml(msg);
return 0;
}
/* A_PE_START, A_PE_STOP, A_TE_RESTART */
void
do_pe_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
struct crm_subsystem_s *this_subsys = pe_subsystem;
long long stop_actions = A_PE_STOP;
long long start_actions = A_PE_START;
static struct ipc_client_callbacks pe_callbacks = {
.dispatch = pe_ipc_dispatch,
.destroy = pe_ipc_destroy
};
if (action & stop_actions) {
clear_bit(fsa_input_register, pe_subsystem->flag_required);
mainloop_del_ipc_client(pe_subsystem->source);
pe_subsystem->source = NULL;
clear_bit(fsa_input_register, pe_subsystem->flag_connected);
}
if ((action & start_actions) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) {
if (cur_state != S_STOPPING) {
set_bit(fsa_input_register, pe_subsystem->flag_required);
pe_subsystem->source =
mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, G_PRIORITY_DEFAULT,
5 * 1024 * 1024 /* 5Mb */ , NULL, &pe_callbacks);
if (pe_subsystem->source == NULL) {
crm_warn("Setup of client connection failed, not adding channel to mainloop");
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
return;
}
/* if (is_openais_cluster()) { */
/* pe_subsystem->pid = pe_subsystem->ipc->farside_pid; */
/* } */
set_bit(fsa_input_register, pe_subsystem->flag_connected);
} else {
crm_info("Ignoring request to start %s while shutting down", this_subsys->name);
}
}
}
int fsa_pe_query = 0;
char *fsa_pe_ref = NULL;
/* A_PE_INVOKE */
void
do_pe_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (AM_I_DC == FALSE) {
crm_err("Not DC: No need to invoke the PE (anymore): %s", fsa_action2string(action));
return;
}
if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Cannot shut down gracefully without the PE");
register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
} else {
crm_info("Waiting for the PE to connect");
crmd_fsa_stall(FALSE);
register_fsa_action(A_PE_START);
}
return;
}
if (cur_state != S_POLICY_ENGINE) {
crm_notice("No need to invoke the PE in state %s", fsa_state2string(cur_state));
return;
}
if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
crm_err("Attempted to invoke the PE without a consistent copy of the CIB!");
/* start the join from scratch */
register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
return;
}
fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
fsa_state2string(fsa_state));
/* Make sure any queued calculations are discarded */
free(fsa_pe_ref);
fsa_pe_ref = NULL;
fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
}
void
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
int sent;
xmlNode *cmd = NULL;
if (rc != pcmk_ok) {
crm_err("Cant retrieve the CIB: %s (call %d)", pcmk_strerror(rc), call_id);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
return;
} else if (call_id != fsa_pe_query) {
crm_trace("Skipping superceeded CIB query: %d (current=%d)", call_id, fsa_pe_query);
return;
} else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
crm_debug("No need to invoke the PE anymore");
return;
} else if (fsa_state != S_POLICY_ENGINE) {
crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state));
return;
} else if (last_peer_update != 0) {
crm_debug("Re-asking for the CIB: peer update %d still pending", last_peer_update);
sleep(1);
register_fsa_action(A_PE_INVOKE);
return;
} else if (fsa_state != S_POLICY_ENGINE) {
crm_err("Invoking PE in state: %s", fsa_state2string(fsa_state));
return;
}
CRM_LOG_ASSERT(output != NULL);
+ /* refresh our remote-node cache when the pengine is invoked */
+ crm_remote_peer_cache_refresh(output);
+
crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
if (ever_had_quorum && crm_have_quorum == FALSE) {
crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
}
cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL);
free(fsa_pe_ref);
fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE);
sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem->source), cmd, 0, 0, NULL);
if (sent <= 0) {
crm_err("Could not contact the pengine: %d", sent);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
}
crm_debug("Invoking the PE: query=%d, ref=%s, seq=%llu, quorate=%d",
fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
free_xml(cmd);
}
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 54fae0435f..239af6335e 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -1,474 +1,465 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crm/fencing/internal.h>
crm_trigger_t *stonith_reconnect = NULL;
GListPtr stonith_cleanup_list = NULL;
static gboolean
fail_incompletable_stonith(crm_graph_t * graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (synapse->confirmed) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
if (action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): STONITHd terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("STONITHd failure resulted in un-runnable actions");
abort_transition(INFINITY, tg_restart, "Stonith failure", last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
{
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
/* cbchan will be garbage at this point, arrange for it to be reset */
if(stonith_api) {
stonith_api->state = stonith_disconnected;
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
#if SUPPORT_CMAN
# include <libfenced.h>
#endif
char *te_client_id = NULL;
#ifdef HAVE_SYS_REBOOT_H
# include <unistd.h>
# include <sys/reboot.h>
#endif
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
if(te_client_id == NULL) {
te_client_id = g_strdup_printf("%s.%d", crm_system_name, getpid());
}
if (st_event == NULL) {
crm_err("Notify data not found");
return;
}
if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
crm_crit("We were alegedly just fenced by %s for %s with %s!", st_event->executioner,
st_event->origin, st_event->device); /* Dumps blackbox if enabled */
qb_log_fini(); /* Try to get the above log message to disk - somehow */
/* Get out ASAP and do not come back up.
*
* Triggering a reboot is also not the worst idea either since
* the rest of the cluster thinks we're safely down
*/
#ifdef RB_HALT_SYSTEM
reboot(RB_HALT_SYSTEM);
#endif
/*
* If reboot() fails or is not supported, coming back up will
* probably lead to a situation where the other nodes set our
* status to 'lost' because of the fencing callback and will
* discard subsequent election votes with:
*
* Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
*
* So just stay dead, something is seriously messed up anyway.
*
*/
exit(100); /* None of our wrappers since we already called qb_log_fini() */
return;
}
if (st_event->result == pcmk_ok &&
safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
st_fail_count_reset(st_event->target);
}
crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->action,
st_event->executioner ? st_event->executioner : "<anyone>",
st_event->origin, pcmk_strerror(st_event->result), st_event->id,
st_event->client_origin ? st_event->client_origin : "<unknown>");
#if SUPPORT_CMAN
if (st_event->result == pcmk_ok && is_cman_cluster()) {
int local_rc = 0;
char *target_copy = strdup(st_event->target);
/* In case fenced hasn't noticed yet
*
* Any fencing that has been inititated will be completed by way of the fence_pcmk redirect
*/
local_rc = fenced_external(target_copy);
if (local_rc != 0) {
crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
local_rc);
} else {
crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
}
free(target_copy);
}
#endif
if (st_event->result == pcmk_ok) {
crm_node_t *peer = crm_get_peer(0, st_event->target);
const char *uuid = crm_peer_uuid(peer);
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
if(AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
* that invoked stonith to fence someone
*/
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
}
/* Assume it was our leader if we dont currently have one */
} else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
crm_notice("Target %s our leader %s (recorded: %s)",
fsa_our_dc ? "was" : "may have been", st_event->target,
fsa_our_dc ? fsa_our_dc : "<unset>");
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (we_are_executioner) {
send_stonith_update(NULL, st_event->target, uuid);
}
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));
}
/* Everyone records them as safely down */
crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL);
crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0);
crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
crm_update_peer_join(__FUNCTION__, peer, crm_join_none);
}
}
gboolean
te_connect_stonith(gpointer user_data)
{
int lpc = 0;
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Still connected");
return TRUE;
}
for (lpc = 0; lpc < 30; lpc++) {
crm_debug("Attempting connection to fencing daemon...");
sleep(1);
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc == pcmk_ok) {
break;
}
if (user_data != NULL) {
crm_err("Sign-in failed: triggered a retry");
mainloop_set_trigger(stonith_reconnect);
return TRUE;
}
crm_err("Sign-in failed: pausing and trying again in 2s...");
sleep(1);
}
CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
tengine_stonith_notify);
crm_trace("Connected");
return TRUE;
}
gboolean
stop_te_timer(crm_action_timer_t * timer)
{
const char *timer_desc = "action timer";
if (timer == NULL) {
return FALSE;
}
if (timer->reason == timeout_abort) {
timer_desc = "global timer";
crm_trace("Stopping %s", timer_desc);
}
if (timer->source_id != 0) {
crm_trace("Stopping %s", timer_desc);
g_source_remove(timer->source_id);
timer->source_id = 0;
} else {
crm_trace("%s was already stopped", timer_desc);
return FALSE;
}
return TRUE;
}
gboolean
te_graph_trigger(gpointer user_data)
{
enum transition_status graph_rc = -1;
if (transition_graph == NULL) {
crm_debug("Nothing to do");
return TRUE;
}
crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
return TRUE;
break;
default:
break;
}
if (transition_graph->complete == FALSE) {
graph_rc = run_graph(transition_graph);
print_graph(LOG_DEBUG_3, transition_graph);
if (graph_rc == transition_active) {
crm_trace("Transition not yet complete");
return TRUE;
} else if (graph_rc == transition_pending) {
crm_trace("Transition not yet complete - no actions fired");
return TRUE;
}
if (graph_rc != transition_complete) {
crm_warn("Transition failed: %s", transition_status(graph_rc));
print_graph(LOG_NOTICE, transition_graph);
}
}
crm_debug("Transition %d is now complete", transition_graph->id);
transition_graph->complete = TRUE;
notify_crmd(transition_graph);
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
crm_trace("%s:%d - Triggered graph processing", fn, line);
mainloop_set_trigger(transition_trigger);
}
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
const char *abort_text, xmlNode * reason, const char *fn, int line)
{
const char *magic = NULL;
CRM_CHECK(transition_graph != NULL, return);
if (reason) {
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
const char *uname = "";
xmlNode *search = reason;
xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2);
magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
while(search) {
const char *kind = TYPE(search);
if (safe_str_eq(XML_CIB_TAG_STATE, kind)
|| safe_str_eq(XML_CIB_TAG_NODE, kind)) {
- if (crm_is_true(crm_element_value(search, XML_NODE_IS_REMOTE))) {
- /* Remote node uname and uuids are the same.
- * We also don't want them to be present in the
- * peer cache, so we shouldn't look them up with
- * crm_peer_uname()
- */
- uname = ID(search);
- } else {
- uname = crm_peer_uname(ID(search));
- }
+ uname = crm_peer_uname(ID(search));
break;
}
search = search->parent;
}
if (diff) {
cib_diff_version_details(diff,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) {
crm_info
("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s",
fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), NAME(reason),
VALUE(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch,
diff_add_updates, abort_text);
} else {
crm_info
("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s",
fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason),
magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates,
abort_text);
}
} else {
crm_info
("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s) : %s",
fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason),
magic ? magic : "NA", abort_text);
}
} else {
crm_info("%s:%d - Triggered transition abort (complete=%d) : %s",
fn, line, transition_graph->complete, abort_text);
}
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
crm_info("Abort suppressed: state=%s (complete=%d)",
fsa_state2string(fsa_state), transition_graph->complete);
return;
default:
break;
}
if (magic == NULL && reason != NULL) {
crm_log_xml_debug(reason, "Cause");
}
/* Make sure any queued calculations are discarded ASAP */
free(fsa_pe_ref);
fsa_pe_ref = NULL;
if (transition_graph->complete) {
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
}
return;
}
update_abort_priority(transition_graph, abort_priority, abort_action, abort_text);
mainloop_set_trigger(transition_trigger);
}
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index e645abb8ea..219d4dfbe9 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,203 +1,222 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_COMMON_CLUSTER__H
# define CRM_COMMON_CLUSTER__H
# include <crm/common/xml.h>
# include <crm/common/util.h>
# if SUPPORT_HEARTBEAT
# include <heartbeat/hb_api.h>
# include <ocf/oc_event.h>
# endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
extern gboolean crm_have_quorum;
extern GHashTable *crm_peer_cache;
+extern GHashTable *crm_remote_peer_cache;
extern unsigned long long crm_peer_seq;
# ifndef CRM_SERVICE
# define CRM_SERVICE PCMK_SERVICE_ID
# endif
/* *INDENT-OFF* */
#define CRM_NODE_LOST "lost"
#define CRM_NODE_MEMBER "member"
#define CRM_NODE_ACTIVE CRM_NODE_MEMBER
#define CRM_NODE_EVICTED "evicted"
enum crm_join_phase
{
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
/* *INDENT-ON* */
+enum crm_node_flags
+{
+ crm_remote_node = 0x0001,
+};
typedef struct crm_peer_node_s {
uint32_t id; /* Only used by corosync derivatives */
uint64_t born; /* Only used by heartbeat and the legacy plugin */
uint64_t last_seen;
- uint64_t flags; /* Unused, but might be a good place to specify 'remote' */
+ uint64_t flags; /* Specified by crm_node_flags enum */
int32_t votes; /* Only used by the legacy plugin */
uint32_t processes;
enum crm_join_phase join;
char *uname;
char *uuid;
char *state;
char *expected;
char *addr; /* Only used by the legacy plugin */
char *version; /* Unused */
} crm_node_t;
void crm_peer_init(void);
void crm_peer_destroy(void);
typedef struct crm_cluster_s {
char *uuid;
char *uname;
uint32_t nodeid;
void (*destroy) (gpointer);
# if SUPPORT_HEARTBEAT
ll_cluster_t *hb_conn;
void (*hb_dispatch) (HA_Message * msg, void *private);
# endif
# if SUPPORT_COROSYNC
struct cpg_name group;
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
} crm_cluster_t;
gboolean crm_cluster_connect(crm_cluster_t * cluster);
void crm_cluster_disconnect(crm_cluster_t * cluster);
/* *INDENT-OFF* */
enum crm_ais_msg_class {
crm_class_cluster = 0,
crm_class_members = 1,
crm_class_notify = 2,
crm_class_nodeid = 3,
crm_class_rmpeer = 4,
crm_class_quorum = 5,
};
/* order here matters - its used to index into the crm_children array */
enum crm_ais_msg_types {
crm_msg_none = 0,
crm_msg_ais = 1,
crm_msg_lrmd = 2,
crm_msg_cib = 3,
crm_msg_crmd = 4,
crm_msg_attrd = 5,
crm_msg_stonithd = 6,
crm_msg_te = 7,
crm_msg_pe = 8,
crm_msg_stonith_ng = 9,
};
+
+/* used with crm_get_peer_full */
+enum crm_get_peer_flags {
+ CRM_GET_PEER_CLUSTER = 0x0001,
+ CRM_GET_PEER_REMOTE = 0x0002,
+};
/* *INDENT-ON* */
gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service,
xmlNode * data, gboolean ordered);
+
+/* Initialize and refresh the remote peer cache from a cib config */
+void crm_remote_peer_cache_refresh(xmlNode *cib);
+
+/* allows filtering of remote and cluster nodes using crm_get_peer_flags */
+crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);
+
+/* only searches cluster nodes */
crm_node_t *crm_get_peer(unsigned int id, const char *uname);
guint crm_active_peers(void);
gboolean crm_is_peer_active(const crm_node_t * node);
guint reap_crm_member(uint32_t id, const char *name);
int crm_terminate_member(int nodeid, const char *uname, void *unused);
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection);
# if SUPPORT_HEARTBEAT
gboolean crm_is_heartbeat_peer_active(const crm_node_t * node);
# endif
# if SUPPORT_COROSYNC
extern int ais_fd_sync;
uint32_t get_local_nodeid(cpg_handle_t handle);
gboolean cluster_connect_cpg(crm_cluster_t *cluster);
void cluster_disconnect_cpg(crm_cluster_t * cluster);
void pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries);
gboolean crm_is_corosync_peer_active(const crm_node_t * node);
gboolean send_cluster_text(int class, const char *data, gboolean local,
crm_node_t * node, enum crm_ais_msg_types dest);
# endif
const char *crm_peer_uuid(crm_node_t *node);
const char *crm_peer_uname(const char *uuid);
void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node);
enum crm_status_type {
crm_status_uname,
crm_status_nstate,
crm_status_processes,
};
enum crm_ais_msg_types text2msg_type(const char *text);
void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *));
/* *INDENT-OFF* */
enum cluster_type_e
{
pcmk_cluster_unknown = 0x0001,
pcmk_cluster_invalid = 0x0002,
pcmk_cluster_heartbeat = 0x0004,
pcmk_cluster_classic_ais = 0x0010,
pcmk_cluster_corosync = 0x0020,
pcmk_cluster_cman = 0x0040,
};
/* *INDENT-ON* */
enum cluster_type_e get_cluster_type(void);
const char *name_for_cluster_type(enum cluster_type_e type);
gboolean is_corosync_cluster(void);
gboolean is_cman_cluster(void);
gboolean is_openais_cluster(void);
gboolean is_classic_ais_cluster(void);
gboolean is_heartbeat_cluster(void);
const char *get_local_node_name(void);
char *get_node_name(uint32_t nodeid);
# if SUPPORT_COROSYNC
char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *msg,
uint32_t *kind, const char **from);
# endif
#endif
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 5820c8df2a..5b743f953a 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,632 +1,637 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <dlfcn.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
CRM_TRACE_INIT_DATA(cluster);
#if SUPPORT_HEARTBEAT
void *hb_library = NULL;
#endif
static char *
get_heartbeat_uuid(const char *uname)
{
char *uuid_calc = NULL;
#if SUPPORT_HEARTBEAT
cl_uuid_t uuid_raw;
const char *unknown = "00000000-0000-0000-0000-000000000000";
if (heartbeat_cluster == NULL) {
crm_warn("No connection to heartbeat, using uuid=uname");
return NULL;
} else if(uname == NULL) {
return NULL;
}
if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) ==
HA_FAIL) {
crm_err("get_uuid_by_name() call failed for host %s", uname);
free(uuid_calc);
return NULL;
}
uuid_calc = calloc(1, 50);
cl_uuid_unparse(&uuid_raw, uuid_calc);
if (safe_str_eq(uuid_calc, unknown)) {
crm_warn("Could not calculate UUID for %s", uname);
free(uuid_calc);
return NULL;
}
#endif
return uuid_calc;
}
static gboolean
uname_is_uuid(void)
{
static const char *uuid_pref = NULL;
if (uuid_pref == NULL) {
uuid_pref = getenv("PCMK_uname_is_uuid");
}
if (uuid_pref == NULL) {
/* true is legacy mode */
uuid_pref = "false";
}
return crm_is_true(uuid_pref);
}
int
get_corosync_id(int id, const char *uuid)
{
if (id == 0 && !uname_is_uuid() && is_corosync_cluster()) {
id = crm_atoi(uuid, "0");
}
return id;
}
char *
get_corosync_uuid(crm_node_t *node)
{
if(node == NULL) {
return NULL;
} else if (!uname_is_uuid() && is_corosync_cluster()) {
if (node->id > 0) {
int len = 32;
char *buffer = NULL;
buffer = calloc(1, (len + 1));
if (buffer != NULL) {
snprintf(buffer, len, "%u", node->id);
}
return buffer;
} else {
crm_info("Node %s is not yet known by corosync", node->uname);
}
} else if (node->uname != NULL) {
return strdup(node->uname);
}
return NULL;
}
const char *
crm_peer_uuid(crm_node_t *peer)
{
char *uuid = NULL;
enum cluster_type_e type = get_cluster_type();
/* avoid blocking heartbeat calls where possible */
if(peer == NULL) {
return NULL;
} else if (peer->uuid) {
return peer->uuid;
}
switch (type) {
case pcmk_cluster_corosync:
uuid = get_corosync_uuid(peer);
break;
case pcmk_cluster_cman:
case pcmk_cluster_classic_ais:
if (peer->uname) {
uuid = strdup(peer->uname);
}
break;
case pcmk_cluster_heartbeat:
uuid = get_heartbeat_uuid(peer->uname);
break;
case pcmk_cluster_unknown:
case pcmk_cluster_invalid:
crm_err("Unsupported cluster type");
break;
}
peer->uuid = uuid;
return peer->uuid;
}
gboolean
crm_cluster_connect(crm_cluster_t * cluster)
{
enum cluster_type_e type = get_cluster_type();
crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type));
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
crm_peer_init();
return init_cs_connection(cluster);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
int rv;
/* coverity[var_deref_op] False positive */
if (cluster->hb_conn == NULL) {
/* No object passed in, create a new one. */
ll_cluster_t *(*new_cluster) (const char *llctype) =
find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
cluster->hb_conn = (*new_cluster) ("heartbeat");
/* dlclose(handle); */
} else {
/* Object passed in. Disconnect first, then reconnect below. */
cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
}
/* make sure we are disconnected first with the old object, if any. */
if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) {
heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE);
}
CRM_ASSERT(cluster->hb_conn != NULL);
heartbeat_cluster = cluster->hb_conn;
rv = register_heartbeat_conn(cluster);
if (rv) {
/* we'll benefit from a bigger queue length on heartbeat side.
* Otherwise, if peers send messages faster than we can consume
* them right now, heartbeat messaging layer will kick us out once
* it's (small) default queue fills up :(
* If we fail to adjust the sendq length, that's not yet fatal, though.
*/
if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) {
crm_warn("Cannot set sendq length: %s",
heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster));
}
}
return rv;
}
#endif
crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
return FALSE;
}
void
crm_cluster_disconnect(crm_cluster_t * cluster)
{
enum cluster_type_e type = get_cluster_type();
const char *type_str = name_for_cluster_type(type);
crm_info("Disconnecting from cluster infrastructure: %s", type_str);
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
crm_peer_destroy();
terminate_cs_connection(cluster);
crm_info("Disconnected from %s", type_str);
return;
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
if (cluster == NULL) {
crm_info("No cluster connection");
return;
} else if (cluster->hb_conn) {
cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
cluster->hb_conn = NULL;
crm_info("Disconnected from %s", type_str);
return;
} else {
crm_info("No %s connection", type_str);
return;
}
}
#endif
crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
}
gboolean
send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode * data,
gboolean ordered)
{
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
return send_cluster_message_cs(data, FALSE, node, service);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
return send_ha_message(heartbeat_cluster, data, node ? node->uname : NULL, ordered);
}
#endif
return FALSE;
}
const char *
get_local_node_name(void)
{
static char *name = NULL;
if(name) {
return name;
}
name = get_node_name(0);
return name;
}
char *
get_node_name(uint32_t nodeid)
{
char *name = NULL;
enum cluster_type_e stack = get_cluster_type();
switch (stack) {
case pcmk_cluster_heartbeat:
break;
#if SUPPORT_PLUGIN
case pcmk_cluster_classic_ais:
name = classic_node_name(nodeid);
break;
#else
# if SUPPORT_COROSYNC
case pcmk_cluster_corosync:
name = corosync_node_name(0, nodeid);
break;
# endif
#endif
#if SUPPORT_CMAN
case pcmk_cluster_cman:
name = cman_node_name(nodeid);
break;
#endif
default:
crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack);
}
if(name == NULL && nodeid == 0) {
struct utsname res;
int rc = uname(&res);
if (rc == 0) {
crm_notice("Defaulting to uname -n for the local %s node name",
name_for_cluster_type(stack));
name = strdup(res.nodename);
}
if (name == NULL) {
crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack));
crm_exit(DAEMON_RESPAWN_STOP);
}
}
if (name == NULL) {
crm_notice("Could not obtain a node name for %s nodeid %u",
name_for_cluster_type(stack), nodeid);
}
return name;
}
/* Only used by update_failcount() in te_utils.c */
const char *
crm_peer_uname(const char *uuid)
{
GHashTableIter iter;
crm_node_t *node = NULL;
CRM_CHECK(uuid != NULL, return NULL);
+ /* remote nodes have the same uname and uuid */
+ if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
+ return uuid;
+ }
+
/* avoid blocking calls where possible */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uuid && strcasecmp(node->uuid, uuid) == 0) {
if(node->uname) {
return node->uname;
}
break;
}
}
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
if (uname_is_uuid() == FALSE && is_corosync_cluster()) {
uint32_t id = crm_int_helper(uuid, NULL);
node = crm_get_peer(id, NULL);
} else {
node = crm_get_peer(0, uuid);
}
if (node) {
crm_info("Setting uuid for node %s[%u] to '%s'", node->uname, node->id, uuid);
node->uuid = strdup(uuid);
if(node->uname) {
return node->uname;
}
}
return NULL;
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
if (heartbeat_cluster != NULL) {
cl_uuid_t uuid_raw;
char *uuid_copy = strdup(uuid);
char *uname = malloc(MAX_NAME);
cl_uuid_parse(uuid_copy, &uuid_raw);
if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname,
MAX_NAME) == HA_FAIL) {
crm_err("Could not calculate uname for %s", uuid);
} else {
node = crm_get_peer(0, uname);
}
free(uuid_copy);
free(uname);
}
if (node) {
crm_info("Setting uuid for node %s to '%s'", node->uname, uuid);
node->uuid = strdup(uuid);
if(node->uname) {
return node->uname;
}
}
return NULL;
}
#endif
return NULL;
}
void
set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
{
const char *uuid_calc = crm_peer_uuid(node);
crm_xml_add(xml, attr, uuid_calc);
return;
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
switch (type) {
case pcmk_cluster_classic_ais:
return "classic openais (with plugin)";
case pcmk_cluster_cman:
return "cman";
case pcmk_cluster_corosync:
return "corosync";
case pcmk_cluster_heartbeat:
return "heartbeat";
case pcmk_cluster_unknown:
return "unknown";
case pcmk_cluster_invalid:
return "invalid";
}
crm_err("Invalid cluster type: %d", type);
return "invalid";
}
/* Do not expose these two */
int set_cluster_type(enum cluster_type_e type);
static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
int
set_cluster_type(enum cluster_type_e type)
{
if (cluster_type == pcmk_cluster_unknown) {
crm_info("Cluster type set to: %s", name_for_cluster_type(type));
cluster_type = type;
return 0;
} else if (cluster_type == type) {
return 0;
} else if (pcmk_cluster_unknown == type) {
cluster_type = type;
return 0;
}
crm_err("Cluster type already set to %s, ignoring %s",
name_for_cluster_type(cluster_type), name_for_cluster_type(type));
return -1;
}
enum cluster_type_e
get_cluster_type(void)
{
bool detected = FALSE;
const char *cluster = NULL;
/* Return the previous calculation, if any */
if (cluster_type != pcmk_cluster_unknown) {
return cluster_type;
}
cluster = getenv("HA_cluster_type");
#if SUPPORT_HEARTBEAT
/* If nothing is defined in the environment, try heartbeat (if supported) */
if(cluster == NULL) {
ll_cluster_t *hb;
ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(
&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
hb = (*new_cluster) ("heartbeat");
crm_debug("Testing with Heartbeat");
if (hb->llc_ops->signon(hb, crm_system_name) == HA_OK) {
hb->llc_ops->signoff(hb, FALSE);
cluster_type = pcmk_cluster_heartbeat;
detected = TRUE;
goto done;
}
}
#endif
#if SUPPORT_COROSYNC
/* If nothing is defined in the environment, try corosync (if supported) */
if(cluster == NULL) {
crm_debug("Testing with Corosync");
cluster_type = find_corosync_variant();
if (cluster_type != pcmk_cluster_unknown) {
detected = TRUE;
goto done;
}
}
#endif
/* Something was defined in the environment, test it against what we support */
crm_info("Verifying cluster type: '%s'", cluster?cluster:"-unspecified-");
if (cluster == NULL) {
#if SUPPORT_HEARTBEAT
} else if (safe_str_eq(cluster, "heartbeat")) {
cluster_type = pcmk_cluster_heartbeat;
#endif
#if SUPPORT_COROSYNC
} else if (safe_str_eq(cluster, "openais")
|| safe_str_eq(cluster, "classic openais (with plugin)")) {
cluster_type = pcmk_cluster_classic_ais;
} else if (safe_str_eq(cluster, "corosync")) {
cluster_type = pcmk_cluster_corosync;
#endif
#if SUPPORT_CMAN
} else if (safe_str_eq(cluster, "cman")) {
cluster_type = pcmk_cluster_cman;
#endif
} else {
cluster_type = pcmk_cluster_invalid;
goto done; /* Keep the compiler happy when no stacks are supported */
}
done:
if (cluster_type == pcmk_cluster_unknown) {
crm_notice("Could not determin the current cluster type");
} else if (cluster_type == pcmk_cluster_invalid) {
crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.",
cluster);
crm_exit(DAEMON_RESPAWN_STOP);
} else {
crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type));
}
return cluster_type;
}
gboolean
is_cman_cluster(void)
{
return get_cluster_type() == pcmk_cluster_cman;
}
gboolean
is_corosync_cluster(void)
{
return get_cluster_type() == pcmk_cluster_corosync;
}
gboolean
is_classic_ais_cluster(void)
{
return get_cluster_type() == pcmk_cluster_classic_ais;
}
gboolean
is_openais_cluster(void)
{
enum cluster_type_e type = get_cluster_type();
if (type == pcmk_cluster_classic_ais) {
return TRUE;
} else if (type == pcmk_cluster_corosync) {
return TRUE;
} else if (type == pcmk_cluster_cman) {
return TRUE;
}
return FALSE;
}
gboolean
is_heartbeat_cluster(void)
{
return get_cluster_type() == pcmk_cluster_heartbeat;
}
gboolean
node_name_is_valid(const char *key, const char *name)
{
int octet;
if (name == NULL) {
crm_trace("%s is empty", key);
return FALSE;
} else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
crm_trace("%s contains an ipv4 address, ignoring: %s", key, name);
return FALSE;
} else if (strstr(name, ":") != NULL) {
crm_trace("%s contains an ipv6 address, ignoring: %s", key, name);
return FALSE;
}
crm_trace("%s is valid", key);
return TRUE;
}
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index bc1684e23b..e82cadc85e 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1,510 +1,573 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/param.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <glib.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/msg_xml.h>
#include <crm/stonith-ng.h>
GHashTable *crm_peer_cache = NULL;
+GHashTable *crm_remote_peer_cache = NULL;
unsigned long long crm_peer_seq = 0;
gboolean crm_have_quorum = FALSE;
+void crm_remote_peer_cache_refresh(xmlNode *cib)
+{
+ crm_node_t *node = NULL;
+ xmlXPathObjectPtr xpathObj = NULL;
+ const char *remote = NULL;
+ const char *xpath = NULL;
+ int max = 0;
+ int lpc = 0;
+
+ g_hash_table_remove_all(crm_remote_peer_cache);
+
+ xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR "[@name='remote-node']";
+ xpathObj = xpath_search(cib, xpath);
+ max = numXpathResults(xpathObj);
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *xml = getXpathResult(xpathObj, lpc);
+
+ CRM_CHECK(xml != NULL, continue);
+
+ remote = crm_element_value(xml, "value");
+ if (remote) {
+ crm_trace("added %s to remote cache", remote);
+ node = calloc(1, sizeof(crm_node_t));
+ node->flags = crm_remote_node;
+ CRM_ASSERT(node);
+ node->uname = strdup(remote);
+ node->uuid = strdup(remote);
+ g_hash_table_replace(crm_remote_peer_cache, node->uname, node);
+ }
+ }
+ freeXpathObject(xpathObj);
+}
+
gboolean
crm_is_peer_active(const crm_node_t * node)
{
if(node == NULL) {
return FALSE;
}
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
return crm_is_corosync_peer_active(node);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
return crm_is_heartbeat_peer_active(node);
}
#endif
crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
return FALSE;
}
static gboolean
crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
crm_node_t *search = user_data;
if (search == NULL) {
return FALSE;
} else if (search->id && node->id != search->id) {
return FALSE;
} else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
return FALSE;
} else if (crm_is_peer_active(value) == FALSE) {
crm_notice("Removing %s/%u from the membership list", node->uname, node->id);
return TRUE;
}
return FALSE;
}
guint
reap_crm_member(uint32_t id, const char *name)
{
int matches = 0;
crm_node_t search;
if (crm_peer_cache == NULL) {
crm_trace("Nothing to do, cache not initialized");
return 0;
}
search.id = id;
search.uname = strdup(name);
matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
if(matches) {
crm_notice("Purged %d peers with id=%u and/or uname=%s from the membership cache", matches, id, name);
} else {
crm_info("No peers with id=%u and/or uname=%s exist", id, name);
}
free(search.uname);
return matches;
}
static void
crm_count_peer(gpointer key, gpointer value, gpointer user_data)
{
guint *count = user_data;
crm_node_t *node = value;
if (crm_is_peer_active(node)) {
*count = *count + 1;
}
}
guint
crm_active_peers(void)
{
guint count = 0;
if (crm_peer_cache) {
g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
}
return count;
}
static void
destroy_crm_node(gpointer data)
{
crm_node_t *node = data;
crm_trace("Destroying entry for node %u", node->id);
free(node->addr);
free(node->uname);
free(node->state);
free(node->uuid);
free(node->expected);
free(node);
}
void
crm_peer_init(void)
{
if (crm_peer_cache == NULL) {
crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_crm_node);
}
+
+ if (crm_remote_peer_cache == NULL) {
+ crm_remote_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node);
+ }
}
void
crm_peer_destroy(void)
{
if (crm_peer_cache != NULL) {
crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
+
+ if (crm_remote_peer_cache != NULL) {
+ crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
+ g_hash_table_destroy(crm_remote_peer_cache);
+ crm_remote_peer_cache = NULL;
+ }
}
void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
crm_status_callback = dispatch;
}
static void crm_dump_peer_hash(int level, const char *caller)
{
GHashTableIter iter;
const char *id = NULL;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
}
}
static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
if(value == user_data) {
return TRUE;
}
return FALSE;
}
+crm_node_t *
+crm_get_peer_full(unsigned int id, const char *uname, int flags)
+{
+ crm_node_t *node = NULL;
+
+ CRM_ASSERT(id > 0 || uname != NULL);
+
+ crm_peer_init();
+
+ if (flags & CRM_GET_PEER_REMOTE) {
+ node = g_hash_table_lookup(crm_remote_peer_cache, uname);
+ }
+
+ if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
+ node = crm_get_peer(id, uname);
+ }
+ return node;
+}
+
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
crm_peer_init();
if (uname != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id > 0) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->id == id) {
crm_trace("ID match: %u = %p", node->id, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if(by_id == by_name) {
/* Nothing to do if they match (both NULL counts) */
crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
} else if(by_id == NULL && by_name) {
crm_trace("Only one: %p for %u/%s", by_name, id, uname);
if(id && by_name->id) {
crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
crm_crit("Node %u and %u share the same name '%s'",
id, by_name->id, uname);
node = NULL; /* Create a new one */
} else {
node = by_name;
}
} else if(by_name == NULL && by_id) {
crm_trace("Only one: %p for %u/%s", by_id, id, uname);
if(uname && by_id->uname) {
crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
uname, by_id->uname, id, uname);
}
} else if(uname && by_id->uname) {
crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id);
} else if(id && by_name->id) {
crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
} else {
/* Simple merge */
/* Only corosync based clusters use nodeid's
*
* The functions that call crm_update_peer_state() only know nodeid
* so 'by_id' is authorative when merging
*
* Same for crm_update_peer_proc()
*/
crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
crm_info("Merging %p into %p", by_name, by_id);
g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
}
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = calloc(1, sizeof(crm_node_t));
CRM_ASSERT(node);
crm_info("Created entry %s/%p for node %s/%u (%d total)",
uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
g_hash_table_replace(crm_peer_cache, uniqueid, node);
}
if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
crm_info("Node %u is now known as %s", id, uname);
}
if(id > 0 && node->id == 0) {
node->id = id;
}
if(uname && node->uname == NULL) {
int lpc, len = strlen(uname);
for (lpc = 0; lpc < len; lpc++) {
if (uname[lpc] >= 'A' && uname[lpc] <= 'Z') {
crm_warn("Node names with capitals are discouraged, consider changing '%s' to something else",
uname);
break;
}
}
node->uname = strdup(uname);
if (crm_status_callback) {
crm_status_callback(crm_status_uname, node, NULL);
}
}
if(node->uuid == NULL) {
const char *uuid = crm_peer_uuid(node);
if (uuid) {
crm_info("Node %u has uuid %s", id, uuid);
} else {
crm_info("Cannot obtain a UUID for node %d/%s", id, node->uname);
}
}
return node;
}
crm_node_t *
crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
uint32_t children, const char *uuid, const char *uname, const char *addr,
const char *state)
{
#if SUPPORT_PLUGIN
gboolean addr_changed = FALSE;
gboolean votes_changed = FALSE;
#endif
crm_node_t *node = NULL;
id = get_corosync_id(id, uuid);
node = crm_get_peer(id, uname);
CRM_ASSERT(node != NULL);
if (node->uuid == NULL) {
if (is_openais_cluster()) {
/* Yes, overrule whatever was passed in */
crm_peer_uuid(node);
} else if (uuid != NULL) {
node->uuid = strdup(uuid);
}
}
if (children > 0) {
crm_update_peer_proc(source, node, children, state);
}
if (state != NULL) {
crm_update_peer_state(source, node, state, seen);
}
#if SUPPORT_HEARTBEAT
if (born != 0) {
node->born = born;
}
#endif
#if SUPPORT_PLUGIN
/* These were only used by the plugin */
if (born != 0) {
node->born = born;
}
if (votes > 0 && node->votes != votes) {
votes_changed = TRUE;
node->votes = votes;
}
if (addr != NULL) {
if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
addr_changed = TRUE;
free(node->addr);
node->addr = strdup(addr);
}
}
if (addr_changed || votes_changed) {
crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
" proc=%.32x", source, node->uname, node->id, node->state,
node->addr, addr_changed ? " (new)" : "", node->votes,
votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
}
#endif
return node;
}
void
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
uint32_t last = 0;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
source, peer2text(flag), status); return);
last = node->processes;
if (status == NULL) {
node->processes = flag;
if (node->processes != last) {
changed = TRUE;
}
} else if (safe_str_eq(status, ONLINESTATUS)) {
if ((node->processes & flag) == 0) {
set_bit(node->processes, flag);
changed = TRUE;
}
#if SUPPORT_PLUGIN
} else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
if (flag > 0 && node->processes != flag) {
node->processes = flag;
changed = TRUE;
}
#endif
} else if (node->processes & flag) {
clear_bit(node->processes, flag);
changed = TRUE;
}
if (changed) {
if (status == NULL && flag <= crm_proc_none) {
crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
node->id);
} else {
crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
peer2text(flag), status);
}
if (crm_status_callback) {
crm_status_callback(crm_status_processes, node, &last);
}
} else {
crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
peer2text(flag), status);
}
}
void
crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
return);
last = node->expected;
if (expected != NULL && safe_str_neq(node->expected, expected)) {
node->expected = strdup(expected);
changed = TRUE;
}
if (changed) {
crm_info("%s: Node %s[%u] - expected state is now %s", source, node->uname, node->id,
expected);
free(last);
} else {
crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
node->id, expected);
}
}
void
crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state);
return);
last = node->state;
if (state != NULL && safe_str_neq(node->state, state)) {
node->state = strdup(state);
changed = TRUE;
}
if (membership != 0 && safe_str_eq(node->state, CRM_NODE_MEMBER)) {
node->last_seen = membership;
}
if (changed) {
crm_notice("%s: Node %s[%u] - state is now %s (was %s)", source, node->uname, node->id, state, last);
if (crm_status_callback) {
crm_status_callback(crm_status_nstate, node, last);
}
free(last);
} else {
crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id,
state);
}
}
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
/* Always use the synchronous, non-mainloop version */
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index 37161dc40c..7aa3349247 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -1,2010 +1,2035 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <glib.h>
#include <dirent.h>
#include <crm/crm.h>
#include <crm/lrmd.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipcs.h>
#include <crm/msg_xml.h>
#include <crm/stonith-ng.h>
#ifdef HAVE_GNUTLS_GNUTLS_H
# undef KEYFILE
# include <gnutls/gnutls.h>
#endif
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <netdb.h>
CRM_TRACE_INIT_DATA(lrmd);
static int lrmd_api_disconnect(lrmd_t * lrmd);
static int lrmd_api_is_connected(lrmd_t * lrmd);
/* IPC proxy functions */
int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg);
void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
#ifdef HAVE_GNUTLS_GNUTLS_H
# define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */
gnutls_psk_client_credentials_t psk_cred_s;
int lrmd_tls_set_key(gnutls_datum_t * key);
static void lrmd_tls_disconnect(lrmd_t * lrmd);
static int global_remote_msg_id = 0;
int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type);
static void lrmd_tls_connection_destroy(gpointer userdata);
#endif
typedef struct lrmd_private_s {
enum client_type type;
char *token;
mainloop_io_t *source;
/* IPC parameters */
crm_ipc_t *ipc;
crm_remote_t *remote;
/* Extra TLS parameters */
char *remote_nodename;
#ifdef HAVE_GNUTLS_GNUTLS_H
char *server;
int port;
gnutls_psk_client_credentials_t psk_cred_c;
int sock;
+ /* since tls requires a round trip across the network for a
+ * request/reply, there are times where we just want to be able
+ * to send a request from the client and not wait around (or even care
+ * about) what the reply is. */
+ int expected_late_replies;
GList *pending_notify;
crm_trigger_t *process_notify;
#endif
lrmd_event_callback callback;
/* Internal IPC proxy msg passing for remote guests */
void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg);
void *proxy_callback_userdata;
} lrmd_private_t;
static lrmd_list_t *
lrmd_list_add(lrmd_list_t * head, const char *value)
{
lrmd_list_t *p, *end;
p = calloc(1, sizeof(lrmd_list_t));
p->val = strdup(value);
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
lrmd_list_freeall(lrmd_list_t * head)
{
lrmd_list_t *p;
while (head) {
char *val = (char *)head->val;
p = head->next;
free(val);
free(head);
head = p;
}
}
lrmd_key_value_t *
lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value)
{
lrmd_key_value_t *p, *end;
p = calloc(1, sizeof(lrmd_key_value_t));
p->key = strdup(key);
p->value = strdup(value);
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
lrmd_key_value_freeall(lrmd_key_value_t * head)
{
lrmd_key_value_t *p;
while (head) {
p = head->next;
free(head->key);
free(head->value);
free(head);
head = p;
}
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, strdup(key), strdup(value));
}
lrmd_event_data_t *
lrmd_copy_event(lrmd_event_data_t * event)
{
lrmd_event_data_t *copy = NULL;
copy = calloc(1, sizeof(lrmd_event_data_t));
/* This will get all the int values.
* we just have to be careful not to leave any
* dangling pointers to strings. */
memcpy(copy, event, sizeof(lrmd_event_data_t));
copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL;
copy->op_type = event->op_type ? strdup(event->op_type) : NULL;
copy->user_data = event->user_data ? strdup(event->user_data) : NULL;
copy->output = event->output ? strdup(event->output) : NULL;
copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL;
if (event->params) {
copy->params = g_hash_table_new_full(crm_str_hash,
g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
if (copy->params != NULL) {
g_hash_table_foreach(event->params, dup_attr, copy->params);
}
}
return copy;
}
void
lrmd_free_event(lrmd_event_data_t * event)
{
if (!event) {
return;
}
/* free gives me grief if i try to cast */
free((char *)event->rsc_id);
free((char *)event->op_type);
free((char *)event->user_data);
free((char *)event->output);
free((char *)event->remote_nodename);
if (event->params) {
g_hash_table_destroy(event->params);
}
free(event);
}
static int
lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
{
const char *type;
const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION);
lrmd_private_t *native = lrmd->private;
lrmd_event_data_t event = { 0, };
if (proxy_session != NULL) {
/* this is proxy business */
lrmd_internal_proxy_dispatch(lrmd, msg);
return 1;
- }
-
- if (!native->callback) {
+ } else if (!native->callback) {
/* no callback set */
crm_trace("notify event received but client has not set callback");
return 1;
}
event.remote_nodename = native->remote_nodename;
type = crm_element_value(msg, F_LRMD_OPERATION);
crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id);
event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID);
if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) {
event.type = lrmd_event_register;
} else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) {
event.type = lrmd_event_unregister;
} else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) {
crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout);
crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval);
crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay);
crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc);
crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status);
crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted);
crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run);
crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange);
crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time);
crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time);
event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION);
event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
event.type = lrmd_event_exec_complete;
event.params = xml2list(msg);
} else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) {
event.type = lrmd_event_poke;
} else {
return 1;
}
crm_trace("op %s notify event received", type);
native->callback(&event);
if (event.params) {
g_hash_table_destroy(event.params);
}
return 1;
}
static int
lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
xmlNode *msg;
int rc;
if (!native->callback) {
/* no callback set */
return 1;
}
msg = string2xml(buffer);
rc = lrmd_dispatch_internal(lrmd, msg);
free_xml(msg);
return rc;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_free_xml(gpointer userdata)
{
free_xml((xmlNode *) userdata);
}
static int
lrmd_tls_connected(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
if (native->remote->tls_session) {
return TRUE;
}
return FALSE;
}
static int
lrmd_tls_dispatch(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
xmlNode *xml = NULL;
int rc = 0;
int disconnected = 0;
if (lrmd_tls_connected(lrmd) == FALSE) {
crm_trace("tls dispatch triggered after disconnect");
return 0;
}
crm_trace("tls_dispatch triggered");
/* First check if there are any pending notifies to process that came
* while we were waiting for replies earlier. */
if (native->pending_notify) {
GList *iter = NULL;
crm_trace("Processing pending notifies");
for (iter = native->pending_notify; iter; iter = iter->next) {
lrmd_dispatch_internal(lrmd, iter->data);
}
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
/* Next read the current buffer and see if there are any messages to handle. */
rc = crm_remote_ready(native->remote, 0);
if (rc == 0) {
/* nothing to read, see if any full messages are already in buffer. */
xml = crm_remote_parse_buffer(native->remote);
} else if (rc < 0) {
disconnected = 1;
} else {
crm_remote_recv(native->remote, -1, &disconnected);
xml = crm_remote_parse_buffer(native->remote);
}
while (xml) {
- lrmd_dispatch_internal(lrmd, xml);
+ const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
+ if (safe_str_eq(msg_type, "notify")) {
+ lrmd_dispatch_internal(lrmd, xml);
+ } else if (safe_str_eq(msg_type, "reply")) {
+ if (native->expected_late_replies > 0) {
+ native->expected_late_replies--;
+ } else {
+ int reply_id = 0;
+ crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
+ /* if this happens, we want to know about it */
+ crm_err("Got outdated reply %d", reply_id);
+ }
+ }
free_xml(xml);
xml = crm_remote_parse_buffer(native->remote);
}
if (disconnected) {
crm_info("Server disconnected while reading remote server msg.");
lrmd_tls_disconnect(lrmd);
return 0;
}
return 1;
}
#endif
/* Not used with mainloop */
int
lrmd_poll(lrmd_t * lrmd, int timeout)
{
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
return crm_ipc_ready(native->ipc);
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
if (native->pending_notify) {
return 1;
} else if (native->remote->buffer
&& strstr(native->remote->buffer, REMOTE_MSG_TERMINATOR)) {
return 1;
}
return crm_remote_ready(native->remote, 0);
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return 0;
}
/* Not used with mainloop */
bool
lrmd_dispatch(lrmd_t * lrmd)
{
lrmd_private_t *private = NULL;
CRM_ASSERT(lrmd != NULL);
private = lrmd->private;
switch (private->type) {
case CRM_CLIENT_IPC:
while (crm_ipc_ready(private->ipc)) {
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
lrmd_ipc_dispatch(msg, strlen(msg), lrmd);
}
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
lrmd_tls_dispatch(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", private->type);
}
if (lrmd_api_is_connected(lrmd) == FALSE) {
crm_err("Connection closed");
return FALSE;
}
return TRUE;
}
static xmlNode *
lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options)
{
xmlNode *op_msg = create_xml_node(NULL, "lrmd_command");
CRM_CHECK(op_msg != NULL, return NULL);
CRM_CHECK(token != NULL, return NULL);
crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command");
crm_xml_add(op_msg, F_TYPE, T_LRMD);
crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token);
crm_xml_add(op_msg, F_LRMD_OPERATION, op);
crm_trace("Sending call options: %.8lx, %d", (long)options, options);
crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options);
if (data != NULL) {
add_message_xml(op_msg, F_LRMD_CALLDATA, data);
}
return op_msg;
}
static void
lrmd_ipc_connection_destroy(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
crm_info("IPC connection destroyed");
/* Prevent these from being cleaned up in lrmd_api_disconnect() */
native->ipc = NULL;
native->source = NULL;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.type = lrmd_event_disconnect;
event.remote_nodename = native->remote_nodename;
native->callback(&event);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tls_connection_destroy(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
crm_info("TLS connection destroyed");
if (native->remote->tls_session) {
gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
}
if (native->psk_cred_c) {
gnutls_psk_free_client_credentials(native->psk_cred_c);
}
if (native->sock) {
close(native->sock);
}
if (native->process_notify) {
mainloop_destroy_trigger(native->process_notify);
native->process_notify = NULL;
}
if (native->pending_notify) {
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
free(native->remote->buffer);
native->remote->buffer = NULL;
native->source = 0;
native->sock = 0;
native->psk_cred_c = NULL;
native->remote->tls_session = NULL;
native->sock = 0;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.remote_nodename = native->remote_nodename;
event.type = lrmd_event_disconnect;
native->callback(&event);
}
return;
}
int
lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type)
{
int rc = -1;
crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id);
crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type);
rc = crm_remote_send(session, msg);
if (rc < 0) {
crm_err("Failed to send remote lrmd tls msg, rc = %d", rc);
return rc;
}
return rc;
}
static xmlNode *
lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected)
{
lrmd_private_t *native = lrmd->private;
xmlNode *xml = NULL;
time_t start = time(NULL);
const char *msg_type = NULL;
int reply_id = 0;
int remaining_timeout = 0;
/* A timeout of 0 here makes no sense. We have to wait a period of time
* for the response to come back. If -1 or 0, default to 10 seconds. */
if (total_timeout <= 0) {
total_timeout = 10000;
}
while (!xml) {
xml = crm_remote_parse_buffer(native->remote);
if (!xml) {
/* read some more off the tls buffer if we still have time left. */
if (remaining_timeout) {
remaining_timeout = remaining_timeout - ((time(NULL) - start) * 1000);
} else {
remaining_timeout = total_timeout;
}
if (remaining_timeout <= 0) {
return NULL;
}
crm_remote_recv(native->remote, remaining_timeout, disconnected);
xml = crm_remote_parse_buffer(native->remote);
if (!xml || *disconnected) {
return NULL;
}
}
CRM_ASSERT(xml != NULL);
crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id);
msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
if (!msg_type) {
crm_err("Empty msg type received while waiting for reply");
free_xml(xml);
xml = NULL;
} else if (safe_str_eq(msg_type, "notify")) {
/* got a notify while waiting for reply, trigger the notify to be processed later */
crm_info("queueing notify");
native->pending_notify = g_list_append(native->pending_notify, xml);
if (native->process_notify) {
crm_info("notify trigger set.");
mainloop_set_trigger(native->process_notify);
}
xml = NULL;
} else if (safe_str_neq(msg_type, "reply")) {
/* msg isn't a reply, make some noise */
crm_err("Expected a reply, got %s", msg_type);
free_xml(xml);
xml = NULL;
} else if (reply_id != expected_reply_id) {
- crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+ if (native->expected_late_replies > 0) {
+ native->expected_late_replies--;
+ } else {
+ crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+ }
free_xml(xml);
xml = NULL;
}
}
if (native->remote->buffer && native->process_notify) {
mainloop_set_trigger(native->process_notify);
}
return xml;
}
static int
lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg)
{
int rc = 0;
lrmd_private_t *native = lrmd->private;
global_remote_msg_id++;
if (global_remote_msg_id <= 0) {
global_remote_msg_id = 1;
}
rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request");
if (rc <= 0) {
crm_err("Remote lrmd send failed, disconnecting");
lrmd_tls_disconnect(lrmd);
return -ENOTCONN;
}
return pcmk_ok;
}
static int
lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
int rc = 0;
int disconnected = 0;
xmlNode *xml = NULL;
if (lrmd_tls_connected(lrmd) == FALSE) {
return -1;
}
rc = lrmd_tls_send(lrmd, msg);
if (rc < 0) {
return rc;
}
xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected);
if (disconnected) {
crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ",
global_remote_msg_id);
lrmd_tls_disconnect(lrmd);
rc = -ENOTCONN;
} else if (!xml) {
crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ",
global_remote_msg_id, timeout);
rc = -ECOMM;
}
if (reply) {
*reply = xml;
} else {
free_xml(xml);
}
return rc;
}
#endif
static int
lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
int rc = -1;
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static int
lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
{
int rc = -1;
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_none, 0, NULL);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_send(lrmd, msg);
+ if (rc == pcmk_ok) {
+ /* we don't want to wait around for the reply, but
+ * since the request/reply protocol needs to behave the same
+ * as libqb, a reply will eventually come later anyway. */
+ native->expected_late_replies++;
+ }
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static int
lrmd_api_is_connected(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
return crm_ipc_connected(native->ipc);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
return lrmd_tls_connected(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return 0;
}
static int
lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout, /* ms. defaults to 1000 if set to 0 */
enum lrmd_call_options options, gboolean expect_reply)
{ /* TODO we need to reduce usage of this boolean */
int rc = pcmk_ok;
int reply_id = -1;
lrmd_private_t *native = lrmd->private;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
if (!lrmd_api_is_connected(lrmd)) {
return -ENOTCONN;
}
if (op == NULL) {
crm_err("No operation specified");
return -EINVAL;
}
CRM_CHECK(native->token != NULL,;
);
crm_trace("sending %s op to lrmd", op);
op_msg = lrmd_create_op(native->token, op, data, options);
if (op_msg == NULL) {
return -EINVAL;
}
crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout);
if (expect_reply) {
rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply);
} else {
rc = lrmd_send_xml_no_reply(lrmd, op_msg);
goto done;
}
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
}
rc = pcmk_ok;
crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id);
crm_trace("%s op reply received", op);
if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) {
rc = -ENOMSG;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
if (output_data) {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
done:
if (lrmd_api_is_connected(lrmd) == FALSE) {
crm_err("LRMD disconnected");
}
free_xml(op_msg);
free_xml(op_reply);
return rc;
}
static int
lrmd_api_poke_connection(lrmd_t * lrmd)
{
int rc;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, FALSE);
free_xml(data);
return rc;
}
static int
lrmd_handshake(lrmd_t * lrmd, const char *name)
{
int rc = pcmk_ok;
lrmd_private_t *native = lrmd->private;
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "lrmd_command");
crm_xml_add(hello, F_TYPE, T_LRMD);
crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(hello, F_LRMD_CLIENTNAME, name);
crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
/* advertise that we are a proxy provider */
if (native->proxy_callback) {
crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true");
}
rc = lrmd_send_xml(lrmd, hello, -1, &reply);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_err("Did not receive registration reply");
rc = -EPROTO;
} else {
const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION);
const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID);
crm_element_value_int(reply, F_LRMD_RC, &rc);
if (rc == -EPROTO) {
crm_err("LRMD protocol mismatch client version %s, server version %s",
LRMD_PROTOCOL_VERSION, crm_element_value(reply, F_LRMD_PROTOCOL_VERSION));
crm_log_xml_err(reply, "Protocol Error");
} else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
crm_err("Invalid registration message: %s", msg_type);
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
crm_err("No registration token provided");
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else {
crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
if (rc != pcmk_ok) {
lrmd_api_disconnect(lrmd);
}
return rc;
}
static int
lrmd_ipc_connect(lrmd_t * lrmd, int *fd)
{
int rc = pcmk_ok;
lrmd_private_t *native = lrmd->private;
static struct ipc_client_callbacks lrmd_callbacks = {
.dispatch = lrmd_ipc_dispatch,
.destroy = lrmd_ipc_connection_destroy
};
crm_info("Connecting to lrmd");
if (fd) {
/* No mainloop */
native->ipc = crm_ipc_new("lrmd", 0);
if (native->ipc && crm_ipc_connect(native->ipc)) {
*fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
rc = -ENOTCONN;
}
} else {
native->source = mainloop_add_ipc_client("lrmd", G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
crm_debug("Could not connect to the LRMD API");
rc = -ENOTCONN;
}
return rc;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static int
set_key(gnutls_datum_t * key, const char *location)
{
FILE *stream;
int read_len = 256;
int cur_len = 0;
int buf_len = read_len;
static char *key_cache = NULL;
static size_t key_cache_len = 0;
static time_t key_cache_updated;
if (location == NULL) {
return -1;
}
if (key_cache) {
time_t now = time(NULL);
if ((now - key_cache_updated) < 60) {
key->data = gnutls_malloc(key_cache_len + 1);
key->size = key_cache_len;
memcpy(key->data, key_cache, key_cache_len);
crm_debug("using cached LRMD key");
return 0;
} else {
key_cache_len = 0;
key_cache_updated = 0;
free(key_cache);
key_cache = NULL;
crm_debug("clearing lrmd key cache");
}
}
stream = fopen(location, "r");
if (!stream) {
return -1;
}
key->data = gnutls_malloc(read_len);
while (!feof(stream)) {
int next;
if (cur_len == buf_len) {
buf_len = cur_len + read_len;
key->data = gnutls_realloc(key->data, buf_len);
}
next = fgetc(stream);
if (next == EOF && feof(stream)) {
break;
}
key->data[cur_len] = next;
cur_len++;
}
fclose(stream);
key->size = cur_len;
if (!cur_len) {
gnutls_free(key->data);
key->data = 0;
return -1;
}
if (!key_cache) {
key_cache = calloc(1, key->size + 1);
memcpy(key_cache, key->data, key->size);
key_cache_len = key->size;
key_cache_updated = time(NULL);
}
return 0;
}
int
lrmd_tls_set_key(gnutls_datum_t * key)
{
int rc = 0;
const char *specific_location = getenv("PCMK_authkey_location");
if (set_key(key, specific_location) == 0) {
crm_debug("Using custom authkey location %s", specific_location);
return 0;
}
if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) {
rc = set_key(key, ALT_REMOTE_KEY_LOCATION);
}
if (rc) {
crm_err("No lrmd remote key found");
return -1;
}
return rc;
}
static void
lrmd_gnutls_global_init(void)
{
static int gnutls_init = 0;
if (!gnutls_init) {
gnutls_global_init();
}
gnutls_init = 1;
}
#endif
static void
report_async_connection_result(lrmd_t * lrmd, int rc)
{
lrmd_private_t *native = lrmd->private;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.type = lrmd_event_connect;
event.remote_nodename = native->remote_nodename;
event.connection_rc = rc;
native->callback(&event);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tcp_connect_cb(void *userdata, int sock)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->private;
char name[256] = { 0, };
static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
.dispatch = lrmd_tls_dispatch,
.destroy = lrmd_tls_connection_destroy,
};
int rc = sock;
gnutls_datum_t psk_key = { NULL, 0 };
if (rc < 0) {
lrmd_tls_connection_destroy(lrmd);
crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port);
report_async_connection_result(lrmd, rc);
return;
}
/* TODO continue with tls stuff now that tcp connect passed. make this async as well soon
* to avoid all blocking code in the client. */
native->sock = sock;
if (lrmd_tls_set_key(&psk_key) != 0) {
lrmd_tls_connection_destroy(lrmd);
return;
}
gnutls_psk_allocate_client_credentials(&native->psk_cred_c);
gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW);
gnutls_free(psk_key.data);
native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c);
if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server,
native->port);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = NULL;
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, -1);
return;
}
crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server,
native->port);
snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port);
native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
native->source =
mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks);
rc = lrmd_handshake(lrmd, name);
report_async_connection_result(lrmd, rc);
return;
}
static int
lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ )
{
int rc = 0;
lrmd_private_t *native = lrmd->private;
lrmd_gnutls_global_init();
rc = crm_remote_tcp_connect_async(native->server, native->port, timeout, lrmd,
lrmd_tcp_connect_cb);
return rc;
}
static int
lrmd_tls_connect(lrmd_t * lrmd, int *fd)
{
static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
.dispatch = lrmd_tls_dispatch,
.destroy = lrmd_tls_connection_destroy,
};
lrmd_private_t *native = lrmd->private;
int sock;
gnutls_datum_t psk_key = { NULL, 0 };
lrmd_gnutls_global_init();
sock = crm_remote_tcp_connect(native->server, native->port);
if (sock < 0) {
crm_warn("Could not establish remote lrmd connection to %s", native->server);
lrmd_tls_connection_destroy(lrmd);
return -ENOTCONN;
}
native->sock = sock;
if (lrmd_tls_set_key(&psk_key) != 0) {
lrmd_tls_connection_destroy(lrmd);
return -1;
}
gnutls_psk_allocate_client_credentials(&native->psk_cred_c);
gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW);
gnutls_free(psk_key.data);
native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c);
if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
crm_err("Session creation for %s:%d failed", native->server, native->port);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = NULL;
lrmd_tls_connection_destroy(lrmd);
return -1;
}
crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server,
native->port);
if (fd) {
*fd = sock;
} else {
char name[256] = { 0, };
snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port);
native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
native->source =
mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks);
}
return pcmk_ok;
}
#endif
static int
lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd)
{
int rc = -ENOTCONN;
lrmd_private_t *native = lrmd->private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = lrmd_ipc_connect(lrmd, fd);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_connect(lrmd, fd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
if (rc == pcmk_ok) {
rc = lrmd_handshake(lrmd, name);
}
return rc;
}
static int
lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout)
{
int rc = 0;
lrmd_private_t *native = lrmd->private;
if (!native->callback) {
crm_err("Async connect not possible, no lrmd client callback set.");
return -1;
}
switch (native->type) {
case CRM_CLIENT_IPC:
/* fake async connection with ipc. it should be fast
* enough that we gain very little from async */
rc = lrmd_api_connect(lrmd, name, NULL);
if (!rc) {
report_async_connection_result(lrmd, rc);
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_connect_async(lrmd, timeout);
if (rc) {
/* connection failed, report rc now */
report_async_connection_result(lrmd, rc);
}
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static void
lrmd_ipc_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tls_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
if (native->remote->tls_session) {
gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = 0;
}
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
} else if (native->sock) {
close(native->sock);
}
if (native->pending_notify) {
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
}
#endif
static int
lrmd_api_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
crm_info("Disconnecting from lrmd service");
switch (native->type) {
case CRM_CLIENT_IPC:
lrmd_ipc_disconnect(lrmd);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
lrmd_tls_disconnect(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
free(native->token);
native->token = NULL;
return 0;
}
static int
lrmd_api_register_rsc(lrmd_t * lrmd,
const char *rsc_id,
const char *class,
const char *provider, const char *type, enum lrmd_call_options options)
{
int rc = pcmk_ok;
xmlNode *data = NULL;
if (!class || !type || !rsc_id) {
return -EINVAL;
}
if (safe_str_eq(class, "ocf") && !provider) {
return -EINVAL;
}
data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add(data, F_LRMD_CLASS, class);
crm_xml_add(data, F_LRMD_PROVIDER, provider);
crm_xml_add(data, F_LRMD_TYPE, type);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE);
free_xml(data);
return rc;
}
static int
lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE);
free_xml(data);
return rc;
}
lrmd_rsc_info_t *
lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info)
{
lrmd_rsc_info_t *copy = NULL;
copy = calloc(1, sizeof(lrmd_rsc_info_t));
copy->id = strdup(rsc_info->id);
copy->type = strdup(rsc_info->type);
copy->class = strdup(rsc_info->class);
if (rsc_info->provider) {
copy->provider = strdup(rsc_info->provider);
}
return copy;
}
void
lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info)
{
if (!rsc_info) {
return;
}
free(rsc_info->id);
free(rsc_info->type);
free(rsc_info->class);
free(rsc_info->provider);
free(rsc_info);
}
static lrmd_rsc_info_t *
lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
lrmd_rsc_info_t *rsc_info = NULL;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
xmlNode *output = NULL;
const char *class = NULL;
const char *provider = NULL;
const char *type = NULL;
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE);
free_xml(data);
if (!output) {
return NULL;
}
class = crm_element_value(output, F_LRMD_CLASS);
provider = crm_element_value(output, F_LRMD_PROVIDER);
type = crm_element_value(output, F_LRMD_TYPE);
if (!class || !type) {
free_xml(output);
return NULL;
} else if (safe_str_eq(class, "ocf") && !provider) {
free_xml(output);
return NULL;
}
rsc_info = calloc(1, sizeof(lrmd_rsc_info_t));
rsc_info->id = strdup(rsc_id);
rsc_info->class = strdup(class);
if (provider) {
rsc_info->provider = strdup(provider);
}
rsc_info->type = strdup(type);
free_xml(output);
return rsc_info;
}
static void
lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback)
{
lrmd_private_t *native = lrmd->private;
native->callback = callback;
}
void
lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg))
{
lrmd_private_t *native = lrmd->private;
native->proxy_callback = callback;
native->proxy_callback_userdata = userdata;
}
void
lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg)
{
lrmd_private_t *native = lrmd->private;
if (native->proxy_callback) {
crm_log_xml_trace(msg, "PROXY_INBOUND");
native->proxy_callback(lrmd, native->proxy_callback_userdata, msg);
}
}
int
lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg)
{
if (lrmd == NULL) {
return -ENOTCONN;
}
crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD);
crm_log_xml_trace(msg, "PROXY_OUTBOUND");
return lrmd_send_xml_no_reply(lrmd, msg);
}
static int
stonith_get_metadata(const char *provider, const char *type, char **output)
{
int rc = pcmk_ok;
stonith_t *stonith_api = stonith_api_new();
if(stonith_api) {
stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0);
stonith_api->cmds->free(stonith_api);
}
if (*output == NULL) {
rc = -EIO;
}
return rc;
}
#define lsb_metadata_template \
"<?xml version='1.0'?>\n" \
"<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n" \
"<resource-agent name='%s' version='0.1'>\n" \
" <version>1.0</version>\n" \
" <longdesc lang='en'>\n" \
" %s\n" \
" </longdesc>\n" \
" <shortdesc lang='en'>%s</shortdesc>\n" \
" <parameters>\n" \
" </parameters>\n" \
" <actions>\n" \
" <action name='meta-data' timeout='5' />\n" \
" <action name='start' timeout='15' />\n" \
" <action name='stop' timeout='15' />\n" \
" <action name='status' timeout='15' />\n" \
" <action name='restart' timeout='15' />\n" \
" <action name='force-reload' timeout='15' />\n" \
" <action name='monitor' timeout='15' interval='15' />\n" \
" </actions>\n" \
" <special tag='LSB'>\n" \
" <Provides>%s</Provides>\n" \
" <Required-Start>%s</Required-Start>\n" \
" <Required-Stop>%s</Required-Stop>\n" \
" <Should-Start>%s</Should-Start>\n" \
" <Should-Stop>%s</Should-Stop>\n" \
" <Default-Start>%s</Default-Start>\n" \
" <Default-Stop>%s</Default-Stop>\n" \
" </special>\n" \
"</resource-agent>\n"
#define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO"
#define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO"
#define PROVIDES "# Provides:"
#define REQ_START "# Required-Start:"
#define REQ_STOP "# Required-Stop:"
#define SHLD_START "# Should-Start:"
#define SHLD_STOP "# Should-Stop:"
#define DFLT_START "# Default-Start:"
#define DFLT_STOP "# Default-Stop:"
#define SHORT_DSCR "# Short-Description:"
#define DESCRIPTION "# Description:"
#define lsb_meta_helper_free_value(m) \
if ((m) != NULL) { \
xmlFree(m); \
(m) = NULL; \
}
#define lsb_meta_helper_get_value(buffer, ptr, keyword) \
if (!ptr && !strncasecmp(buffer, keyword, strlen(keyword))) { \
(ptr) = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer+strlen(keyword)); \
continue; \
}
static int
lsb_get_metadata(const char *type, char **output)
{
char ra_pathname[PATH_MAX] = { 0, };
FILE *fp;
GString *meta_data = NULL;
char buffer[1024];
char *provides = NULL;
char *req_start = NULL;
char *req_stop = NULL;
char *shld_start = NULL;
char *shld_stop = NULL;
char *dflt_start = NULL;
char *dflt_stop = NULL;
char *s_dscrpt = NULL;
char *xml_l_dscrpt = NULL;
GString *l_dscrpt = NULL;
if(type[0] == '/') {
snprintf(ra_pathname, sizeof(ra_pathname), "%s", type);
} else {
snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type);
}
crm_trace("Looking into %s", ra_pathname);
if (!(fp = fopen(ra_pathname, "r"))) {
return -errno;
}
/* Enter into the lsb-compliant comment block */
while (fgets(buffer, sizeof(buffer), fp)) {
/* Now suppose each of the following eight arguments contain only one line */
lsb_meta_helper_get_value(buffer, provides, PROVIDES)
lsb_meta_helper_get_value(buffer, req_start, REQ_START)
lsb_meta_helper_get_value(buffer, req_stop, REQ_STOP)
lsb_meta_helper_get_value(buffer, shld_start, SHLD_START)
lsb_meta_helper_get_value(buffer, shld_stop, SHLD_STOP)
lsb_meta_helper_get_value(buffer, dflt_start, DFLT_START)
lsb_meta_helper_get_value(buffer, dflt_stop, DFLT_STOP)
lsb_meta_helper_get_value(buffer, s_dscrpt, SHORT_DSCR)
/* Long description may cross multiple lines */
if ((l_dscrpt == NULL) && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) {
l_dscrpt = g_string_new(buffer + strlen(DESCRIPTION));
/* Between # and keyword, more than one space, or a tab character,
* indicates the continuation line. Extracted from LSB init script standard */
while (fgets(buffer, sizeof(buffer), fp)) {
if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) {
buffer[0] = ' ';
l_dscrpt = g_string_append(l_dscrpt, buffer);
} else {
fputs(buffer, fp);
break; /* Long description ends */
}
}
continue;
}
if (l_dscrpt) {
xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(l_dscrpt->str));
}
if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) {
/* Get to the out border of LSB comment block */
break;
}
if (buffer[0] != '#') {
break; /* Out of comment block in the beginning */
}
}
fclose(fp);
meta_data = g_string_new("");
g_string_sprintf(meta_data, lsb_metadata_template, type,
(xml_l_dscrpt == NULL) ? type : xml_l_dscrpt,
(s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides,
(req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop,
(shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop,
(dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop);
lsb_meta_helper_free_value(xml_l_dscrpt);
lsb_meta_helper_free_value(s_dscrpt);
lsb_meta_helper_free_value(provides);
lsb_meta_helper_free_value(req_start);
lsb_meta_helper_free_value(req_stop);
lsb_meta_helper_free_value(shld_start);
lsb_meta_helper_free_value(shld_stop);
lsb_meta_helper_free_value(dflt_start);
lsb_meta_helper_free_value(dflt_stop);
if (l_dscrpt) {
g_string_free(l_dscrpt, TRUE);
}
*output = strdup(meta_data->str);
g_string_free(meta_data, TRUE);
crm_trace("Created fake metadata: %d", strlen(*output));
return pcmk_ok;
}
#if SUPPORT_NAGIOS
static int
nagios_get_metadata(const char *type, char **output)
{
int rc = pcmk_ok;
FILE *file_strm = NULL;
int start = 0, length = 0, read_len = 0;
char *metadata_file = NULL;
int len = 36;
len += strlen(NAGIOS_METADATA_DIR);
len += strlen(type);
metadata_file = calloc(1, len);
CRM_CHECK(metadata_file != NULL, return -ENOMEM);
sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type);
file_strm = fopen(metadata_file, "r");
if (file_strm == NULL) {
crm_err("Metadata file %s does not exist", metadata_file);
free(metadata_file);
return -EIO;
}
/* see how big the file is */
start = ftell(file_strm);
fseek(file_strm, 0L, SEEK_END);
length = ftell(file_strm);
fseek(file_strm, 0L, start);
CRM_ASSERT(length >= 0);
CRM_ASSERT(start == ftell(file_strm));
if (length <= 0) {
crm_info("%s was not valid", metadata_file);
free(*output);
*output = NULL;
rc = -EIO;
} else {
crm_trace("Reading %d bytes from file", length);
*output = calloc(1, (length + 1));
read_len = fread(*output, 1, length, file_strm);
if (read_len != length) {
crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len);
free(*output);
*output = NULL;
rc = -EIO;
}
}
fclose(file_strm);
free(metadata_file);
return rc;
}
#endif
static int
generic_get_metadata(const char *standard, const char *provider, const char *type, char **output)
{
svc_action_t *action = resources_action_create(type,
standard,
provider,
type,
"meta-data",
0,
5000,
NULL);
if (!(services_action_sync(action))) {
crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type);
services_action_free(action);
return -EIO;
}
if (!action->stdout_data) {
crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type);
services_action_free(action);
return -EIO;
}
*output = strdup(action->stdout_data);
services_action_free(action);
return pcmk_ok;
}
static int
lrmd_api_get_metadata(lrmd_t * lrmd,
const char *class,
const char *provider,
const char *type, char **output, enum lrmd_call_options options)
{
if (!class || !type) {
return -EINVAL;
}
if (safe_str_eq(class, "stonith")) {
return stonith_get_metadata(provider, type, output);
} else if (safe_str_eq(class, "lsb")) {
return lsb_get_metadata(type, output);
#if SUPPORT_NAGIOS
} else if (safe_str_eq(class, "nagios")) {
return nagios_get_metadata(type, output);
#endif
}
return generic_get_metadata(class, provider, type, output);
}
static int
lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
int timeout, /* ms */
int start_delay, /* ms */
enum lrmd_call_options options, lrmd_key_value_t * params)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
lrmd_key_value_t *tmp = NULL;
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add(data, F_LRMD_RSC_ACTION, action);
crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata);
crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval);
crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout);
crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay);
for (tmp = params; tmp; tmp = tmp->next) {
hash2field((gpointer) tmp->key, (gpointer) tmp->value, args);
}
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE);
free_xml(data);
lrmd_key_value_freeall(params);
return rc;
}
static int
lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ACTION, action);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE);
free_xml(data);
return rc;
}
static int
list_stonith_agents(lrmd_list_t ** resources)
{
int rc = 0;
stonith_t *stonith_api = stonith_api_new();
stonith_key_value_t *stonith_resources = NULL;
stonith_key_value_t *dIter = NULL;
if(stonith_api) {
stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0);
stonith_api->cmds->free(stonith_api);
}
for (dIter = stonith_resources; dIter; dIter = dIter->next) {
rc++;
if (resources) {
*resources = lrmd_list_add(*resources, dIter->value);
}
}
stonith_key_value_freeall(stonith_resources, 1, 0);
return rc;
}
static int
lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class,
const char *provider)
{
int rc = 0;
if (safe_str_eq(class, "stonith")) {
rc += list_stonith_agents(resources);
} else {
GListPtr gIter = NULL;
GList *agents = resources_list_agents(class, provider);
for (gIter = agents; gIter != NULL; gIter = gIter->next) {
*resources = lrmd_list_add(*resources, (const char *)gIter->data);
rc++;
}
g_list_free_full(agents, free);
if (!class) {
rc += list_stonith_agents(resources);
}
}
if (rc == 0) {
crm_notice("No agents found for class %s", class);
rc = -EPROTONOSUPPORT;
}
return rc;
}
static int
does_provider_have_agent(const char *agent, const char *provider, const char *class)
{
int found = 0;
GList *agents = NULL;
GListPtr gIter2 = NULL;
agents = resources_list_agents(class, provider);
for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) {
if (safe_str_eq(agent, gIter2->data)) {
found = 1;
}
}
g_list_free_full(agents, free);
return found;
}
static int
lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers)
{
int rc = pcmk_ok;
char *provider = NULL;
GList *ocf_providers = NULL;
GListPtr gIter = NULL;
ocf_providers = resources_list_providers("ocf");
for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) {
provider = gIter->data;
if (!agent || does_provider_have_agent(agent, provider, "ocf")) {
*providers = lrmd_list_add(*providers, (const char *)gIter->data);
rc++;
}
}
g_list_free_full(ocf_providers, free);
return rc;
}
static int
lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported)
{
int rc = 0;
GList *standards = NULL;
GListPtr gIter = NULL;
standards = resources_list_standards();
for (gIter = standards; gIter != NULL; gIter = gIter->next) {
*supported = lrmd_list_add(*supported, (const char *)gIter->data);
rc++;
}
if (list_stonith_agents(NULL) > 0) {
*supported = lrmd_list_add(*supported, "stonith");
rc++;
}
g_list_free_full(standards, free);
return rc;
}
lrmd_t *
lrmd_api_new(void)
{
lrmd_t *new_lrmd = NULL;
lrmd_private_t *pvt = NULL;
new_lrmd = calloc(1, sizeof(lrmd_t));
pvt = calloc(1, sizeof(lrmd_private_t));
pvt->remote = calloc(1, sizeof(crm_remote_t));
new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t));
pvt->type = CRM_CLIENT_IPC;
new_lrmd->private = pvt;
new_lrmd->cmds->connect = lrmd_api_connect;
new_lrmd->cmds->connect_async = lrmd_api_connect_async;
new_lrmd->cmds->is_connected = lrmd_api_is_connected;
new_lrmd->cmds->poke_connection = lrmd_api_poke_connection;
new_lrmd->cmds->disconnect = lrmd_api_disconnect;
new_lrmd->cmds->register_rsc = lrmd_api_register_rsc;
new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc;
new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info;
new_lrmd->cmds->set_callback = lrmd_api_set_callback;
new_lrmd->cmds->get_metadata = lrmd_api_get_metadata;
new_lrmd->cmds->exec = lrmd_api_exec;
new_lrmd->cmds->cancel = lrmd_api_cancel;
new_lrmd->cmds->list_agents = lrmd_api_list_agents;
new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers;
new_lrmd->cmds->list_standards = lrmd_api_list_standards;
return new_lrmd;
}
lrmd_t *
lrmd_remote_api_new(const char *nodename, const char *server, int port)
{
#ifdef HAVE_GNUTLS_GNUTLS_H
lrmd_t *new_lrmd = lrmd_api_new();
lrmd_private_t *native = new_lrmd->private;
if (!nodename && !server) {
lrmd_api_delete(new_lrmd);
return NULL;
}
native->type = CRM_CLIENT_TLS;
native->remote_nodename = nodename ? strdup(nodename) : strdup(server);
native->server = server ? strdup(server) : strdup(nodename);
native->port = port;
if (native->port == 0) {
const char *remote_port_str = getenv("PCMK_remote_port");
native->port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT;
}
return new_lrmd;
#else
crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created");
return NULL;
#endif
}
void
lrmd_api_delete(lrmd_t * lrmd)
{
if (!lrmd) {
return;
}
lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */
free(lrmd->cmds);
if (lrmd->private) {
lrmd_private_t *native = lrmd->private;
#ifdef HAVE_GNUTLS_GNUTLS_H
free(native->server);
#endif
free(native->remote_nodename);
free(native->remote);
}
free(lrmd->private);
free(lrmd);
}
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index 14079daaa6..2802caa3e2 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1,1321 +1,1322 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/msg_xml.h>
#include <lrmd_private.h>
#ifdef HAVE_SYS_TIMEB_H
# include <sys/timeb.h>
#endif
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
int interval;
int start_delay;
int timeout_orig;
int call_id;
int exec_rc;
int lrmd_op_status;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *output;
char *userdata_str;
#ifdef HAVE_SYS_TIMEB_H
/* Timestamp of when op first ran */
struct timeb t_first_run;
/* Timestamp of when op ran */
struct timeb t_run;
/* Timestamp of when op was queued */
struct timeb t_queue;
/* Timestamp of last rc change */
struct timeb t_rcchange;
#endif
int first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
char pid_str[32] = { 0, };
int log_level = LOG_INFO;
if (cmd->last_pid) {
snprintf(pid_str, 32, "%d", cmd->last_pid);
}
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
#ifdef HAVE_SYS_TIMEB_H
do_crm_log(log_level,
"finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms",
cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str,
cmd->exec_rc, exec_time, queue_time);
#else
do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d",
cmd->rsc_id,
cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc);
#endif
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode * msg, crm_client_t * client)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
cmd->call_opts = call_options;
cmd->client_id = strdup(client->id);
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
return cmd;
}
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static void
send_reply(crm_client_t * client, int rc, uint32_t id, int call_id)
{
int send_rc = 0;
xmlNode *reply = NULL;
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
send_rc = lrmd_server_send_reply(client, id, reply);
free_xml(reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
crm_client_t *client = value;
if (client == NULL) {
crm_err("Asked to send event to NULL client");
return;
} else if (client->name == NULL) {
crm_trace("Asked to send event to client with no name");
return;
}
if (lrmd_server_send_notify(client, update_msg) <= 0) {
crm_warn("Notification of client %s/%s failed", client->name, client->id);
}
}
#ifdef HAVE_SYS_TIMEB_H
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
int sec = difftime(now->time, old->time);
int ms = now->millitm - old->millitm;
if (old->time == 0) {
return 0;
}
return (sec * 1000) + ms;
}
#endif
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
int exec_time = 0;
int queue_time = 0;
xmlNode *notify = NULL;
#ifdef HAVE_SYS_TIMEB_H
struct timeb now = { 0, };
ftime(&now);
exec_time = time_diff_ms(&now, &cmd->t_run);
queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue);
#endif
log_finished(cmd, exec_time, queue_time);
/* if the first notify result for a cmd has already been sent earlier, and the
* the option to only send notifies on result changes is set. Check to see
* if the last result is the same as the new one. If so, suppress this update */
if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
if (cmd->last_notify_rc == cmd->exec_rc &&
cmd->last_notify_op_status == cmd->lrmd_op_status) {
/* only send changes */
return;
}
}
cmd->first_notify_sent = 1;
cmd->last_notify_rc = cmd->exec_rc;
cmd->last_notify_op_status = cmd->lrmd_op_status;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
#ifdef HAVE_SYS_TIMEB_H
crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2field((gpointer) key, (gpointer) value, args);
}
}
if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
crm_client_t *client = crm_client_get_by_id(cmd->client_id);
if (client) {
send_client_notify(client->id, client, notify);
}
} else {
g_hash_table_foreach(client_connections, send_client_notify, notify);
}
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
g_hash_table_foreach(client_connections, send_client_notify, notify);
free_xml(notify);
}
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
send_cmd_complete_notify(cmd);
/* crmd expects lrmd to automatically cancel recurring ops after rsc stops */
if (rsc && safe_str_eq(cmd->action, "stop")) {
cancel_all_recurring(rsc, NULL);
}
if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
free(cmd->output);
cmd->output = NULL;
}
}
static int
lsb2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
/* status has different return codes that everything else. */
if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) {
if (rc > PCMK_LSB_NOT_RUNNING) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
switch (rc) {
case PCMK_LSB_STATUS_OK:
return PCMK_OCF_OK;
case PCMK_LSB_STATUS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case PCMK_LSB_STATUS_VAR_PID:
case PCMK_LSB_STATUS_VAR_LOCK:
case PCMK_LSB_STATUS_NOT_RUNNING:
return PCMK_OCF_NOT_RUNNING;
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
static int
stonith2uniform_rc(const char *action, int rc)
{
if (rc == -ENODEV) {
if (safe_str_eq(action, "stop")) {
rc = PCMK_OCF_OK;
} else if (safe_str_eq(action, "start")) {
rc = PCMK_OCF_NOT_INSTALLED;
} else {
rc = PCMK_OCF_NOT_RUNNING;
}
} else if (rc != 0) {
rc = PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
#if SUPPORT_NAGIOS
static int
nagios2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
switch (rc) {
case NAGIOS_STATE_OK:
return PCMK_OCF_OK;
case NAGIOS_INSUFFICIENT_PRIV:
return PCMK_OCF_INSUFFICIENT_PRIV;
case NAGIOS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case NAGIOS_STATE_WARNING:
case NAGIOS_STATE_CRITICAL:
case NAGIOS_STATE_UNKNOWN:
case NAGIOS_STATE_DEPENDENT:
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
#endif
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, "ocf")) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, "stonith")) {
return stonith2uniform_rc(action, rc);
} else if (safe_str_eq(standard, "systemd")) {
return rc;
} else if (safe_str_eq(standard, "upstart")) {
return rc;
#if SUPPORT_NAGIOS
} else if (safe_str_eq(standard, "nagios")) {
return nagios2uniform_rc(action, rc);
#endif
} else {
return lsb2uniform_rc(action, rc);
}
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (rsc->call_opts & lrmd_opt_drop_recurring) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
if (!cmd) {
crm_err("LRMD action (%s) completed does not match any known operations.", action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->last_pid = action->pid;
cmd->exec_rc = get_uniform_rc(action->standard, cmd->action, action->rc);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (action->stdout_data) {
cmd->output = strdup(action->stdout_data);
}
#if SUPPORT_NAGIOS
if (rsc && safe_str_eq(rsc->class, "nagios")) {
if (safe_str_eq(cmd->action, "monitor") &&
cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) {
/* Successfully executed --version for the nagios plugin */
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
int time_sum = 0;
int timeout_left = 0;
int delay = cmd->timeout_orig / 10;
# ifdef HAVE_SYS_TIMEB_H
struct timeb now = { 0, };
ftime(&now);
time_sum = time_diff_ms(&now, &cmd->t_first_run);
timeout_left = cmd->timeout_orig - time_sum;
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
crm_notice
("%s %s failed (rc=%d): re-scheduling (time_sum=%dms, start_delay=%dms, timeout=%dms)",
cmd->rsc_id, cmd->action, cmd->exec_rc, time_sum, cmd->start_delay,
cmd->timeout);
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
free(cmd->output);
cmd->output = NULL;
rsc->active = NULL;
schedule_lrmd_cmd(rsc, cmd);
return;
}
# endif
}
}
#endif
cmd_finalize(cmd, rsc);
}
static void
stonith_action_complete(lrmd_cmd_t * cmd, int rc)
{
int recurring = cmd->interval;
lrmd_rsc_t *rsc = NULL;
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc);
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
recurring = 0;
/* do nothing */
} else if (rc) {
/* Attempt to map return codes to op status if possible */
switch (rc) {
case -EPROTONOSUPPORT:
cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED;
break;
case -ETIME:
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
break;
default:
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
} else {
/* command successful */
cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
if (safe_str_eq(cmd->action, "start") && rsc) {
rsc->stonith_started = 1;
}
}
if (recurring && rsc) {
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
stonith_action_complete(data->userdata, data->rc);
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (safe_str_eq(rsc->class, "stonith")) {
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
}
rsc->pending_ops = rsc->recurring_ops = NULL;
}
}
if (!cmd_list) {
return;
}
crm_err("STONITH connection failed, finalizing %d pending operations.",
g_list_length(cmd_list));
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
stonith_action_complete(cmd_iter->data, -ENOTCONN);
}
g_list_free(cmd_list);
}
static int
lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
int rc = 0;
int do_monitor = 0;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, -ENOTCONN);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
cmd_finalize(cmd, rsc);
return -EUNATCH;
}
if (safe_str_eq(cmd->action, "start")) {
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
/* Stonith automatically registers devices from the IPC when changes occur,
* but to avoid a possible race condition between stonith receiving the IPC update
* and the lrmd requesting that resource, the lrmd still registers the device as well.
* Stonith knows how to handle duplicate device registrations correctly. */
rc = stonith_api->cmds->register_device(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
rsc->provider, rsc->type, device_params);
stonith_key_value_freeall(device_params, 1, 1);
if (rc == 0) {
do_monitor = 1;
}
} else if (safe_str_eq(cmd->action, "stop")) {
rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id);
rsc->stonith_started = 0;
} else if (safe_str_eq(cmd->action, "monitor")) {
if (cmd->interval) {
do_monitor = 1;
} else {
rc = rsc->stonith_started ? 0 : -ENODEV;
}
}
if (!do_monitor) {
goto cleanup_stonith_exec;
}
rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000);
rc = stonith_api->cmds->register_callback(stonith_api,
rc,
0,
0,
cmd, "lrmd_stonith_callback", lrmd_stonith_callback);
/* don't cleanup yet, we will find out the result of the monitor later */
if (rc > 0) {
rsc->active = cmd;
return rc;
} else if (rc == 0) {
rc = -1;
}
cleanup_stonith_exec:
stonith_action_complete(cmd, rc);
return rc;
}
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
(safe_str_eq(rsc->class, "lsb") ||
safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) {
return "status";
}
return action;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, strdup(key), strdup(value));
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (safe_str_eq(rsc->class, "nagios") && safe_str_eq(cmd->action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
goto exec_done;
}
#endif
if (cmd->params) {
params_copy = g_hash_table_new_full(crm_str_hash,
g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
if (params_copy != NULL) {
g_hash_table_foreach(cmd->params, dup_attr, params_copy);
}
}
action = resources_action_create(rsc->rsc_id,
rsc->class,
rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval, cmd->timeout, params_copy);
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
/* 'cmd' may not be valid after this point if
* services_action_async() returned TRUE
*
* Upstart and systemd both synchronously determine monitor/status
* results and call action_complete (which may free 'cmd') if necessary.
*/
if (services_action_async(action, action_complete)) {
return TRUE;
}
cmd->exec_rc = action->rc;
if(action->status != PCMK_LRM_OP_DONE) {
cmd->lrmd_op_status = action->status;
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace
("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef HAVE_SYS_TIMEB_H
if (cmd->t_first_run.time == 0) {
ftime(&cmd->t_first_run);
}
ftime(&cmd->t_run);
#endif
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (safe_str_eq(rsc->class, "stonith")) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, "stonith");
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id, cmd->action, cmd->interval);
}
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static int
process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *reply = create_xml_node(NULL, "reply");
const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
if (safe_str_neq(protocol_version, LRMD_PROTOCOL_VERSION)) {
crm_xml_add_int(reply, F_LRMD_RC, -EPROTO);
crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
}
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
lrmd_server_send_reply(client, id, reply);
if (crm_is_true(is_ipc_provider)) {
/* this is a remote connection from a cluster nodes crmd */
#ifdef SUPPORT_REMOTE
ipc_proxy_add_provider(client);
#endif
}
free_xml(reply);
return pcmk_ok;
}
static int
process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) {
crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Added '%s' to the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
return rc;
}
static void
process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int send_rc = 0;
int call_id = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (!rsc_id) {
rc = -ENODEV;
goto get_rsc_done;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
goto get_rsc_done;
}
get_rsc_done:
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
send_rc = lrmd_server_send_reply(client, id, reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
free_xml(reply);
}
static int
process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation still in progress: %p", rsc->active);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client);
schedule_lrmd_cmd(rsc, cmd);
return cmd->call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, int interval)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then its either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (safe_str_eq(rsc->class, "stonith")) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith opereations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Notice a copy of each list is created when concat is called.
* This prevents odd behavior from occurring when the cmd_list
* is iterated through later on. It is possible the cancel_op
* function may end up modifying the recurring_ops and pending_ops
* lists. If we did not copy those lists, our cmd_list iteration
* could get messed up.*/
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval == 0) {
continue;
}
if (client_id && safe_str_neq(cmd->client_id, client_id)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval);
}
/* frees only the copied list data, not the cmds */
g_list_free(cmd_list);
}
static int
process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
int interval = 0;
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval);
}
void
process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) {
#ifdef SUPPORT_REMOTE
ipc_proxy_forward_client(client, request);
#endif
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
rc = process_lrmd_signon(client, id, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
process_lrmd_get_rsc_info(client, id, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
do_notify = 1;
+ do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown %s from %s", op, client->name);
crm_log_xml_warn(request, "UnknownOp");
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d, exit=%d",
op, client->id, rc, do_reply, do_notify, exit);
if (do_reply) {
send_reply(client, rc, id, call_id);
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:25 PM (12 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1980863
Default Alt Text
(166 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment