Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am
index 54bfaec7ee..6daca62ba6 100644
--- a/daemons/controld/Makefile.am
+++ b/daemons/controld/Makefile.am
@@ -1,45 +1,46 @@
#
# Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
include $(top_srcdir)/Makefile.common
halibdir = $(CRM_DAEMON_DIR)
## binary progs
halib_PROGRAMS = pacemaker-controld
## SOURCES
-noinst_HEADERS = crmd_alerts.h crmd_callbacks.h crmd_fsa.h crmd.h \
+noinst_HEADERS = controld_alerts.h \
+ crmd_callbacks.h crmd_fsa.h crmd.h \
crmd_lrm.h crmd_messages.h crmd_utils.h fsa_defines.h \
fsa_matrix.h fsa_proto.h membership.h te_callbacks.h \
tengine.h throttle.h crmd_metadata.h
pacemaker_controld_CFLAGS = $(CFLAGS_HARDENED_EXE)
pacemaker_controld_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \
$(top_builddir)/lib/transition/libtransitioner.la \
$(top_builddir)/lib/pengine/libpe_rules.la \
$(top_builddir)/lib/cib/libcib.la \
$(top_builddir)/lib/cluster/libcrmcluster.la \
$(top_builddir)/lib/common/libcrmcommon.la \
$(top_builddir)/lib/services/libcrmservice.la \
$(top_builddir)/lib/lrmd/liblrmd.la \
$(CLUSTERLIBS)
pacemaker_controld_SOURCES = main.c corosync.c crmd_metadata.c \
fsa.c control.c messages.c membership.c callbacks.c attrd.c \
election.c join_client.c join_dc.c throttle.c \
cib.c pengine.c tengine.c lrm.c lrm_state.c remote_ra.c \
utils.c misc.c te_events.c te_actions.c te_utils.c te_callbacks.c
if BUILD_XML_HELP
man7_MANS = pacemaker-controld.7
endif
CLEANFILES = $(man7_MANS)
diff --git a/daemons/controld/control.c b/daemons/controld/control.c
index 2abc421152..4a1d280414 100644
--- a/daemons/controld/control.c
+++ b/daemons/controld/control.c
@@ -1,906 +1,906 @@
/*
* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/common/ipcs.h>
#include <crmd.h>
#include <crmd_fsa.h>
#include <fsa_proto.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
-#include <crmd_alerts.h>
+#include <controld_alerts.h>
#include <crmd_metadata.h>
#include <tengine.h>
#include <throttle.h>
#include <sys/types.h>
#include <sys/stat.h>
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
bool no_quorum_suicide_escalation = FALSE;
static gboolean
election_timeout_popped(gpointer data)
{
/* Not everyone voted */
crm_info("Election failed: Declaring ourselves the winner");
register_fsa_input(C_TIMER_POPPED, I_ELECTION_DC, NULL);
return FALSE;
}
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = calloc(1, sizeof(crm_cluster_t));
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
set_bit(fsa_input_register, R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync(cluster);
#endif
}
fsa_election = election_init(NULL, cluster->uname, 60000/*60s*/, election_timeout_popped);
fsa_our_uname = cluster->uname;
fsa_our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
if (registered == FALSE) {
set_bit(fsa_input_register, R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __FUNCTION__);
clear_bit(fsa_input_register, R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
set_bit(fsa_input_register, R_SHUTDOWN);
if (stonith_api) {
/* Prevent it from coming up again */
clear_bit(fsa_input_register, R_ST_REQUIRED);
crm_info("Disconnecting STONITH...");
stonith_api->cmds->disconnect(stonith_api);
}
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
set_bit(fsa_input_register, R_SHUTDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
(fsa_our_dc? fsa_our_dc : "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
/* set_bit(fsa_input_register, R_STAYDOWN); */
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern crm_ipc_t *attrd_ipc;
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern char *te_client_id;
crm_exit_t
crmd_fast_exit(crm_exit_t exit_code)
{
if (is_set(fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& is_set(fsa_input_register, R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
return crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GListPtr gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
set_bit(fsa_input_register, R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
if (attrd_ipc) {
crm_trace("Closing connection to pacemaker-attrd");
crm_ipc_close(attrd_ipc);
crm_ipc_destroy(attrd_ipc);
attrd_ipc = NULL;
}
pe_subsystem_free();
if(stonith_api) {
crm_trace("Disconnecting fencing API");
clear_bit(fsa_input_register, R_ST_REQUIRED);
stonith_api->cmds->free(stonith_api); stonith_api = NULL;
}
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
return crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
clear_bit(fsa_input_register, R_MEMBERSHIP);
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
metadata_cache_fini();
election_fini(fsa_election);
fsa_election = NULL;
/* Tear down the CIB connection, but don't free it yet -- it could be used
* when we drain the mainloop later.
*/
cib_free_callbacks(fsa_cib_conn);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
verify_stopped(fsa_state, LOG_WARNING);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
crm_client_cleanup();
crm_peer_destroy();
crm_timer_stop(transition_timer);
crm_timer_stop(integration_timer);
crm_timer_stop(finalization_timer);
crm_timer_stop(election_trigger);
election_timeout_stop(fsa_election);
crm_timer_stop(shutdown_escalation_timer);
crm_timer_stop(wait_timer);
crm_timer_stop(recheck_timer);
free(transition_timer); transition_timer = NULL;
free(integration_timer); integration_timer = NULL;
free(finalization_timer); finalization_timer = NULL;
free(election_trigger); election_trigger = NULL;
free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
free(wait_timer); wait_timer = NULL;
free(recheck_timer); recheck_timer = NULL;
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(te_client_id); te_client_id = NULL;
free(fsa_pe_ref); fsa_pe_ref = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if (action & A_EXIT_1) {
log_level = LOG_ERR;
exit_type = "forcefully";
exit_code = CRM_EX_ERROR;
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the controller",
fsa_action2string(action), exit_type);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int was_error = 0;
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
crm_debug("Creating CIB and executor objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
/* set up the timers */
transition_timer = calloc(1, sizeof(fsa_timer_t));
integration_timer = calloc(1, sizeof(fsa_timer_t));
finalization_timer = calloc(1, sizeof(fsa_timer_t));
election_trigger = calloc(1, sizeof(fsa_timer_t));
shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
wait_timer = calloc(1, sizeof(fsa_timer_t));
recheck_timer = calloc(1, sizeof(fsa_timer_t));
if (election_trigger != NULL) {
election_trigger->source_id = 0;
election_trigger->period_ms = -1;
election_trigger->fsa_input = I_DC_TIMEOUT;
election_trigger->callback = crm_timer_popped;
election_trigger->repeat = FALSE;
} else {
was_error = TRUE;
}
if (transition_timer != NULL) {
transition_timer->source_id = 0;
transition_timer->period_ms = -1;
transition_timer->fsa_input = I_PE_CALC;
transition_timer->callback = crm_timer_popped;
transition_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (integration_timer != NULL) {
integration_timer->source_id = 0;
integration_timer->period_ms = -1;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
integration_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (finalization_timer != NULL) {
finalization_timer->source_id = 0;
finalization_timer->period_ms = -1;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
finalization_timer->repeat = FALSE;
/* for possible enabling... a bug in the join protocol left
* a slave in S_PENDING while we think it's in S_NOT_DC
*
* raising I_FINALIZED put us into a transition loop which is
* never resolved.
* in this loop we continually send probes which the node
* NACK's because it's in S_PENDING
*
* if we have nodes where the cluster layer is active but the
* CRM is not... then this will be handled in the
* integration phase
*/
finalization_timer->fsa_input = I_ELECTION;
} else {
was_error = TRUE;
}
if (shutdown_escalation_timer != NULL) {
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = -1;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
shutdown_escalation_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (wait_timer != NULL) {
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
wait_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (recheck_timer != NULL) {
recheck_timer->source_id = 0;
recheck_timer->period_ms = -1;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
recheck_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (was_error) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static int32_t
crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
crmd_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
static int32_t
crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags);
crm_trace("Invoked: %s", crm_client_name(client));
crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
}
#if ENABLE_ACL
CRM_ASSERT(client->user != NULL);
crm_acl_get_set_user(msg, F_CRM_USER, client->user);
#endif
crm_trace("Processing msg from %s", crm_client_name(client));
crm_log_xml_trace(msg, "controller[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (crmd_authorize_message(msg, client, NULL)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
free_xml(msg);
return 0;
}
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), crm_client_name(client),
c, client);
free(client->userdata);
crm_client_destroy(client);
trigger_fsa(fsa_source);
}
return 0;
}
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
crmd_ipc_closed(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = crmd_ipc_accept,
.connection_created = crmd_ipc_created,
.msg_process = crmd_ipc_dispatch,
.connection_closed = crmd_ipc_closed,
.connection_destroyed = crmd_ipc_destroy
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = crmd_ipc_server_init(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
if (stonith_reconnect == NULL) {
int dummy;
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
}
set_bit(fsa_input_register, R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
crm_notice("The local CRM is operational");
clear_bit(fsa_input_register, R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
set_bit(fsa_input_register, R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* *INDENT-OFF* */
static pe_cluster_option crmd_opts[] = {
/* name, old-name, validate, values, default, short description, long description */
{ "dc-version", NULL, "string", NULL, "none", NULL,
"Version of Pacemaker on the cluster's DC.",
"Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
},
{ "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
"The messaging stack on which Pacemaker is currently running.",
"Used for informational and diagnostic purposes." },
{ XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", &check_time,
"How long to wait for a response from other nodes during startup.",
"The \"correct\" value will depend on the speed/load of your network and the type of switches used."
},
{ XML_CONFIG_ATTR_RECHECK, NULL, "time",
"Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
"15min", &check_timer,
"Polling interval for time based changes to options, resource parameters and constraints.",
"The Cluster is primarily event driven, however the configuration can have elements that change based on time."
" To ensure these changes take effect, we can optionally poll the cluster's status for changes."
},
{ "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
"The maximum amount of system resources that should be used by nodes in the cluster",
"The cluster will slow down its recovery process when the amount of system resources used"
" (currently CPU) approaches this limit",
},
{ "node-action-limit", NULL, "integer", NULL, "0", &check_number,
"The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
{ XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer,
"*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
},
{ "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer,
"*** Advanced Use Only ***\n"
"Enabling this option will slow down cluster recovery under all conditions",
"Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
"Useful if your configuration is sensitive to the order in which ping updates arrive."
},
{ "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
"How long to wait before we can assume nodes are safely down", NULL
},
{ "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
"How many times stonith can fail before it will no longer be attempted on a target"
},
{ "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
};
/* *INDENT-ON* */
void
crmd_metadata(void)
{
config_metadata("pacemaker-controld", "1.0",
"controller properties",
"Cluster properties used by Pacemaker's controller",
crmd_opts, DIMOF(crmd_opts));
}
static void
verify_crmd_options(GHashTable * options)
{
verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
}
static const char *
crmd_pref(GHashTable * options, const char *name)
{
return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
set_bit(fsa_input_register, R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = crm_str_table_new();
unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, now);
verify_crmd_options(config_hash);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
throttle_update_job_max(value);
value = crmd_pref(config_hash, "load-threshold");
if(value) {
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = crmd_pref(config_hash, "no-quorum-policy");
if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
no_quorum_suicide_escalation = TRUE;
}
value = crmd_pref(config_hash,"stonith-max-attempts");
update_stonith_max_attempts(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_get_msec(value);
/* How long to declare an election over - even if not everyone voted */
crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
election_timeout_set_period(fsa_election, crm_get_msec(value));
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_timer->period_ms = crm_get_msec(value);
crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms);
value = crmd_pref(config_hash, "crmd-transition-delay");
transition_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "crmd-integration-timeout");
integration_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "crmd-finalization-timeout");
finalization_timer->period_ms = crm_get_msec(value);
free(fsa_cluster_name);
fsa_cluster_name = NULL;
value = g_hash_table_lookup(config_hash, "cluster-name");
if (value) {
fsa_cluster_name = strdup(value);
}
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
set_bit(fsa_input_register, R_READ_CONFIG);
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
gboolean
crm_read_options(gpointer user_data)
{
int call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn,
"//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath | cib_scope_local);
fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
mainloop_set_trigger(config_read);
}
void
crm_shutdown(int nsig)
{
if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating the shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
} else {
set_bit(fsa_input_register, R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
if (shutdown_escalation_timer->period_ms < 1) {
const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
int msec = crm_get_msec(value);
crm_debug("Using default shutdown escalation: %dms", msec);
shutdown_escalation_timer->period_ms = msec;
}
/* can't rely on this... */
crm_notice("Shutting down cluster resource manager " CRM_XS
" limit=%dms", shutdown_escalation_timer->period_ms);
crm_timer_start(shutdown_escalation_timer);
}
} else {
crm_info("exit from shutdown");
crmd_exit(CRM_EX_OK);
}
}
diff --git a/daemons/controld/controld_alerts.h b/daemons/controld/controld_alerts.h
new file mode 100644
index 0000000000..4fb73d4207
--- /dev/null
+++ b/daemons/controld/controld_alerts.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015-2018 Andrew Beekhof <andrew@beekhof.net>
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef CONTROLD_ALERTS__H
+# define CONTROLD_ALERTS__H
+
+# include <crm/crm.h>
+# include <crm/cluster.h>
+# include <crm/stonith-ng.h>
+
+void crmd_unpack_alerts(xmlNode *alerts);
+void crmd_alert_node_event(crm_node_t *node);
+void crmd_alert_fencing_op(stonith_event_t *e);
+void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op);
+
+#endif
diff --git a/daemons/controld/crmd_alerts.h b/daemons/controld/crmd_alerts.h
deleted file mode 100644
index eadede1ed0..0000000000
--- a/daemons/controld/crmd_alerts.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2015 Andrew Beekhof <andrew@beekhof.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef CRMD_ALERT__H
-# define CRMD_ALERT__H
-
-# include <crm/crm.h>
-# include <crm/cluster.h>
-# include <crm/stonith-ng.h>
-
-void crmd_unpack_alerts(xmlNode *alerts);
-void crmd_alert_node_event(crm_node_t *node);
-void crmd_alert_fencing_op(stonith_event_t *e);
-void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op);
-
-#endif
diff --git a/daemons/controld/crmd_utils.h b/daemons/controld/crmd_utils.h
index 44f055fe10..790b818bc8 100644
--- a/daemons/controld/crmd_utils.h
+++ b/daemons/controld/crmd_utils.h
@@ -1,111 +1,101 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
+
#ifndef CRMD_UTILS__H
# define CRMD_UTILS__H
# include <crm/crm.h>
# include <crm/transition.h>
# include <crm/common/xml.h>
# include <crm/cib/internal.h> /* For CIB_OP_MODIFY */
-# include "crmd_alerts.h"
+# include "controld_alerts.h"
# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
# define fsa_cib_update(section, data, options, call_id, user_name) \
if(fsa_cib_conn != NULL) { \
call_id = cib_internal_op( \
fsa_cib_conn, CIB_OP_MODIFY, NULL, section, data, \
NULL, options, user_name); \
\
} else { \
crm_err("No CIB connection available"); \
}
# define fsa_cib_anon_update(section, data, options) \
if(fsa_cib_conn != NULL) { \
fsa_cib_conn->cmds->modify( \
fsa_cib_conn, section, data, options); \
\
} else { \
crm_err("No CIB connection available"); \
}
extern gboolean fsa_has_quorum;
extern int last_peer_update;
extern int last_resource_update;
enum node_update_flags {
node_update_none = 0x0000,
node_update_quick = 0x0001,
node_update_cluster = 0x0010,
node_update_peer = 0x0020,
node_update_join = 0x0040,
node_update_expected = 0x0100,
node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected,
};
gboolean crm_timer_stop(fsa_timer_t * timer);
gboolean crm_timer_start(fsa_timer_t * timer);
gboolean crm_timer_popped(gpointer data);
gboolean is_timer_started(fsa_timer_t * timer);
crm_exit_t crmd_exit(crm_exit_t exit_code);
crm_exit_t crmd_fast_exit(crm_exit_t exit_code);
void pe_subsystem_free(void);
void fsa_dump_actions(long long action, const char *text);
void fsa_dump_inputs(int log_level, const char *text, long long input_register);
gboolean update_dc(xmlNode * msg);
void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase);
xmlNode *create_node_state_update(crm_node_t *node, int flags,
xmlNode *parent, const char *source);
void populate_cib_nodes(enum node_update_flags flags, const char *source);
void crm_update_quorum(gboolean quorum, gboolean force_update);
void erase_status_tag(const char *uname, const char *tag, int options);
void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
void update_attrd_remote_node_removed(const char *host, const char *user_name);
void update_attrd_clear_failures(const char *host, const char *rsc,
const char *op, const char *interval_spec,
gboolean is_remote_node);
int crmd_join_phase_count(enum crm_join_phase phase);
void crmd_join_phase_log(int level);
const char *get_timer_desc(fsa_timer_t * timer);
void st_fail_count_reset(const char * target);
void st_fail_count_increment(const char *target);
void abort_for_stonith_failure(enum transition_action abort_action,
const char *target, xmlNode *reason);
void crmd_peer_down(crm_node_t *peer, bool full);
unsigned int cib_op_timeout(void);
bool feature_set_compatible(const char *dc_version, const char *join_version);
/* Convenience macro for registering a CIB callback
* (assumes that data can be freed with free())
*/
# define fsa_register_cib_callback(id, flag, data, fn) do { \
CRM_ASSERT(fsa_cib_conn); \
fsa_cib_conn->cmds->register_callback_full( \
fsa_cib_conn, id, cib_op_timeout(), \
flag, data, #fn, fn, free); \
} while(0)
#endif
diff --git a/daemons/controld/lrm_state.c b/daemons/controld/lrm_state.c
index 4eb9ee9d8d..d3c291ac5a 100644
--- a/daemons/controld/lrm_state.c
+++ b/daemons/controld/lrm_state.c
@@ -1,791 +1,791 @@
/*
* Copyright 2012-2018 David Vossel <davidvossel@gmail.com>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/iso8601.h>
#include <crmd.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
-#include <crmd_alerts.h>
+#include <controld_alerts.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/rules_internal.h>
#include <crm/transition.h>
#include <crm/lrmd_alerts_internal.h>
GHashTable *lrm_state_table = NULL;
extern GHashTable *proxy_table;
int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
static void
free_rsc_info(gpointer value)
{
lrmd_rsc_info_t *rsc_info = value;
lrmd_free_rsc_info(rsc_info);
}
static void
free_deletion_op(gpointer value)
{
struct pending_deletion_op_s *op = value;
free(op->rsc);
delete_ha_msg_input(op->input);
free(op);
}
static void
free_recurring_op(gpointer value)
{
struct recurring_op_s *op = (struct recurring_op_s *)value;
free(op->user_data);
free(op->rsc_id);
free(op->op_type);
free(op->op_key);
if (op->params) {
g_hash_table_destroy(op->params);
}
free(op);
}
static gboolean
fail_pending_op(gpointer key, gpointer value, gpointer user_data)
{
lrmd_event_data_t event = { 0, };
lrm_state_t *lrm_state = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
crm_trace("Pre-emptively failing " CRM_OP_FMT " on %s (call=%s, %s)",
op->rsc_id, op->op_type, op->interval_ms,
lrm_state->node_name, (char*)key, op->user_data);
event.type = lrmd_event_exec_complete;
event.rsc_id = op->rsc_id;
event.op_type = op->op_type;
event.user_data = op->user_data;
event.timeout = 0;
event.interval_ms = op->interval_ms;
event.rc = PCMK_OCF_CONNECTION_DIED;
event.op_status = PCMK_LRM_OP_ERROR;
event.t_run = op->start_time;
event.t_rcchange = op->start_time;
event.call_id = op->call_id;
event.remote_nodename = lrm_state->node_name;
event.params = op->params;
process_lrm_event(lrm_state, &event, op);
return TRUE;
}
gboolean
lrm_state_is_local(lrm_state_t *lrm_state)
{
if (lrm_state == NULL || fsa_our_uname == NULL) {
return FALSE;
}
if (strcmp(lrm_state->node_name, fsa_our_uname) != 0) {
return FALSE;
}
return TRUE;
}
lrm_state_t *
lrm_state_create(const char *node_name)
{
lrm_state_t *state = NULL;
if (!node_name) {
crm_err("No node name given for lrm state object");
return NULL;
}
state = calloc(1, sizeof(lrm_state_t));
if (!state) {
return NULL;
}
state->node_name = strdup(node_name);
state->rsc_info_cache = g_hash_table_new_full(crm_str_hash,
g_str_equal, NULL, free_rsc_info);
state->deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
free_deletion_op);
state->pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
free_recurring_op);
state->resource_history = g_hash_table_new_full(crm_str_hash,
g_str_equal, NULL, history_free);
state->metadata_cache = metadata_cache_new();
g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
return state;
}
void
lrm_state_destroy(const char *node_name)
{
g_hash_table_remove(lrm_state_table, node_name);
}
static gboolean
remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
{
remote_proxy_t *proxy = value;
const char *node_name = user_data;
if (safe_str_eq(node_name, proxy->node_name)) {
return TRUE;
}
return FALSE;
}
static void
internal_lrm_state_destroy(gpointer data)
{
lrm_state_t *lrm_state = data;
if (!lrm_state) {
return;
}
crm_trace("Destroying proxy table %s with %d members", lrm_state->node_name, g_hash_table_size(proxy_table));
g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name);
remote_ra_cleanup(lrm_state);
lrmd_api_delete(lrm_state->conn);
if (lrm_state->rsc_info_cache) {
crm_trace("Destroying rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache));
g_hash_table_destroy(lrm_state->rsc_info_cache);
}
if (lrm_state->resource_history) {
crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history));
g_hash_table_destroy(lrm_state->resource_history);
}
if (lrm_state->deletion_ops) {
crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops));
g_hash_table_destroy(lrm_state->deletion_ops);
}
if (lrm_state->pending_ops) {
crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops));
g_hash_table_destroy(lrm_state->pending_ops);
}
metadata_cache_free(lrm_state->metadata_cache);
free((char *)lrm_state->node_name);
free(lrm_state);
}
void
lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata)
{
if (lrm_state->resource_history) {
crm_trace("Re-setting history op cache with %d members",
g_hash_table_size(lrm_state->resource_history));
g_hash_table_remove_all(lrm_state->resource_history);
}
if (lrm_state->deletion_ops) {
crm_trace("Re-setting deletion op cache with %d members",
g_hash_table_size(lrm_state->deletion_ops));
g_hash_table_remove_all(lrm_state->deletion_ops);
}
if (lrm_state->pending_ops) {
crm_trace("Re-setting pending op cache with %d members",
g_hash_table_size(lrm_state->pending_ops));
g_hash_table_remove_all(lrm_state->pending_ops);
}
if (lrm_state->rsc_info_cache) {
crm_trace("Re-setting rsc info cache with %d members",
g_hash_table_size(lrm_state->rsc_info_cache));
g_hash_table_remove_all(lrm_state->rsc_info_cache);
}
if (reset_metadata) {
metadata_cache_reset(lrm_state->metadata_cache);
}
}
gboolean
lrm_state_init_local(void)
{
if (lrm_state_table) {
return TRUE;
}
lrm_state_table =
g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, internal_lrm_state_destroy);
if (!lrm_state_table) {
return FALSE;
}
proxy_table =
g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free);
if (!proxy_table) {
g_hash_table_destroy(lrm_state_table);
lrm_state_table = NULL;
return FALSE;
}
return TRUE;
}
void
lrm_state_destroy_all(void)
{
if (lrm_state_table) {
crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table));
g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL;
}
if(proxy_table) {
crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table));
g_hash_table_destroy(proxy_table); proxy_table = NULL;
}
}
lrm_state_t *
lrm_state_find(const char *node_name)
{
if (!node_name) {
return NULL;
}
return g_hash_table_lookup(lrm_state_table, node_name);
}
lrm_state_t *
lrm_state_find_or_create(const char *node_name)
{
lrm_state_t *lrm_state;
lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
if (!lrm_state) {
lrm_state = lrm_state_create(node_name);
}
return lrm_state;
}
GList *
lrm_state_get_list(void)
{
return g_hash_table_get_values(lrm_state_table);
}
static remote_proxy_t *
find_connected_proxy_by_node(const char * node_name)
{
GHashTableIter gIter;
remote_proxy_t *proxy = NULL;
CRM_CHECK(proxy_table != NULL, return NULL);
g_hash_table_iter_init(&gIter, proxy_table);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) {
if (proxy->source
&& safe_str_eq(node_name, proxy->node_name)) {
return proxy;
}
}
return NULL;
}
static void
remote_proxy_disconnect_by_node(const char * node_name)
{
remote_proxy_t *proxy = NULL;
CRM_CHECK(proxy_table != NULL, return);
while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) {
/* mainloop_del_ipc_client() eventually calls remote_proxy_disconnected()
* , which removes the entry from proxy_table.
* Do not do this in a g_hash_table_iter_next() loop. */
if (proxy->source) {
mainloop_del_ipc_client(proxy->source);
}
}
return;
}
void
lrm_state_disconnect_only(lrm_state_t * lrm_state)
{
int removed = 0;
if (!lrm_state->conn) {
return;
}
crm_trace("Disconnecting %s", lrm_state->node_name);
remote_proxy_disconnect_by_node(lrm_state->node_name);
((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn);
if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
removed = g_hash_table_foreach_remove(lrm_state->pending_ops, fail_pending_op, lrm_state);
crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name);
}
}
void
lrm_state_disconnect(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return;
}
lrm_state_disconnect_only(lrm_state);
lrmd_api_delete(lrm_state->conn);
lrm_state->conn = NULL;
}
int
lrm_state_is_connected(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return FALSE;
}
return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn);
}
int
lrm_state_poke_connection(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return -1;
}
return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn);
}
int
lrm_state_ipc_connect(lrm_state_t * lrm_state)
{
int ret;
if (!lrm_state->conn) {
lrm_state->conn = lrmd_api_new();
((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, lrm_op_callback);
}
ret = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL);
if (ret != pcmk_ok) {
lrm_state->num_lrm_register_fails++;
} else {
lrm_state->num_lrm_register_fails = 0;
}
return ret;
}
static remote_proxy_t *
crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel)
{
static struct ipc_client_callbacks proxy_callbacks = {
.dispatch = remote_proxy_dispatch,
.destroy = remote_proxy_disconnected
};
remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name,
session_id, channel);
return proxy;
}
gboolean
crmd_is_proxy_session(const char *session)
{
return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE;
}
void
crmd_proxy_send(const char *session, xmlNode *msg)
{
remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
lrm_state_t *lrm_state = NULL;
if (!proxy) {
return;
}
crm_log_xml_trace(msg, "to-proxy");
lrm_state = lrm_state_find(proxy->node_name);
if (lrm_state) {
crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name);
remote_proxy_relay_event(proxy, msg);
}
}
static void
crmd_proxy_dispatch(const char *session, xmlNode *msg)
{
crm_log_xml_trace(msg, "controller-proxy[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, session);
if (crmd_authorize_message(msg, NULL, session)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
}
static void
remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc != pcmk_ok) {
crm_err("Query resulted in an error: %s", pcmk_strerror(rc));
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
}
} else {
lrmd_t * lrmd = (lrmd_t *)user_data;
crm_time_t *now = crm_time_new(NULL);
GHashTable *config_hash = crm_str_table_new();
crm_debug("Call %d : Parsing CIB options", call_id);
unpack_instance_attributes(
output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now);
/* Now send it to the remote peer */
remote_proxy_check(lrmd, config_hash);
g_hash_table_destroy(config_hash);
crm_time_free(now);
}
}
static void
crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
{
lrm_state_t *lrm_state = userdata;
const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION);
remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
const char *op = crm_element_value(msg, F_LRMD_IPC_OP);
if (safe_str_eq(op, LRMD_IPC_OP_NEW)) {
const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER);
proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel);
if (proxy != NULL) {
/* Look up stonith-watchdog-timeout and send to the remote peer for validation */
int rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local);
fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd,
"remote_config_check", remote_config_check, NULL);
}
} else if (safe_str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ)) {
char *now_s = NULL;
time_t now = time(NULL);
crm_notice("%s requested shutdown of its remote connection",
lrm_state->node_name);
if (!remote_ra_is_in_maintenance(lrm_state)) {
now_s = crm_itoa(now);
update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
free(now_s);
remote_proxy_ack_shutdown(lrmd);
crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
lrm_state->node_name);
} else {
remote_proxy_nack_shutdown(lrmd);
crm_notice("Remote resource for %s is not managed so no ordered shutdown happening",
lrm_state->node_name);
}
return;
} else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST) && proxy && proxy->is_local) {
/* This is for the controller, which we are, so don't try
* to send to ourselves over IPC -- do it directly.
*/
int flags = 0;
xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
CRM_CHECK(request != NULL, return);
#if ENABLE_ACL
CRM_CHECK(lrm_state->node_name, return);
crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
crm_acl_get_set_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
#endif
crmd_proxy_dispatch(session, request);
crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
if (flags & crm_ipc_client_response) {
int msg_id = 0;
xmlNode *op_reply = create_xml_node(NULL, "ack");
crm_xml_add(op_reply, "function", __FUNCTION__);
crm_xml_add_int(op_reply, "line", __LINE__);
crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
remote_proxy_relay_response(proxy, op_reply, msg_id);
free_xml(op_reply);
}
} else {
remote_proxy_cb(lrmd, lrm_state->node_name, msg);
}
}
int
lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
int timeout_ms)
{
int ret;
if (!lrm_state->conn) {
lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server, port);
if (!lrm_state->conn) {
return -1;
}
((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback);
lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, crmd_remote_proxy_cb);
}
crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms);
ret =
((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name,
timeout_ms);
if (ret != pcmk_ok) {
lrm_state->num_lrm_register_fails++;
} else {
lrm_state->num_lrm_register_fails = 0;
}
return ret;
}
int
lrm_state_get_metadata(lrm_state_t * lrm_state,
const char *class,
const char *provider,
const char *agent, char **output, enum lrmd_call_options options)
{
if (!lrm_state->conn) {
return -ENOTCONN;
}
return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata(lrm_state->conn, class, provider, agent,
output, options);
}
int
lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
guint interval_ms)
{
if (!lrm_state->conn) {
return -ENOTCONN;
}
/* Figure out a way to make this async?
* NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms);
}
return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id,
action, interval_ms);
}
lrmd_rsc_info_t *
lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
{
lrmd_rsc_info_t *rsc = NULL;
if (!lrm_state->conn) {
return NULL;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_get_rsc_info(lrm_state, rsc_id);
}
rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
if (rsc == NULL) {
/* only contact the lrmd if we don't already have a cached rsc info */
rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options);
if (rsc == NULL) {
return NULL;
}
/* cache the result */
g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
}
return lrmd_copy_rsc_info(rsc);
}
int
lrm_state_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params)
{
if (!lrm_state->conn) {
lrmd_key_value_freeall(params);
return -ENOTCONN;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_exec(lrm_state, rsc_id, action, userdata, interval_ms,
timeout, start_delay, params);
}
return ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn,
rsc_id,
action,
userdata,
interval_ms,
timeout,
start_delay,
lrmd_opt_notify_changes_only, params);
}
int
lrm_state_register_rsc(lrm_state_t * lrm_state,
const char *rsc_id,
const char *class,
const char *provider, const char *agent, enum lrmd_call_options options)
{
lrmd_t *conn = (lrmd_t *) lrm_state->conn;
if (conn == NULL) {
return -ENOTCONN;
}
if (is_remote_lrmd_ra(agent, provider, NULL)) {
return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL;
}
/* @TODO Implement an asynchronous version of this (currently a blocking
* call to the lrmd).
*/
return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider,
agent, options);
}
int
lrm_state_unregister_rsc(lrm_state_t * lrm_state,
const char *rsc_id, enum lrmd_call_options options)
{
if (!lrm_state->conn) {
return -ENOTCONN;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
lrm_state_destroy(rsc_id);
return pcmk_ok;
}
g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
/* @TODO Optimize this ... this function is a blocking round trip from
* client to daemon. The pacemaker-controld/lrm.c code path that uses this
* function should always treat it as an async operation. The executor API
* should make an async version available.
*/
return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
}
/*
* Functions for sending alerts via local executor connection
*/
static GListPtr crmd_alert_list = NULL;
void
crmd_unpack_alerts(xmlNode *alerts)
{
pe_free_alert_list(crmd_alert_list);
crmd_alert_list = pe_unpack_alerts(alerts);
}
void
crmd_alert_node_event(crm_node_t *node)
{
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
node->uname, node->id, node->state);
}
void
crmd_alert_fencing_op(stonith_event_t * e)
{
char *desc;
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
e->action, e->target,
(e->executioner? e->executioner : "<no-one>"),
e->client_origin, e->origin,
pcmk_strerror(e->result), e->id);
lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
e->target, e->operation, desc, e->result);
free(desc);
}
void
crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
{
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
op);
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 4:18 PM (20 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018869
Default Alt Text
(62 KB)

Event Timeline