diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index 89b5b5dc41..6d9f3355a9 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,899 +1,900 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
bool no_quorum_suicide_escalation = FALSE;
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = calloc(1, sizeof(crm_cluster_t));
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
set_bit(fsa_input_register, R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync(cluster);
#endif
}
if (registered == TRUE) {
controld_election_init(cluster->uname);
fsa_our_uname = cluster->uname;
fsa_our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (registered == FALSE) {
set_bit(fsa_input_register, R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __FUNCTION__);
clear_bit(fsa_input_register, R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
set_bit(fsa_input_register, R_SHUTDOWN);
if (stonith_api) {
/* Prevent it from coming up again */
clear_bit(fsa_input_register, R_ST_REQUIRED);
crm_info("Disconnecting from fencer");
stonith_api->cmds->disconnect(stonith_api);
}
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
set_bit(fsa_input_register, R_SHUTDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
(fsa_our_dc? fsa_our_dc : "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
/* set_bit(fsa_input_register, R_STAYDOWN); */
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern char *te_client_id;
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (is_set(fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& is_set(fsa_input_register, R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GListPtr gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
set_bit(fsa_input_register, R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
pe_subsystem_free();
if(stonith_api) {
crm_trace("Disconnecting fencing API");
clear_bit(fsa_input_register, R_ST_REQUIRED);
stonith_api->cmds->free(stonith_api); stonith_api = NULL;
}
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
clear_bit(fsa_input_register, R_MEMBERSHIP);
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
metadata_cache_fini();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
cib_free_callbacks(fsa_cib_conn);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
verify_stopped(fsa_state, LOG_WARNING);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
crm_client_cleanup();
crm_peer_destroy();
crm_timer_stop(transition_timer);
crm_timer_stop(integration_timer);
crm_timer_stop(finalization_timer);
crm_timer_stop(election_trigger);
crm_timer_stop(shutdown_escalation_timer);
crm_timer_stop(wait_timer);
crm_timer_stop(recheck_timer);
free(transition_timer); transition_timer = NULL;
free(integration_timer); integration_timer = NULL;
free(finalization_timer); finalization_timer = NULL;
free(election_trigger); election_trigger = NULL;
free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
free(wait_timer); wait_timer = NULL;
free(recheck_timer); recheck_timer = NULL;
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(te_client_id); te_client_id = NULL;
free(fsa_pe_ref); fsa_pe_ref = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if (action & A_EXIT_1) {
log_level = LOG_ERR;
exit_type = "forcefully";
exit_code = CRM_EX_ERROR;
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the controller",
fsa_action2string(action), exit_type);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int was_error = 0;
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
crm_debug("Creating CIB manager and executor objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
/* set up the timers */
transition_timer = calloc(1, sizeof(fsa_timer_t));
integration_timer = calloc(1, sizeof(fsa_timer_t));
finalization_timer = calloc(1, sizeof(fsa_timer_t));
election_trigger = calloc(1, sizeof(fsa_timer_t));
shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
wait_timer = calloc(1, sizeof(fsa_timer_t));
recheck_timer = calloc(1, sizeof(fsa_timer_t));
if (election_trigger != NULL) {
election_trigger->source_id = 0;
election_trigger->period_ms = -1;
election_trigger->fsa_input = I_DC_TIMEOUT;
election_trigger->callback = crm_timer_popped;
election_trigger->repeat = FALSE;
} else {
was_error = TRUE;
}
if (transition_timer != NULL) {
transition_timer->source_id = 0;
transition_timer->period_ms = -1;
transition_timer->fsa_input = I_PE_CALC;
transition_timer->callback = crm_timer_popped;
transition_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (integration_timer != NULL) {
integration_timer->source_id = 0;
integration_timer->period_ms = -1;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
integration_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (finalization_timer != NULL) {
finalization_timer->source_id = 0;
finalization_timer->period_ms = -1;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
finalization_timer->repeat = FALSE;
/* for possible enabling... a bug in the join protocol left
* a slave in S_PENDING while we think it's in S_NOT_DC
*
* raising I_FINALIZED put us into a transition loop which is
* never resolved.
* in this loop we continually send probes which the node
* NACK's because it's in S_PENDING
*
* if we have nodes where the cluster layer is active but the
* CRM is not... then this will be handled in the
* integration phase
*/
finalization_timer->fsa_input = I_ELECTION;
} else {
was_error = TRUE;
}
if (shutdown_escalation_timer != NULL) {
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = -1;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
shutdown_escalation_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (wait_timer != NULL) {
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
wait_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (recheck_timer != NULL) {
recheck_timer->source_id = 0;
recheck_timer->period_ms = -1;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
recheck_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (was_error) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static int32_t
crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
crmd_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
static int32_t
crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags);
crm_trace("Invoked: %s", crm_client_name(client));
crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
}
#if ENABLE_ACL
CRM_ASSERT(client->user != NULL);
crm_acl_get_set_user(msg, F_CRM_USER, client->user);
#endif
crm_trace("Processing msg from %s", crm_client_name(client));
crm_log_xml_trace(msg, "controller[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (crmd_authorize_message(msg, client, NULL)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
free_xml(msg);
return 0;
}
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), crm_client_name(client),
c, client);
free(client->userdata);
crm_client_destroy(client);
trigger_fsa(fsa_source);
}
return 0;
}
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
crmd_ipc_closed(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = crmd_ipc_accept,
.connection_created = crmd_ipc_created,
.msg_process = crmd_ipc_dispatch,
.connection_closed = crmd_ipc_closed,
.connection_destroyed = crmd_ipc_destroy
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = crmd_ipc_server_init(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
+ // Try connecting to fencer (retrying later in mainloop if failed)
if (stonith_reconnect == NULL) {
- int dummy;
-
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
+ stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
+ te_connect_stonith,
+ GINT_TO_POINTER(TRUE));
}
set_bit(fsa_input_register, R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
crm_notice("Pacemaker controller successfully started and accepting connections");
clear_bit(fsa_input_register, R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
set_bit(fsa_input_register, R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* *INDENT-OFF* */
static pe_cluster_option crmd_opts[] = {
/* name, old-name, validate, values, default, short description, long description */
{ "dc-version", NULL, "string", NULL, "none", NULL,
"Version of Pacemaker on the cluster's DC.",
"Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
},
{ "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
"The messaging stack on which Pacemaker is currently running.",
"Used for informational and diagnostic purposes." },
{ XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", &check_time,
"How long to wait for a response from other nodes during startup.",
"The \"correct\" value will depend on the speed/load of your network and the type of switches used."
},
{ XML_CONFIG_ATTR_RECHECK, NULL, "time",
"Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
"15min", &check_timer,
"Polling interval for time based changes to options, resource parameters and constraints.",
"The Cluster is primarily event driven, however the configuration can have elements that change based on time."
" To ensure these changes take effect, we can optionally poll the cluster's status for changes."
},
{ "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
"The maximum amount of system resources that should be used by nodes in the cluster",
"The cluster will slow down its recovery process when the amount of system resources used"
" (currently CPU) approaches this limit",
},
{ "node-action-limit", NULL, "integer", NULL, "0", &check_number,
"The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
{ XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{
"join-integration-timeout", "crmd-integration-timeout",
"time", NULL, "3min", &check_timer,
"*** Advanced Use Only ***",
"If need to adjust this value, it probably indicates the presence of a bug"
},
{
"join-finalization-timeout", "crmd-finalization-timeout",
"time", NULL, "30min", &check_timer,
"*** Advanced Use Only ***",
"If you need to adjust this value, it probably indicates the presence of a bug"
},
{
"transition-delay", "crmd-transition-delay",
"time", NULL, "0s", &check_timer,
"*** Advanced Use Only *** Enabling this option will slow down cluster recovery under all conditions",
"Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
"Useful if your configuration is sensitive to the order in which ping updates arrive."
},
{ "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
"How long to wait before we can assume nodes are safely down", NULL
},
{ "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
"How many times stonith can fail before it will no longer be attempted on a target"
},
{ "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
};
/* *INDENT-ON* */
void
crmd_metadata(void)
{
config_metadata("pacemaker-controld", "1.0",
"controller properties",
"Cluster properties used by Pacemaker's controller,"
" formerly known as crmd",
crmd_opts, DIMOF(crmd_opts));
}
static void
verify_crmd_options(GHashTable * options)
{
verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
}
static const char *
crmd_pref(GHashTable * options, const char *name)
{
return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
set_bit(fsa_input_register, R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = crm_str_table_new();
unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, now);
verify_crmd_options(config_hash);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
throttle_update_job_max(value);
value = crmd_pref(config_hash, "load-threshold");
if(value) {
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = crmd_pref(config_hash, "no-quorum-policy");
if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
no_quorum_suicide_escalation = TRUE;
}
value = crmd_pref(config_hash,"stonith-max-attempts");
update_stonith_max_attempts(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_get_msec(value);
/* How long to declare an election over - even if not everyone voted */
crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
controld_set_election_period(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_timer->period_ms = crm_get_msec(value);
crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms);
value = crmd_pref(config_hash, "transition-delay");
transition_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "join-integration-timeout");
integration_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "join-finalization-timeout");
finalization_timer->period_ms = crm_get_msec(value);
free(fsa_cluster_name);
fsa_cluster_name = NULL;
value = g_hash_table_lookup(config_hash, "cluster-name");
if (value) {
fsa_cluster_name = strdup(value);
}
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
set_bit(fsa_input_register, R_READ_CONFIG);
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
gboolean
crm_read_options(gpointer user_data)
{
int call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn,
"//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath | cib_scope_local);
fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
mainloop_set_trigger(config_read);
}
void
crm_shutdown(int nsig)
{
if (crmd_mainloop != NULL && g_main_loop_is_running(crmd_mainloop)) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating the shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
} else {
set_bit(fsa_input_register, R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
if (shutdown_escalation_timer->period_ms < 1) {
const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
int msec = crm_get_msec(value);
crm_debug("Using default shutdown escalation: %dms", msec);
shutdown_escalation_timer->period_ms = msec;
}
/* can't rely on this... */
crm_notice("Shutting down cluster resource manager " CRM_XS
" limit=%dms", shutdown_escalation_timer->period_ms);
crm_timer_start(shutdown_escalation_timer);
}
} else {
crm_info("exit from shutdown");
crmd_exit(CRM_EX_OK);
}
}
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
index 5606ed654a..22f83ad2d7 100644
--- a/daemons/controld/controld_te_utils.c
+++ b/daemons/controld/controld_te_utils.c
@@ -1,718 +1,725 @@
/*
- * Copyright 2004-2018 Andrew Beekhof
+ * Copyright 2004-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
crm_trigger_t *stonith_reconnect = NULL;
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer = NULL;
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GListPtr stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GListPtr iter = stonith_cleanup_list;
while (iter != NULL) {
GListPtr tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (safe_str_eq(target, iter_name)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup()
{
if (stonith_cleanup_list) {
GListPtr iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup()
{
GListPtr iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
static gboolean
fail_incompletable_stonith(crm_graph_t * graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (synapse->confirmed) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
if (action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
{
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
/* cbchan will be garbage at this point, arrange for it to be reset */
if(stonith_api) {
stonith_api->state = stonith_disconnected;
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
char *te_client_id = NULL;
#ifdef HAVE_SYS_REBOOT_H
# include
# include
#endif
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
if(te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (st_event == NULL) {
crm_err("Notify data not found");
return;
}
crmd_alert_fencing_op(st_event);
if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
crm_notice("%s was successfully unfenced by %s (at the request of %s)",
st_event->target, st_event->executioner ? st_event->executioner : "", st_event->origin);
/* TODO: Hook up st_event->device */
return;
} else if (safe_str_eq("on", st_event->action)) {
crm_err("Unfencing of %s by %s failed: %s (%d)",
st_event->target, st_event->executioner ? st_event->executioner : "",
pcmk_strerror(st_event->result), st_event->result);
return;
} else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
crm_crit("We were allegedly just fenced by %s for %s!",
st_event->executioner ? st_event->executioner : "", st_event->origin); /* Dumps blackbox if enabled */
qb_log_fini(); /* Try to get the above log message to disk - somehow */
/* Get out ASAP and do not come back up.
*
* Triggering a reboot is also not the worst idea either since
* the rest of the cluster thinks we're safely down
*/
#ifdef RB_HALT_SYSTEM
reboot(RB_HALT_SYSTEM);
#endif
/*
* If reboot() fails or is not supported, coming back up will
* probably lead to a situation where the other nodes set our
* status to 'lost' because of the fencing callback and will
* discard subsequent election votes with:
*
* Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
*
* So just stay dead, something is seriously messed up anyway.
*
*/
exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
return;
}
/* Update the count of stonith failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
if (st_event->result == pcmk_ok) {
st_fail_count_reset(st_event->target);
} else {
st_fail_count_increment(st_event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
CRM_XS " initiator=%s ref=%s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->action,
st_event->executioner ? st_event->executioner : "",
(st_event->client_origin? st_event->client_origin : ""),
pcmk_strerror(st_event->result),
st_event->origin, st_event->id);
if (st_event->result == pcmk_ok) {
crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
const char *uuid = NULL;
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
if(AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the PE creating fence pseudo-events for the guests.
*/
if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
* that invoked stonith to fence someone
*/
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
&& !is_set(peer->flags, crm_remote_node)) {
crm_notice("Target %s our leader %s (recorded: %s)",
fsa_our_dc ? "was" : "may have been", st_event->target,
fsa_our_dc ? fsa_our_dc : "");
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (we_are_executioner) {
send_stonith_update(NULL, st_event->target, uuid);
}
add_stonith_cleanup(st_event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(st_event->target);
}
crmd_peer_down(peer, TRUE);
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(void)
{
/* trigger a sync in 5s to give more nodes the
* chance to show up so that we don't create
* unnecessary stonith-history-sync traffic
*/
/* as we are finally checking the stonith-connection
* in do_stonith_history_sync we should be fine
* leaving stonith_history_sync_time & stonith_history_sync_trigger
* around
*/
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if(stonith_history_sync_timer == NULL) {
stonith_history_sync_timer =
mainloop_timer_add("history_sync", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer);
}
+/*!
+ * \brief Connect to fencer
+ *
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
+ *
+ * \return TRUE
+ * \note If user_data is NULL, this will wait 2s between attempts, for up to
+ * 30 attempts, meaning the controller could be blocked as long as 58s.
+ */
gboolean
te_connect_stonith(gpointer user_data)
{
- int lpc = 0;
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if (stonith_api->state != stonith_disconnected) {
- crm_trace("Still connected");
+ crm_trace("Already connected to fencer, no need to retry");
return TRUE;
}
- for (lpc = 0; lpc < 30; lpc++) {
- crm_debug("Attempting connection to fencing daemon...");
-
- sleep(1);
- rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
-
- if (rc == pcmk_ok) {
- break;
+ if (user_data == NULL) {
+ // Blocking (retry failures now until successful)
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 30 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
-
- if (user_data != NULL) {
+ } else {
+ // Non-blocking (retry failures later in main loop)
+ rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
+ if (rc != pcmk_ok) {
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
- crm_err("Sign-in failed: triggered a retry");
+ crm_err("Fencer connection failed (will retry): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
mainloop_set_trigger(stonith_reconnect);
} else {
- crm_info("Sign-in failed, but no longer required");
+ crm_info("Fencer connection failed (ignoring because no longer required): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return TRUE;
}
-
- crm_err("Sign-in failed: pausing and trying again in 2s...");
- sleep(1);
}
- CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */
- stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
- tengine_stonith_connection_destroy);
-
- stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
- tengine_stonith_notify);
-
- crm_trace("Connected");
+ if (rc == pcmk_ok) {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ tengine_stonith_connection_destroy);
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_FENCE,
+ tengine_stonith_notify);
+ }
return TRUE;
}
gboolean
stop_te_timer(crm_action_timer_t * timer)
{
if (timer == NULL) {
return FALSE;
}
if (timer->source_id != 0) {
crm_trace("Stopping action timer");
g_source_remove(timer->source_id);
timer->source_id = 0;
} else {
crm_trace("Action timer was already stopped");
return FALSE;
}
return TRUE;
}
gboolean
te_graph_trigger(gpointer user_data)
{
enum transition_status graph_rc = -1;
if (transition_graph == NULL) {
crm_debug("Nothing to do");
return TRUE;
}
crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
return TRUE;
break;
default:
break;
}
if (transition_graph->complete == FALSE) {
int limit = transition_graph->batch_limit;
transition_graph->batch_limit = throttle_get_total_job_limit(limit);
graph_rc = run_graph(transition_graph);
transition_graph->batch_limit = limit; /* Restore the configured value */
/* significant overhead... */
/* print_graph(LOG_TRACE, transition_graph); */
if (graph_rc == transition_active) {
crm_trace("Transition not yet complete");
return TRUE;
} else if (graph_rc == transition_pending) {
crm_trace("Transition not yet complete - no actions fired");
return TRUE;
}
if (graph_rc != transition_complete) {
crm_warn("Transition failed: %s", transition_status(graph_rc));
print_graph(LOG_NOTICE, transition_graph);
}
}
crm_debug("Transition %d is now complete", transition_graph->id);
transition_graph->complete = TRUE;
notify_crmd(transition_graph);
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
crm_trace("%s:%d - Triggered graph processing", fn, line);
mainloop_set_trigger(transition_trigger);
}
static struct abort_timer_s {
bool aborted;
guint id;
int priority;
enum transition_action action;
const char *text;
} abort_timer = { 0, };
static gboolean
abort_timer_popped(gpointer data)
{
if (AM_I_DC && (abort_timer.aborted == FALSE)) {
abort_transition(abort_timer.priority, abort_timer.action,
abort_timer.text, NULL);
}
abort_timer.id = 0;
return FALSE; // do not immediately reschedule timer
}
/*!
* \internal
* \brief Abort transition after delay, if not already aborted in that time
*
* \param[in] abort_text Must be literal string
*/
void
abort_after_delay(int abort_priority, enum transition_action abort_action,
const char *abort_text, guint delay_ms)
{
if (abort_timer.id) {
// Timer already in progress, stop and reschedule
g_source_remove(abort_timer.id);
}
abort_timer.aborted = FALSE;
abort_timer.priority = abort_priority;
abort_timer.action = abort_action;
abort_timer.text = abort_text;
abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL);
}
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
const char *abort_text, xmlNode * reason, const char *fn, int line)
{
int add[] = { 0, 0, 0 };
int del[] = { 0, 0, 0 };
int level = LOG_INFO;
xmlNode *diff = NULL;
xmlNode *change = NULL;
CRM_CHECK(transition_graph != NULL, return);
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
crm_info("Abort %s suppressed: state=%s (complete=%d)",
abort_text, fsa_state2string(fsa_state), transition_graph->complete);
return;
default:
break;
}
abort_timer.aborted = TRUE;
/* Make sure any queued calculations are discarded ASAP */
free(fsa_pe_ref);
fsa_pe_ref = NULL;
if (transition_graph->complete == FALSE) {
if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
level = LOG_NOTICE;
}
}
if(reason) {
xmlNode *search = NULL;
for(search = reason; search; search = search->parent) {
if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
diff = search;
break;
}
}
if(diff) {
xml_patch_versions(diff, add, del);
for(search = reason; search; search = search->parent) {
if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
change = search;
break;
}
}
}
}
if(reason == NULL) {
do_crm_log(level, "Transition %d aborted: %s "CRM_XS" source=%s:%d complete=%s",
transition_graph->id, abort_text, fn, line,
(transition_graph->complete? "true" : "false"));
} else if(change == NULL) {
char *local_path = xml_get_path(reason);
do_crm_log(level, "Transition %d aborted by %s.%s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id, TYPE(reason), ID(reason), abort_text,
add[0], add[1], add[2], fn, line, local_path,
(transition_graph->complete? "true" : "false"));
free(local_path);
} else {
const char *kind = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *path = crm_element_value(change, XML_DIFF_PATH);
if(change == reason) {
if(strcmp(op, "create") == 0) {
reason = reason->children;
} else if(strcmp(op, "modify") == 0) {
reason = first_named_child(reason, XML_DIFF_RESULT);
if(reason) {
reason = reason->children;
}
}
}
kind = TYPE(reason);
if(strcmp(op, "delete") == 0) {
const char *shortpath = strrchr(path, '/');
do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id,
(shortpath? (shortpath + 1) : path), abort_text,
add[0], add[1], add[2], fn, line, path,
(transition_graph->complete? "true" : "false"));
} else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id,
crm_element_value(reason, XML_ATTR_ID), op,
crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
abort_text, add[0], add[1], add[2], fn, line, path,
(transition_graph->complete? "true" : "false"));
} else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
transition_graph->id,
crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
magic, add[0], add[1], add[2], fn, line,
(transition_graph->complete? "true" : "false"));
} else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
|| safe_str_eq(XML_CIB_TAG_NODE, kind)) {
const char *uname = crm_peer_uname(ID(reason));
do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
transition_graph->id,
kind, op, (uname? uname : ID(reason)), abort_text,
add[0], add[1], add[2], fn, line,
(transition_graph->complete? "true" : "false"));
} else {
const char *id = ID(reason);
do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id,
TYPE(reason), (id? id : ""), (op? op : "change"),
abort_text, add[0], add[1], add[2], fn, line, path,
(transition_graph->complete? "true" : "false"));
}
}
if (transition_graph->complete) {
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
}
return;
}
mainloop_set_trigger(transition_trigger);
}
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index 21bb0edde8..e2fdfcad71 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -1,630 +1,622 @@
/*
* Copyright 2012-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "pacemaker-execd.h"
#if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE)
# define ENABLE_PCMK_REMOTE
// Hidden in liblrmd
extern int lrmd_tls_send_msg(crm_remote_t *session, xmlNode *msg, uint32_t id,
const char *msg_type);
#endif
static GMainLoop *mainloop = NULL;
static qb_ipcs_service_t *ipcs = NULL;
static stonith_t *stonith_api = NULL;
int lrmd_call_id = 0;
#ifdef ENABLE_PCMK_REMOTE
/* whether shutdown request has been sent */
static sig_atomic_t shutting_down = FALSE;
/* timer for waiting for acknowledgment of shutdown request */
static guint shutdown_ack_timer = 0;
static gboolean lrmd_exit(gpointer data);
#endif
static void
stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e)
{
stonith_api->state = stonith_disconnected;
crm_err("Connection to fencer lost");
stonith_connection_failed();
}
stonith_t *
get_stonith_connection(void)
{
if (stonith_api && stonith_api->state == stonith_disconnected) {
stonith_api_delete(stonith_api);
stonith_api = NULL;
}
- if (!stonith_api) {
- int rc = 0;
- int tries = 10;
+ if (stonith_api == NULL) {
+ int rc = pcmk_ok;
stonith_api = stonith_api_new();
- do {
- rc = stonith_api->cmds->connect(stonith_api, "pacemaker-execd", NULL);
- if (rc == pcmk_ok) {
- stonith_api->cmds->register_notification(stonith_api,
- T_STONITH_NOTIFY_DISCONNECT,
- stonith_connection_destroy_cb);
- break;
- }
- sleep(1);
- tries--;
- } while (tries);
-
- if (rc) {
- crm_err("Unable to connect to stonith daemon to execute command. error: %s",
- pcmk_strerror(rc));
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 10 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
stonith_api_delete(stonith_api);
stonith_api = NULL;
+ } else {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ stonith_connection_destroy_cb);
}
}
return stonith_api;
}
static int32_t
lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
lrmd_ipc_created(qb_ipcs_connection_t * c)
{
crm_client_t *new_client = crm_client_get(c);
crm_trace("Connection %p", c);
CRM_ASSERT(new_client != NULL);
/* Now that the connection is offically established, alert
* the other clients a new connection exists. */
notify_of_new_client(new_client);
}
static int32_t
lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *request = crm_ipcs_recv(client, data, size, &id, &flags);
CRM_CHECK(client != NULL, crm_err("Invalid client");
return FALSE);
CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
return FALSE);
CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client);
return FALSE);
if (!request) {
return 0;
}
if (!client->name) {
const char *value = crm_element_value(request, F_LRMD_CLIENTNAME);
if (value == NULL) {
client->name = crm_itoa(crm_ipcs_client_pid(c));
} else {
client->name = strdup(value);
}
}
lrmd_call_id++;
if (lrmd_call_id < 1) {
lrmd_call_id = 1;
}
crm_xml_add(request, F_LRMD_CLIENTID, client->id);
crm_xml_add(request, F_LRMD_CLIENTNAME, client->name);
crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id);
process_lrmd_message(client, id, request);
free_xml(request);
return 0;
}
/*!
* \internal
* \brief Free a client connection, and exit if appropriate
*
* \param[in] client Client connection to free
*/
void
lrmd_client_destroy(crm_client_t *client)
{
crm_client_destroy(client);
#ifdef ENABLE_PCMK_REMOTE
/* If we were waiting to shut down, we can now safely do so
* if there are no more proxied IPC providers
*/
if (shutting_down && (ipc_proxy_get_provider() == NULL)) {
lrmd_exit(NULL);
}
#endif
}
static int32_t
lrmd_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
client_disconnect_cleanup(client->id);
#ifdef ENABLE_PCMK_REMOTE
ipc_proxy_remove_provider(client);
#endif
lrmd_client_destroy(client);
return 0;
}
static void
lrmd_ipc_destroy(qb_ipcs_connection_t * c)
{
lrmd_ipc_closed(c);
crm_trace("Connection %p", c);
}
static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = {
.connection_accept = lrmd_ipc_accept,
.connection_created = lrmd_ipc_created,
.msg_process = lrmd_ipc_dispatch,
.connection_closed = lrmd_ipc_closed,
.connection_destroyed = lrmd_ipc_destroy
};
int
lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply)
{
crm_trace("Sending reply (%d) to client (%s)", id, client->id);
switch (client->kind) {
case CRM_CLIENT_IPC:
return crm_ipcs_send(client, id, reply, FALSE);
#ifdef ENABLE_PCMK_REMOTE
case CRM_CLIENT_TLS:
return lrmd_tls_send_msg(client->remote, reply, id, "reply");
#endif
default:
crm_err("Could not send reply: unknown client type %d",
client->kind);
}
return -ENOTCONN;
}
int
lrmd_server_send_notify(crm_client_t * client, xmlNode * msg)
{
crm_trace("Sending notification to client (%s)", client->id);
switch (client->kind) {
case CRM_CLIENT_IPC:
if (client->ipcs == NULL) {
crm_trace("Could not notify local client: disconnected");
return -ENOTCONN;
}
return crm_ipcs_send(client, 0, msg, crm_ipc_server_event);
#ifdef ENABLE_PCMK_REMOTE
case CRM_CLIENT_TLS:
if (client->remote == NULL) {
crm_trace("Could not notify remote client: disconnected");
return -ENOTCONN;
}
return lrmd_tls_send_msg(client->remote, msg, 0, "notify");
#endif
default:
crm_err("Could not notify client: unknown type %d", client->kind);
}
return -ENOTCONN;
}
/*!
* \internal
* \brief Clean up and exit immediately
*
* \param[in] data Ignored
*
* \return Doesn't return
* \note This can be used as a timer callback.
*/
static gboolean
lrmd_exit(gpointer data)
{
crm_info("Terminating with %d clients",
crm_hash_table_size(client_connections));
if (stonith_api) {
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
stonith_api->cmds->disconnect(stonith_api);
stonith_api_delete(stonith_api);
}
if (ipcs) {
mainloop_del_ipc_server(ipcs);
}
#ifdef ENABLE_PCMK_REMOTE
lrmd_tls_server_destroy();
ipc_proxy_cleanup();
#endif
crm_client_cleanup();
g_hash_table_destroy(rsc_list);
if (mainloop) {
lrmd_drain_alerts(mainloop);
}
crm_exit(CRM_EX_OK);
return FALSE;
}
/*!
* \internal
* \brief Request cluster shutdown if appropriate, otherwise exit immediately
*
* \param[in] nsig Signal that caused invocation (ignored)
*/
static void
lrmd_shutdown(int nsig)
{
#ifdef ENABLE_PCMK_REMOTE
crm_client_t *ipc_proxy = ipc_proxy_get_provider();
/* If there are active proxied IPC providers, then we may be running
* resources, so notify the cluster that we wish to shut down.
*/
if (ipc_proxy) {
if (shutting_down) {
crm_notice("Waiting for cluster to stop resources before exiting");
return;
}
crm_info("Sending shutdown request to cluster");
if (ipc_proxy_shutdown_req(ipc_proxy) < 0) {
crm_crit("Shutdown request failed, exiting immediately");
} else {
/* We requested a shutdown. Now, we need to wait for an
* acknowledgement from the proxy host (which ensures the proxy host
* supports shutdown requests), then wait for all proxy hosts to
* disconnect (which ensures that all resources have been stopped).
*/
shutting_down = TRUE;
/* Stop accepting new proxy connections */
lrmd_tls_server_destroy();
/* Older controller versions will never acknowledge our request, so
* set a fairly short timeout to exit quickly in that case. If we
* get the ack, we'll defuse this timer.
*/
shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL);
/* Currently, we let the OS kill us if the clients don't disconnect
* in a reasonable time. We could instead set a long timer here
* (shorter than what the OS is likely to use) and exit immediately
* if it pops.
*/
return;
}
}
#endif
lrmd_exit(NULL);
}
/*!
* \internal
* \brief Defuse short exit timer if shutting down
*/
void handle_shutdown_ack()
{
#ifdef ENABLE_PCMK_REMOTE
if (shutting_down) {
crm_info("Received shutdown ack");
if (shutdown_ack_timer > 0) {
g_source_remove(shutdown_ack_timer);
shutdown_ack_timer = 0;
}
return;
}
#endif
crm_debug("Ignoring unexpected shutdown ack");
}
/*!
* \internal
* \brief Make short exit timer fire immediately
*/
void handle_shutdown_nack()
{
#ifdef ENABLE_PCMK_REMOTE
if (shutting_down) {
crm_info("Received shutdown nack");
if (shutdown_ack_timer > 0) {
g_source_remove(shutdown_ack_timer);
shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL);
}
return;
}
#endif
crm_debug("Ignoring unexpected shutdown nack");
}
static pid_t main_pid = 0;
static void
sigdone(void)
{
exit(CRM_EX_OK);
}
static void
sigreap(void)
{
pid_t pid = 0;
int status;
do {
/*
* Opinions seem to differ as to what to put here:
* -1, any child process
* 0, any child process whose process group ID is equal to that of the calling process
*/
pid = waitpid(-1, &status, WNOHANG);
if(pid == main_pid) {
/* Exit when pacemaker-remote exits and use the same return code */
if (WIFEXITED(status)) {
exit(WEXITSTATUS(status));
}
exit(CRM_EX_ERROR);
}
} while (pid > 0);
}
static struct {
int sig;
void (*handler)(void);
} sigmap[] = {
{ SIGCHLD, sigreap },
{ SIGINT, sigdone },
};
static void spawn_pidone(int argc, char **argv, char **envp)
{
sigset_t set;
if (getpid() != 1) {
return;
}
sigfillset(&set);
sigprocmask(SIG_BLOCK, &set, 0);
main_pid = fork();
switch (main_pid) {
case 0:
sigprocmask(SIG_UNBLOCK, &set, NULL);
setsid();
setpgid(0, 0);
/* Child remains as pacemaker-remoted */
return;
case -1:
perror("fork");
}
/* Parent becomes the reaper of zombie processes */
/* Safe to initialize logging now if needed */
#ifdef HAVE___PROGNAME
/* Differentiate ourselves in the 'ps' output */
{
char *p;
int i, maxlen;
char *LastArgv = NULL;
const char *name = "pcmk-init";
for(i = 0; i < argc; i++) {
if(!i || (LastArgv + 1 == argv[i]))
LastArgv = argv[i] + strlen(argv[i]);
}
for(i = 0; envp[i] != NULL; i++) {
if((LastArgv + 1) == envp[i]) {
LastArgv = envp[i] + strlen(envp[i]);
}
}
maxlen = (LastArgv - argv[0]) - 2;
i = strlen(name);
/* We can overwrite individual argv[] arguments */
snprintf(argv[0], maxlen, "%s", name);
/* Now zero out everything else */
p = &argv[0][i];
while(p < LastArgv)
*p++ = '\0';
argv[1] = NULL;
}
#endif /* HAVE___PROGNAME */
while (1) {
int sig;
size_t i;
sigwait(&set, &sig);
for (i = 0; i < DIMOF(sigmap); i++) {
if (sigmap[i].sig == sig) {
sigmap[i].handler();
break;
}
}
}
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"logfile", 1, 0, 'l', "\tSend logs to the additional named logfile"},
#ifdef ENABLE_PCMK_REMOTE
{"port", 1, 0, 'p', "\tPort to listen on"},
#endif
{0, 0, 0, 0}
};
/* *INDENT-ON* */
#ifdef ENABLE_PCMK_REMOTE
# define EXECD_TYPE "remote"
#else
# define EXECD_TYPE "local"
#endif
int
main(int argc, char **argv, char **envp)
{
int flag = 0;
int index = 0;
int bump_log_num = 0;
const char *option = NULL;
/* If necessary, create PID1 now before any FDs are opened */
spawn_pidone(argc, argv, envp);
#ifndef ENABLE_PCMK_REMOTE
crm_log_preinit("pacemaker-execd", argc, argv);
crm_set_options(NULL, "[options]", long_options,
"Resource agent executor daemon for cluster nodes");
#else
crm_log_preinit("pacemaker-remoted", argc, argv);
crm_set_options(NULL, "[options]", long_options,
"Resource agent executor daemon for Pacemaker Remote nodes");
#endif
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1) {
break;
}
switch (flag) {
case 'l':
crm_add_logfile(optarg);
break;
case 'p':
setenv("PCMK_remote_port", optarg, 1);
break;
case 'V':
bump_log_num++;
break;
case '?':
case '$':
crm_help(flag, CRM_EX_OK);
break;
default:
crm_help('?', CRM_EX_USAGE);
break;
}
}
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
while (bump_log_num > 0) {
crm_bump_log_level(argc, argv);
bump_log_num--;
}
option = daemon_option("logfacility");
if(option && safe_str_neq(option, "none")) {
setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */
}
option = daemon_option("logfile");
if(option && safe_str_neq(option, "none")) {
setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */
if (daemon_option_enabled(crm_system_name, "debug")) {
setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */
}
}
crm_notice("Starting Pacemaker " EXECD_TYPE " executor");
/* The presence of this variable allegedly controls whether child
* processes like httpd will try and use Systemd's sd_notify
* API
*/
unsetenv("NOTIFY_SOCKET");
/* Used by RAs - Leave owned by root */
crm_build_path(CRM_RSCTMP_DIR, 0755);
rsc_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc);
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
crm_exit(CRM_EX_FATAL);
}
#ifdef ENABLE_PCMK_REMOTE
if (lrmd_init_remote_tls_server() < 0) {
crm_err("Failed to create TLS listener: shutting down and staying down");
crm_exit(CRM_EX_FATAL);
}
ipc_proxy_init();
#endif
mainloop_add_signal(SIGTERM, lrmd_shutdown);
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections");
g_main_loop_run(mainloop);
/* should never get here */
lrmd_exit(NULL);
return CRM_EX_OK;
}
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
index c5ce1ab01a..4552fc1178 100644
--- a/daemons/fenced/cts-fence-helper.c
+++ b/daemons/fenced/cts-fence-helper.c
@@ -1,658 +1,654 @@
/*
- * Copyright 2009-2018 Andrew Beekhof
+ * Copyright 2009-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static GMainLoop *mainloop = NULL;
static crm_trigger_t *trig = NULL;
static int mainloop_iter = 0;
static int callback_rc = 0;
typedef void (*mainloop_test_iteration_cb) (int check_event);
#define MAINLOOP_DEFAULT_TIMEOUT 2
#define mainloop_test_done(pass) \
if (pass) { \
crm_info("SUCCESS - %s", __FUNCTION__); \
mainloop_iter++; \
mainloop_set_trigger(trig); \
} else { \
crm_err("FAILURE = %s async_callback %d", __FUNCTION__, callback_rc); \
crm_exit(CRM_EX_ERROR); \
} \
callback_rc = 0; \
/* *INDENT-OFF* */
enum test_modes {
test_standard = 0, // test using a specific developer environment
test_passive, // watch notifications only
test_api_sanity, // sanity-test stonith client API using fence_dummy
test_api_mainloop, // sanity-test mainloop code with async responses
};
static struct crm_option long_options[] = {
{"verbose", 0, 0, 'V'},
{"version", 0, 0, '$'},
{"help", 0, 0, '?'},
{"passive", 0, 0, 'p'},
{"api_test", 0, 0, 't'},
{"mainloop_api_test", 0, 0, 'm'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
static stonith_t *st = NULL;
static struct pollfd pollfd;
static int st_opts = st_opt_sync_call;
static int expected_notifications = 0;
static int verbose = 0;
static void
dispatch_helper(int timeout)
{
int rc;
crm_debug("Looking for notification");
pollfd.events = POLLIN;
while (true) {
rc = poll(&pollfd, 1, timeout); /* wait 10 minutes, -1 forever */
if (rc > 0) {
if (!stonith_dispatch(st)) {
break;
}
} else {
break;
}
}
}
static void
st_callback(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
crm_exit(CRM_EX_DISCONNECT);
}
crm_notice("Operation %s requested by %s %s for peer %s. %s reported: %s (ref=%s)",
e->operation, e->origin, e->result == pcmk_ok ? "completed" : "failed",
e->target, e->executioner ? e->executioner : "",
pcmk_strerror(e->result), e->id);
if (expected_notifications) {
expected_notifications--;
}
}
static void
st_global_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
crm_notice("Call id %d completed with rc %d", data->call_id, data->rc);
}
static void
passive_test(void)
{
int rc = 0;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
- crm_debug("Connect: %d", rc);
-
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
st_global_callback);
dispatch_helper(600 * 1000);
}
#define single_test(cmd, str, num_notifications, expected_rc) \
{ \
int rc = 0; \
rc = cmd; \
expected_notifications = 0; \
if (num_notifications) { \
expected_notifications = num_notifications; \
dispatch_helper(500); \
} \
if (rc != expected_rc) { \
crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", expected_rc, rc, pcmk_strerror(rc), str); \
crm_exit(CRM_EX_ERROR); \
} else if (expected_notifications) { \
crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \
num_notifications, num_notifications - expected_notifications, str); \
crm_exit(CRM_EX_ERROR); \
} else { \
if (verbose) { \
crm_info("SUCCESS - %s: %d", str, rc); \
} else { \
crm_debug("SUCCESS - %s: %d", str, rc); \
} \
} \
}\
static void
run_fence_failure_test(void)
{
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4");
params = stonith_key_value_add(params, "mode", "fail");
single_test(st->
cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params),
"Register device1 for failure test", 1, 0);
single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0),
"Fence failure results off", 1, -pcmk_err_generic);
single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0),
"Fence failure results reboot", 1, -pcmk_err_generic);
single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
"Remove device1 for failure test", 1, 0);
stonith_key_value_freeall(params, 1, 1);
}
static void
run_fence_failure_rollover_test(void)
{
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4");
params = stonith_key_value_add(params, "mode", "fail");
single_test(st->
cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params),
"Register device1 for rollover test", 1, 0);
stonith_key_value_freeall(params, 1, 1);
params = NULL;
params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4");
params = stonith_key_value_add(params, "mode", "pass");
single_test(st->
cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params),
"Register device2 for rollover test", 1, 0);
single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0),
"Fence rollover results off", 1, 0);
/* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */
single_test(st->cmds->fence(st, st_opts, "false_1_node2", "on", 3, 0),
"Fence rollover results on", 1, -ENODEV);
single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
"Remove device1 for rollover tests", 1, 0);
single_test(st->cmds->remove_device(st, st_opts, "test-id2"),
"Remove device2 for rollover tests", 1, 0);
stonith_key_value_freeall(params, 1, 1);
}
static void
run_standard_test(void)
{
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4");
params = stonith_key_value_add(params, "mode", "pass");
params = stonith_key_value_add(params, "mock_dynamic_hosts", "false_1_node1 false_1_node2");
single_test(st->
cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_dummy", params),
"Register", 1, 0);
stonith_key_value_freeall(params, 1, 1);
params = NULL;
single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), "list", 1, 0);
single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0);
single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1),
"Status false_1_node2", 1, 0);
single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1),
"Status false_1_node1", 1, 0);
single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0),
"Fence unknown-host (expected failure)", 0, -ENODEV);
single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0),
"Fence false_1_node1", 1, 0);
/* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */
single_test(st->cmds->fence(st, st_opts, "false_1_node1", "on", 1, 0),
"Unfence false_1_node1", 1, -ENODEV);
/* Confirm that an invalid level index is rejected */
single_test(st->cmds->register_level(st, st_opts, "node1", 999, params),
"Attempt to register an invalid level index", 0, -EINVAL);
single_test(st->cmds->remove_device(st, st_opts, "test-id"), "Remove test-id", 1, 0);
stonith_key_value_freeall(params, 1, 1);
}
static void
sanity_tests(void)
{
int rc = 0;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
- crm_debug("Connect: %d", rc);
-
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
st_global_callback);
crm_info("Starting API Sanity Tests");
run_standard_test();
run_fence_failure_test();
run_fence_failure_rollover_test();
crm_info("Sanity Tests Passed");
}
static void
standard_dev_test(void)
{
int rc = 0;
char *tmp = NULL;
stonith_key_value_t *params = NULL;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
- crm_debug("Connect: %d", rc);
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
params = stonith_key_value_add(params, "pcmk_host_map", "some-host=pcmk-7 true_1_node1=3,4");
rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params);
crm_debug("Register: %d", rc);
rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10);
crm_debug("List: %d output: %s", rc, tmp ? tmp : "");
rc = st->cmds->monitor(st, st_opts, "test-id", 10);
crm_debug("Monitor: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10);
crm_debug("Status false_1_node2: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60, 0);
crm_debug("Fence unknown-host: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
rc = st->cmds->fence(st, st_opts, "false_1_node1", "off", 60, 0);
crm_debug("Fence false_1_node1: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0);
crm_debug("Unfence false_1_node1: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
rc = st->cmds->fence(st, st_opts, "some-host", "off", 10, 0);
crm_debug("Fence alias: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10);
crm_debug("Status alias: %d", rc);
rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0);
crm_debug("Unfence false_1_node1: %d", rc);
rc = st->cmds->remove_device(st, st_opts, "test-id");
crm_debug("Remove test-id: %d", rc);
stonith_key_value_freeall(params, 1, 1);
}
static void
iterate_mainloop_tests(gboolean event_ready);
static void
mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
callback_rc = data->rc;
iterate_mainloop_tests(TRUE);
}
static int
register_callback_helper(int callid)
{
return st->cmds->register_callback(st,
callid,
MAINLOOP_DEFAULT_TIMEOUT,
st_opt_timeout_updates, NULL, "callback", mainloop_callback);
}
static void
test_async_fence_pass(int check_event)
{
int rc = 0;
if (check_event) {
if (callback_rc != 0) {
mainloop_test_done(FALSE);
} else {
mainloop_test_done(TRUE);
}
return;
}
rc = st->cmds->fence(st, 0, "true_1_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
if (rc < 0) {
crm_err("fence failed with rc %d", rc);
mainloop_test_done(FALSE);
}
register_callback_helper(rc);
/* wait for event */
}
#define CUSTOM_TIMEOUT_ADDITION 10
static void
test_async_fence_custom_timeout(int check_event)
{
int rc = 0;
static time_t begin = 0;
if (check_event) {
uint32_t diff = (time(NULL) - begin);
if (callback_rc != -ETIME) {
mainloop_test_done(FALSE);
} else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) {
crm_err
("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %d",
CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff);
mainloop_test_done(FALSE);
} else {
mainloop_test_done(TRUE);
}
return;
}
begin = time(NULL);
rc = st->cmds->fence(st, 0, "custom_timeout_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
if (rc < 0) {
crm_err("fence failed with rc %d", rc);
mainloop_test_done(FALSE);
}
register_callback_helper(rc);
/* wait for event */
}
static void
test_async_fence_timeout(int check_event)
{
int rc = 0;
if (check_event) {
if (callback_rc != -ENODEV) {
mainloop_test_done(FALSE);
} else {
mainloop_test_done(TRUE);
}
return;
}
rc = st->cmds->fence(st, 0, "false_1_node2", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
if (rc < 0) {
crm_err("fence failed with rc %d", rc);
mainloop_test_done(FALSE);
}
register_callback_helper(rc);
/* wait for event */
}
static void
test_async_monitor(int check_event)
{
int rc = 0;
if (check_event) {
if (callback_rc) {
mainloop_test_done(FALSE);
} else {
mainloop_test_done(TRUE);
}
return;
}
rc = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT);
if (rc < 0) {
crm_err("monitor failed with rc %d", rc);
mainloop_test_done(FALSE);
}
register_callback_helper(rc);
/* wait for event */
}
static void
test_register_async_devices(int check_event)
{
char buf[16] = { 0, };
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2");
params = stonith_key_value_add(params, "mode", "fail");
st->cmds->register_device(st, st_opts, "false_1", "stonith-ng", "fence_dummy", params);
stonith_key_value_freeall(params, 1, 1);
params = NULL;
params = stonith_key_value_add(params, "pcmk_host_map", "true_1_node1=1,2");
params = stonith_key_value_add(params, "mode", "pass");
st->cmds->register_device(st, st_opts, "true_1", "stonith-ng", "fence_dummy", params);
stonith_key_value_freeall(params, 1, 1);
params = NULL;
params = stonith_key_value_add(params, "pcmk_host_map", "custom_timeout_node1=1,2");
params = stonith_key_value_add(params, "mode", "fail");
params = stonith_key_value_add(params, "delay", "1000");
snprintf(buf, sizeof(buf) - 1, "%d", MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION);
params = stonith_key_value_add(params, "pcmk_off_timeout", buf);
st->cmds->register_device(st, st_opts, "false_custom_timeout", "stonith-ng", "fence_dummy",
params);
stonith_key_value_freeall(params, 1, 1);
mainloop_test_done(TRUE);
}
static void
try_mainloop_connect(int check_event)
{
- int tries = 10;
- int i = 0;
- int rc = 0;
+ int rc = stonith_api_connect_retry(st, crm_system_name, 10);
- for (i = 0; i < tries; i++) {
- rc = st->cmds->connect(st, crm_system_name, NULL);
-
- if (!rc) {
- crm_info("stonith client connection established");
- mainloop_test_done(TRUE);
- return;
- } else {
- crm_info("stonith client connection failed");
- }
- sleep(1);
+ if (rc == pcmk_ok) {
+ mainloop_test_done(TRUE);
+ return;
}
-
crm_err("API CONNECTION FAILURE");
mainloop_test_done(FALSE);
}
static void
iterate_mainloop_tests(gboolean event_ready)
{
static mainloop_test_iteration_cb callbacks[] = {
try_mainloop_connect,
test_register_async_devices,
test_async_monitor,
test_async_fence_pass,
test_async_fence_timeout,
test_async_fence_custom_timeout,
};
if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) {
/* all tests ran, everything passed */
crm_info("ALL MAINLOOP TESTS PASSED!");
crm_exit(CRM_EX_OK);
}
callbacks[mainloop_iter] (event_ready);
}
static gboolean
trigger_iterate_mainloop_tests(gpointer user_data)
{
iterate_mainloop_tests(FALSE);
return TRUE;
}
static void
test_shutdown(int nsig)
{
int rc = 0;
if (st) {
rc = st->cmds->disconnect(st);
crm_info("Disconnect: %d", rc);
crm_debug("Destroy");
stonith_api_delete(st);
}
if (rc) {
crm_exit(CRM_EX_ERROR);
}
}
static void
mainloop_tests(void)
{
trig = mainloop_add_trigger(G_PRIORITY_HIGH, trigger_iterate_mainloop_tests, NULL);
mainloop_set_trigger(trig);
mainloop_add_signal(SIGTERM, test_shutdown);
crm_info("Starting");
mainloop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(mainloop);
}
int
main(int argc, char **argv)
{
int argerr = 0;
int flag;
int option_index = 0;
enum test_modes mode = test_standard;
crm_log_cli_init("cts-fence-helper");
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1) {
break;
}
switch (flag) {
case 'V':
verbose = 1;
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
case 'p':
mode = test_passive;
break;
case 't':
mode = test_api_sanity;
break;
case 'm':
mode = test_api_mainloop;
break;
default:
++argerr;
break;
}
}
crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv,
FALSE);
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
crm_debug("Create");
st = stonith_api_new();
switch (mode) {
case test_standard:
standard_dev_test();
break;
case test_passive:
passive_test();
break;
case test_api_sanity:
sanity_tests();
break;
case test_api_mainloop:
mainloop_tests();
break;
}
test_shutdown(0);
return CRM_EX_OK;
}
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index b7365a9700..b6407324df 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -1,550 +1,554 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Fencing aka. STONITH
* \ingroup fencing
*/
#ifndef STONITH_NG__H
# define STONITH_NG__H
# include
# include
# include // bool
# include // uint32_t
# include // time_t
# define T_STONITH_NOTIFY_DISCONNECT "st_notify_disconnect"
# define T_STONITH_NOTIFY_FENCE "st_notify_fence"
# define T_STONITH_NOTIFY_HISTORY "st_notify_history"
/* *INDENT-OFF* */
enum stonith_state {
stonith_connected_command,
stonith_connected_query,
stonith_disconnected,
};
enum stonith_call_options {
st_opt_none = 0x00000000,
st_opt_verbose = 0x00000001,
st_opt_allow_suicide = 0x00000002,
st_opt_manual_ack = 0x00000008,
st_opt_discard_reply = 0x00000010,
/* st_opt_all_replies = 0x00000020, */
st_opt_topology = 0x00000040,
st_opt_scope_local = 0x00000100,
st_opt_cs_nodeid = 0x00000200,
st_opt_sync_call = 0x00001000,
/*! Allow the timeout period for a callback to be adjusted
* based on the time the server reports the operation will take. */
st_opt_timeout_updates = 0x00002000,
/*! Only report back if operation is a success in callback */
st_opt_report_only_success = 0x00004000,
/* used where ever apropriate - e.g. cleanup of history */
st_opt_cleanup = 0x000080000,
/* used where ever apropriate - e.g. send out a history query to all nodes */
st_opt_broadcast = 0x000100000,
};
/*! Order matters here, do not change values */
enum op_state
{
st_query,
st_exec,
st_done,
st_duplicate,
st_failed,
};
// Supported fence agent interface standards
enum stonith_namespace {
st_namespace_invalid,
st_namespace_any,
st_namespace_internal, // Implemented internally by Pacemaker
/* Neither of these projects are active any longer, but the fence agent
* interfaces they created are still in use and supported by Pacemaker.
*/
st_namespace_rhcs, // Red Hat Cluster Suite compatible
st_namespace_lha, // Linux-HA compatible
};
enum stonith_namespace stonith_text2namespace(const char *namespace_s);
const char *stonith_namespace2text(enum stonith_namespace st_namespace);
enum stonith_namespace stonith_get_namespace(const char *agent,
const char *namespace_s);
typedef struct stonith_key_value_s {
char *key;
char *value;
struct stonith_key_value_s *next;
} stonith_key_value_t;
typedef struct stonith_history_s {
char *target;
char *action;
char *origin;
char *delegate;
char *client;
int state;
time_t completed;
struct stonith_history_s *next;
} stonith_history_t;
typedef struct stonith_s stonith_t;
typedef struct stonith_event_s
{
char *id;
char *type;
char *message;
char *operation;
int result;
char *origin;
char *target;
char *action;
char *executioner;
char *device;
/*! The name of the client that initiated the action. */
char *client_origin;
} stonith_event_t;
typedef struct stonith_callback_data_s
{
int rc;
int call_id;
void *userdata;
} stonith_callback_data_t;
typedef struct stonith_api_operations_s
{
/*!
* \brief Destroy the stonith api structure.
*/
int (*free) (stonith_t *st);
/*!
* \brief Connect to the local stonith daemon.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*connect) (stonith_t *st, const char *name, int *stonith_fd);
/*!
* \brief Disconnect from the local stonith daemon.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*disconnect)(stonith_t *st);
/*!
* \brief Remove a registered stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*remove_device)(
stonith_t *st, int options, const char *name);
/*!
* \brief Register a stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_device)(
stonith_t *st, int options, const char *id,
const char *provider, const char *agent, stonith_key_value_t *params);
/*!
* \brief Remove a fencing level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*remove_level)(
stonith_t *st, int options, const char *node, int level);
/*!
* \brief Register a fencing level containing the fencing devices to be used
* at that level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_level)(
stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
/*!
* \brief Get the metadata documentation for a resource.
*
* \note Value is returned in output. Output must be freed when set.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*metadata)(stonith_t *st, int options,
const char *device, const char *provider, char **output, int timeout);
/*!
* \brief Retrieve a list of installed stonith agents
*
* \note if provider is not provided, all known agents will be returned
* \note list must be freed using stonith_key_value_freeall()
* \note call_options parameter is not used, it is reserved for future use.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_agents)(stonith_t *stonith, int call_options, const char *provider,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Retrieve string listing hosts and port assignments from a local stonith device.
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout);
/*!
* \brief Check to see if a local stonith device is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*monitor)(stonith_t *st, int options, const char *id, int timeout);
/*!
* \brief Check to see if a local stonith device's port is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout);
/*!
* \brief Retrieve a list of registered stonith devices.
*
* \note If node is provided, only devices that can fence the node id
* will be returned.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*query)(stonith_t *st, int options, const char *node,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Issue a fencing action against a node.
*
* \note Possible actions are, 'on', 'off', and 'reboot'.
*
* \param st, stonith connection
* \param options, call options
* \param node, The target node to fence
* \param action, The fencing action to take
* \param timeout, The default per device timeout to use with each device
* capable of fencing the target.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*fence)(stonith_t *st, int options, const char *node, const char *action,
int timeout, int tolerance);
/*!
* \brief Manually confirm that a node is down.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*confirm)(stonith_t *st, int options, const char *node);
/*!
* \brief Retrieve a list of fencing operations that have occurred for a specific node.
*
* \note History is not available in standalone mode.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*history)(stonith_t *st, int options, const char *node, stonith_history_t **output, int timeout);
int (*register_notification)(
stonith_t *st, const char *event,
void (*notify)(stonith_t *st, stonith_event_t *e));
int (*remove_notification)(stonith_t *st, const char *event);
/*!
* \brief Register a callback to receive the result of an asynchronous call
*
* \param[in] call_id The call ID to register callback for
* \param[in] timeout Default time to wait until callback expires
* \param[in] options Bitmask of \c stonith_call_options (respects
* \c st_opt_timeout_updates and
* \c st_opt_report_only_success)
* \param[in] userdata Pointer that will be given to callback
* \param[in] callback_name Unique name to identify callback
* \param[in] callback The callback function to register
*
* \return \c TRUE on success, \c FALSE if call_id is negative, -errno otherwise
*
* \todo This function should return \c pcmk_ok on success, and \c call_id
* when negative, but that would break backward compatibility.
*/
int (*register_callback)(stonith_t *st,
int call_id,
int timeout,
int options,
void *userdata,
const char *callback_name,
void (*callback)(stonith_t *st, stonith_callback_data_t *data));
/*!
* \brief Remove a registered callback for a given call id.
*/
int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks);
/*!
* \brief Remove fencing level for specific node, node regex or attribute
*
* \param[in] st Fencer connection to use
* \param[in] options Bitmask of stonith_call_options to pass to the fencer
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to remove
*
* \return 0 on success, negative error code otherwise
*
* \note This feature is not available when stonith is in standalone mode.
* The caller should set only one of node, pattern or attr/value.
*/
int (*remove_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value, int level);
/*!
* \brief Register fencing level for specific node, node regex or attribute
*
* \param[in] st Fencer connection to use
* \param[in] options Bitmask of stonith_call_options to pass to fencer
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to add
* \param[in] device_list Devices to use in level
*
* \return 0 on success, negative error code otherwise
*
* \note This feature is not available when stonith is in standalone mode.
* The caller should set only one of node, pattern or attr/value.
*/
int (*register_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value,
int level, stonith_key_value_t *device_list);
/*!
* \brief Validate an arbitrary stonith device configuration
*
* \param[in] st Stonithd connection to use
* \param[in] call_options Bitmask of stonith_call_options to use with fencer
* \param[in] rsc_id ID used to replace CIB secrets in params
* \param[in] namespace_s Namespace of fence agent to validate (optional)
* \param[in] agent Fence agent to validate
* \param[in] params Configuration parameters to pass to fence agent
* \param[in] timeout Fail if no response within this many seconds
* \param[out] output If non-NULL, where to store any agent output
* \param[out] error_output If non-NULL, where to store agent error output
*
* \return pcmk_ok if validation succeeds, -errno otherwise
*
* \note If pcmk_ok is returned, the caller is responsible for freeing
* the output (if requested).
*/
int (*validate)(stonith_t *st, int call_options, const char *rsc_id,
const char *namespace_s, const char *agent,
stonith_key_value_t *params, int timeout, char **output,
char **error_output);
} stonith_api_operations_t;
struct stonith_s
{
enum stonith_state state;
int call_id;
int call_timeout;
void *st_private;
stonith_api_operations_t *cmds;
};
/* *INDENT-ON* */
/* Core functions */
stonith_t *stonith_api_new(void);
void stonith_api_delete(stonith_t * st);
void stonith_dump_pending_callbacks(stonith_t * st);
// deprecated (use stonith_get_namespace() instead)
const char *get_stonith_provider(const char *agent, const char *provider);
bool stonith_dispatch(stonith_t * st);
stonith_key_value_t *stonith_key_value_add(stonith_key_value_t * kvp, const char *key,
const char *value);
void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
void stonith_history_free(stonith_history_t *history);
+// Convenience functions
+int stonith_api_connect_retry(stonith_t *st, const char *name,
+ int max_attempts);
+
/* Basic helpers that allows nodes to be fenced and the history to be
* queried without mainloop or the caller understanding the full API
*
* At least one of nodeid and uname are required
*/
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off);
time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress);
/*
* Helpers for using the above functions without install-time dependencies
*
* Usage:
* #include
*
* To turn a node off by corosync nodeid:
* stonith_api_kick_helper(nodeid, 120, 1);
*
* To check the last fence date/time (also by nodeid):
* last = stonith_api_time_helper(nodeid, 0);
*
* To check if fencing is in progress:
* if(stonith_api_time_helper(nodeid, 1) > 0) { ... }
*
* eg.
#include
#include
#include
int
main(int argc, char ** argv)
{
int rc = 0;
int nodeid = 102;
rc = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(rc));
rc = stonith_api_kick_helper(nodeid, 120, 1);
printf("%d fence result: %d\n", nodeid, rc);
rc = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(rc));
return 0;
}
*/
# define STONITH_LIBRARY "libstonithd.so.26"
typedef int (*st_api_kick_fn) (int nodeid, const char *uname, int timeout, bool off);
typedef time_t (*st_api_time_fn) (int nodeid, const char *uname, bool in_progress);
static inline int
stonith_api_kick_helper(uint32_t nodeid, int timeout, bool off)
{
static void *st_library = NULL;
static st_api_kick_fn st_kick_fn;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_kick_fn == NULL) {
st_kick_fn = (st_api_kick_fn) dlsym(st_library, "stonith_api_kick");
}
if (st_kick_fn == NULL) {
#ifdef ELIBACC
return -ELIBACC;
#else
return -ENOSYS;
#endif
}
return (*st_kick_fn) (nodeid, NULL, timeout, off);
}
static inline time_t
stonith_api_time_helper(uint32_t nodeid, bool in_progress)
{
static void *st_library = NULL;
static st_api_time_fn st_time_fn;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_time_fn == NULL) {
st_time_fn = (st_api_time_fn) dlsym(st_library, "stonith_api_time");
}
if (st_time_fn == NULL) {
return 0;
}
return (*st_time_fn) (nodeid, NULL, in_progress);
}
/**
* Does the given agent describe a stonith resource that can exist?
*
* \param[in] agent What is the name of the agent?
* \param[in] timeout Timeout to use when querying. If 0 is given,
* use a default of 120.
*
* \return A boolean
*/
bool stonith_agent_exists(const char *agent, int timeout);
/*!
* \brief Turn stonith action into a more readable string.
*
* \param action Stonith action
*/
const char *stonith_action_str(const char *action);
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 270ef8d196..ceee94428b 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,2281 +1,2306 @@
/*
- * Copyright 2004-2018 Andrew Beekhof
+ * Copyright 2004-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if SUPPORT_CIBSECRETS
# include
#endif
CRM_TRACE_INIT_DATA(stonith);
struct stonith_action_s {
/*! user defined data */
char *agent;
char *action;
char *victim;
GHashTable *args;
int timeout;
int async;
void *userdata;
void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data);
void (*fork_cb) (GPid pid, gpointer user_data);
svc_action_t *svc_action;
/*! internal timing information */
time_t initial_start_time;
int tries;
int remaining_timeout;
int max_retries;
/* device output data */
GPid pid;
int rc;
char *output;
char *error;
};
typedef struct stonith_private_s {
char *token;
crm_ipc_t *ipc;
mainloop_io_t *source;
GHashTable *stonith_op_callback_table;
GList *notify_list;
void (*op_callback) (stonith_t * st, stonith_callback_data_t * data);
} stonith_private_t;
typedef struct stonith_notify_client_s {
const char *event;
const char *obj_id; /* implement one day */
const char *obj_type; /* implement one day */
void (*notify) (stonith_t * st, stonith_event_t * e);
} stonith_notify_client_t;
typedef struct stonith_callback_client_s {
void (*callback) (stonith_t * st, stonith_callback_data_t * data);
const char *id;
void *user_data;
gboolean only_success;
gboolean allow_timeout_updates;
struct timer_rec_s *timer;
} stonith_callback_client_t;
struct notify_blob_s {
stonith_t *stonith;
xmlNode *xml;
};
struct timer_rec_s {
int call_id;
int timeout;
guint ref;
stonith_t *stonith;
};
typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *,
xmlNode *, xmlNode *, xmlNode **, xmlNode **);
bool stonith_dispatch(stonith_t * st);
xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data,
xmlNode ** output_data, int call_options, int timeout);
static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int internal_stonith_action_execute(stonith_action_t * action);
static void log_action(stonith_action_t *action, pid_t pid);
/*!
* \brief Get agent namespace by name
*
* \param[in] namespace_s Name of namespace as string
*
* \return Namespace as enum value
*/
enum stonith_namespace
stonith_text2namespace(const char *namespace_s)
{
if ((namespace_s == NULL) || !strcmp(namespace_s, "any")) {
return st_namespace_any;
} else if (!strcmp(namespace_s, "redhat")
|| !strcmp(namespace_s, "stonith-ng")) {
return st_namespace_rhcs;
} else if (!strcmp(namespace_s, "internal")) {
return st_namespace_internal;
} else if (!strcmp(namespace_s, "heartbeat")) {
return st_namespace_lha;
}
return st_namespace_invalid;
}
/*!
* \brief Get agent namespace name
*
* \param[in] namespace Namespace as enum value
*
* \return Namespace name as string
*/
const char *
stonith_namespace2text(enum stonith_namespace st_namespace)
{
switch (st_namespace) {
case st_namespace_any: return "any";
case st_namespace_rhcs: return "stonith-ng";
case st_namespace_internal: return "internal";
case st_namespace_lha: return "heartbeat";
default: break;
}
return "unsupported";
}
/*!
* \brief Determine namespace of a fence agent
*
* \param[in] agent Fence agent type
* \param[in] namespace_s Name of agent namespace as string, if known
*
* \return Namespace of specified agent, as enum value
*/
enum stonith_namespace
stonith_get_namespace(const char *agent, const char *namespace_s)
{
if (safe_str_eq(namespace_s, "internal")) {
return st_namespace_internal;
}
if (stonith__agent_is_rhcs(agent)) {
return st_namespace_rhcs;
}
#if HAVE_STONITH_STONITH_H
if (stonith__agent_is_lha(agent)) {
return st_namespace_lha;
}
#endif
crm_err("Unknown fence agent: %s", agent);
return st_namespace_invalid;
}
static void
log_action(stonith_action_t *action, pid_t pid)
{
if (action->output) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
crm_log_output(LOG_TRACE, prefix, action->output);
free(prefix);
}
if (action->error) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
crm_log_output(LOG_WARNING, prefix, action->error);
free(prefix);
}
}
static void
stonith_connection_destroy(gpointer user_data)
{
stonith_t *stonith = user_data;
stonith_private_t *native = NULL;
struct notify_blob_s blob;
crm_trace("Sending destroyed notification");
blob.stonith = stonith;
blob.xml = create_xml_node(NULL, "notify");
native = stonith->st_private;
native->ipc = NULL;
native->source = NULL;
stonith->state = stonith_disconnected;
crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT);
g_list_foreach(native->notify_list, stonith_send_notification, &blob);
free_xml(blob.xml);
}
xmlNode *
create_device_registration_xml(const char *id, enum stonith_namespace namespace,
const char *agent, stonith_key_value_t *params,
const char *rsc_provides)
{
xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
#if HAVE_STONITH_STONITH_H
if (namespace == st_namespace_any) {
namespace = stonith_get_namespace(agent, NULL);
}
if (namespace == st_namespace_lha) {
hash2field((gpointer) "plugin", (gpointer) agent, args);
agent = "fence_legacy";
}
#endif
crm_xml_add(data, XML_ATTR_ID, id);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, "agent", agent);
if ((namespace != st_namespace_any) && (namespace != st_namespace_invalid)) {
crm_xml_add(data, "namespace", stonith_namespace2text(namespace));
}
if (rsc_provides) {
crm_xml_add(data, "rsc_provides", rsc_provides);
}
for (; params; params = params->next) {
hash2field((gpointer) params->key, (gpointer) params->value, args);
}
return data;
}
static int
stonith_api_register_device(stonith_t * st, int call_options,
const char *id, const char *namespace, const char *agent,
stonith_key_value_t * params)
{
int rc = 0;
xmlNode *data = NULL;
data = create_device_registration_xml(id, stonith_text2namespace(namespace),
agent, params, NULL);
rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, XML_ATTR_ID, name);
rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_level_full(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value, int level)
{
int rc = 0;
xmlNode *data = NULL;
CRM_CHECK(node || pattern || (attr && value), return -EINVAL);
data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
if (node) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
} else if (pattern) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern);
} else {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr);
crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value);
}
crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_level(stonith_t * st, int options, const char *node, int level)
{
return stonith_api_remove_level_full(st, options, node,
NULL, NULL, NULL, level);
}
/*!
* \internal
* \brief Create XML for fence topology level registration request
*
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to register
* \param[in] device_list List of devices in level
*
* \return Newly allocated XML tree on success, NULL otherwise
*
* \note The caller should set only one of node, pattern or attr/value.
*/
xmlNode *
create_level_registration_xml(const char *node, const char *pattern,
const char *attr, const char *value,
int level, stonith_key_value_t *device_list)
{
int len = 0;
char *list = NULL;
xmlNode *data;
CRM_CHECK(node || pattern || (attr && value), return NULL);
data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
CRM_CHECK(data, return NULL);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add_int(data, XML_ATTR_ID, level);
crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
if (node) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
} else if (pattern) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern);
} else {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr);
crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value);
}
for (; device_list; device_list = device_list->next) {
int adding = strlen(device_list->value);
if(list) {
adding++; /* +1 space */
}
crm_trace("Adding %s (%dc) at offset %d", device_list->value, adding, len);
list = realloc_safe(list, len + adding + 1); /* +1 EOS */
if (list == NULL) {
crm_perror(LOG_CRIT, "Could not create device list");
free_xml(data);
return NULL;
}
sprintf(list + len, "%s%s", len?",":"", device_list->value);
len += adding;
}
crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list);
free(list);
return data;
}
static int
stonith_api_register_level_full(stonith_t * st, int options, const char *node,
const char *pattern,
const char *attr, const char *value,
int level, stonith_key_value_t *device_list)
{
int rc = 0;
xmlNode *data = create_level_registration_xml(node, pattern, attr, value,
level, device_list);
CRM_CHECK(data != NULL, return -EINVAL);
rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_register_level(stonith_t * st, int options, const char *node, int level,
stonith_key_value_t * device_list)
{
return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL,
level, device_list);
}
static void
append_arg(const char *key, const char *value, GHashTable **args)
{
CRM_CHECK(key != NULL, return);
CRM_CHECK(value != NULL, return);
CRM_CHECK(args != NULL, return);
if (strstr(key, "pcmk_")) {
return;
} else if (strstr(key, CRM_META)) {
return;
} else if (safe_str_eq(key, "crm_feature_set")) {
return;
}
if (!*args) {
*args = crm_str_table_new();
}
CRM_CHECK(*args != NULL, return);
crm_trace("Appending: %s=%s", key, value);
g_hash_table_replace(*args, strdup(key), strdup(value));
}
static void
append_config_arg(gpointer key, gpointer value, gpointer user_data)
{
/* The fencer will filter action out when it registers the device,
* but ignore it here just in case any other library callers
* fail to do so.
*/
if (safe_str_neq(key, STONITH_ATTR_ACTION_OP)) {
append_arg(key, value, user_data);
return;
}
}
static GHashTable *
make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args,
GHashTable * port_map)
{
char buffer[512];
GHashTable *arg_list = NULL;
const char *value = NULL;
CRM_CHECK(action != NULL, return NULL);
snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action);
if (device_args) {
value = g_hash_table_lookup(device_args, buffer);
}
if (value) {
crm_info("Substituting action '%s' for requested operation '%s'", value, action);
action = value;
}
append_arg(STONITH_ATTR_ACTION_OP, action, &arg_list);
if (victim && device_args) {
const char *alias = victim;
const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG);
if (port_map && g_hash_table_lookup(port_map, victim)) {
alias = g_hash_table_lookup(port_map, victim);
}
/* Always supply the node's name, too:
* https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md
*/
append_arg("nodename", victim, &arg_list);
if (victim_nodeid) {
char nodeid_str[33] = { 0, };
if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) {
crm_info("For stonith action (%s) for victim %s, adding nodeid (%s) to parameters",
action, victim, nodeid_str);
append_arg("nodeid", nodeid_str, &arg_list);
}
}
/* Check if we need to supply the victim in any other form */
if(safe_str_eq(agent, "fence_legacy")) {
value = agent;
} else if (param == NULL) {
param = "port";
value = g_hash_table_lookup(device_args, param);
} else if (safe_str_eq(param, "none")) {
value = param; /* Nothing more to do */
} else {
value = g_hash_table_lookup(device_args, param);
}
/* Don't overwrite explictly set values for $param */
if (value == NULL || safe_str_eq(value, "dynamic")) {
crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param,
alias);
append_arg(param, alias, &arg_list);
}
}
if (device_args) {
g_hash_table_foreach(device_args, append_config_arg, &arg_list);
}
return arg_list;
}
/*!
* \internal
* \brief Free all memory used by a stonith action
*
* \param[in,out] action Action to free
*/
void
stonith__destroy_action(stonith_action_t *action)
{
if (action) {
free(action->agent);
if (action->args) {
g_hash_table_destroy(action->args);
}
free(action->action);
free(action->victim);
if (action->svc_action) {
services_action_free(action->svc_action);
}
free(action->output);
free(action->error);
free(action);
}
}
/*!
* \internal
* \brief Get the result of an executed stonith action
*
* \param[in,out] action Executed action
* \param[out] rc Where to store result code (or NULL)
* \param[out] output Where to store standard output (or NULL)
* \param[out] error_output Where to store standard error output (or NULL)
*
* \note If output or error_output is not NULL, the caller is responsible for
* freeing the memory.
*/
void
stonith__action_result(stonith_action_t *action, int *rc, char **output,
char **error_output)
{
if (rc) {
*rc = pcmk_ok;
}
if (output) {
*output = NULL;
}
if (error_output) {
*error_output = NULL;
}
if (action != NULL) {
if (rc) {
*rc = action->rc;
}
if (output && action->output) {
*output = action->output;
action->output = NULL; // hand off memory management to caller
}
if (error_output && action->error) {
*error_output = action->error;
action->error = NULL; // hand off memory management to caller
}
}
}
#define FAILURE_MAX_RETRIES 2
stonith_action_t *
stonith_action_create(const char *agent,
const char *_action,
const char *victim,
uint32_t victim_nodeid,
int timeout, GHashTable * device_args, GHashTable * port_map)
{
stonith_action_t *action;
action = calloc(1, sizeof(stonith_action_t));
action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map);
crm_debug("Preparing '%s' action for %s using agent %s",
_action, (victim? victim : "no target"), agent);
action->agent = strdup(agent);
action->action = strdup(_action);
if (victim) {
action->victim = strdup(victim);
}
action->timeout = action->remaining_timeout = timeout;
action->max_retries = FAILURE_MAX_RETRIES;
if (device_args) {
char buffer[512];
const char *value = NULL;
snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
value = g_hash_table_lookup(device_args, buffer);
if (value) {
action->max_retries = atoi(value);
}
}
return action;
}
static gboolean
update_remaining_timeout(stonith_action_t * action)
{
int diff = time(NULL) - action->initial_start_time;
if (action->tries >= action->max_retries) {
crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
action->agent, action->action, action->max_retries);
action->remaining_timeout = 0;
} else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
/* only set remaining timeout period if there is 30%
* or greater of the original timeout period left */
action->remaining_timeout = action->timeout - diff;
} else {
action->remaining_timeout = 0;
}
return action->remaining_timeout ? TRUE : FALSE;
}
static int
svc_action_to_errno(svc_action_t *svc_action) {
int rv = pcmk_ok;
if (svc_action->rc > 0) {
/* Try to provide a useful error code based on the fence agent's
* error output.
*/
if (svc_action->rc == PCMK_OCF_TIMEOUT) {
rv = -ETIME;
} else if (svc_action->stderr_data == NULL) {
rv = -ENODATA;
} else if (strstr(svc_action->stderr_data, "imed out")) {
/* Some agents have their own internal timeouts */
rv = -ETIME;
} else if (strstr(svc_action->stderr_data, "Unrecognised action")) {
rv = -EOPNOTSUPP;
} else {
rv = -pcmk_err_generic;
}
}
return rv;
}
static void
stonith_action_async_done(svc_action_t *svc_action)
{
stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
action->rc = svc_action_to_errno(svc_action);
action->output = svc_action->stdout_data;
svc_action->stdout_data = NULL;
action->error = svc_action->stderr_data;
svc_action->stderr_data = NULL;
svc_action->params = NULL;
crm_debug("Child process %d performing action '%s' exited with rc %d",
action->pid, action->action, svc_action->rc);
log_action(action, action->pid);
if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
int rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
return;
}
}
if (action->done_cb) {
action->done_cb(action->pid, action->rc, action->output, action->userdata);
}
action->svc_action = NULL; // don't remove our caller
stonith__destroy_action(action);
}
static void
stonith_action_async_forked(svc_action_t *svc_action)
{
stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
action->pid = svc_action->pid;
action->svc_action = svc_action;
if (action->fork_cb) {
(action->fork_cb) (svc_action->pid, action->userdata);
}
crm_trace("Child process %d performing action '%s' successfully forked",
action->pid, action->action);
}
static int
internal_stonith_action_execute(stonith_action_t * action)
{
int rc = -EPROTO;
int is_retry = 0;
svc_action_t *svc_action = NULL;
static int stonith_sequence = 0;
char *buffer = NULL;
if (!action->tries) {
action->initial_start_time = time(NULL);
}
action->tries++;
if (action->tries > 1) {
crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
action->tries, action->agent, action->action, action->remaining_timeout);
is_retry = 1;
}
if (action->args == NULL || action->agent == NULL)
goto fail;
buffer = crm_strdup_printf(RH_STONITH_DIR "/%s", basename(action->agent));
svc_action = services_action_create_generic(buffer, NULL);
free(buffer);
svc_action->timeout = 1000 * action->remaining_timeout;
svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH);
svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent),
action->action, action->tries);
svc_action->agent = strdup(action->agent);
svc_action->sequence = stonith_sequence++;
svc_action->params = action->args;
svc_action->cb_data = (void *) action;
/* keep retries from executing out of control and free previous results */
if (is_retry) {
free(action->output);
action->output = NULL;
free(action->error);
action->error = NULL;
sleep(1);
}
if (action->async) {
/* async */
if(services_action_async_fork_notify(svc_action,
&stonith_action_async_done,
&stonith_action_async_forked) == FALSE) {
services_action_free(svc_action);
svc_action = NULL;
} else {
rc = 0;
}
} else {
/* sync */
if (services_action_sync(svc_action)) {
rc = 0;
action->rc = svc_action_to_errno(svc_action);
action->output = svc_action->stdout_data;
svc_action->stdout_data = NULL;
action->error = svc_action->stderr_data;
svc_action->stderr_data = NULL;
} else {
action->rc = -ECONNABORTED;
rc = action->rc;
}
svc_action->params = NULL;
services_action_free(svc_action);
}
fail:
return rc;
}
/*!
* \internal
* \brief Kick off execution of an async stonith action
*
* \param[in,out] action Action to be executed
* \param[in,out] userdata Datapointer to be passed to callbacks
* \param[in] done Callback to notify action has failed/succeeded
* \param[in] fork_callback Callback to notify successful fork of child
*
* \return pcmk_ok if ownership of action has been taken, -errno otherwise
*/
int
stonith_action_execute_async(stonith_action_t * action,
void *userdata,
void (*done) (GPid pid, int rc, const char *output,
gpointer user_data),
void (*fork_cb) (GPid pid, gpointer user_data))
{
if (!action) {
return -1;
}
action->userdata = userdata;
action->done_cb = done;
action->fork_cb = fork_cb;
action->async = 1;
return internal_stonith_action_execute(action);
}
/*!
* \internal
* \brief Execute a stonith action
*
* \param[in,out] action Action to execute
*
* \return pcmk_ok on success, -errno otherwise
*/
int
stonith__execute(stonith_action_t *action)
{
int rc = pcmk_ok;
CRM_CHECK(action != NULL, return -EINVAL);
// Keep trying until success, max retries, or timeout
do {
rc = internal_stonith_action_execute(action);
} while ((rc != pcmk_ok) && update_remaining_timeout(action));
return rc;
}
static int
stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace,
stonith_key_value_t ** devices, int timeout)
{
int count = 0;
enum stonith_namespace ns = stonith_text2namespace(namespace);
if (devices == NULL) {
crm_err("Parameter error: stonith_api_device_list");
return -EFAULT;
}
#if HAVE_STONITH_STONITH_H
// Include Linux-HA agents if requested
if ((ns == st_namespace_any) || (ns == st_namespace_lha)) {
count += stonith__list_lha_agents(devices);
}
#endif
// Include Red Hat agents if requested
if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) {
count += stonith__list_rhcs_agents(devices);
}
return count;
}
static int
stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
const char *namespace, char **output, int timeout)
{
/* By executing meta-data directly, we can get it from stonith_admin when
* the cluster is not running, which is important for higher-level tools.
*/
enum stonith_namespace ns = stonith_get_namespace(agent, namespace);
crm_trace("Looking up metadata for %s agent %s",
stonith_namespace2text(ns), agent);
switch (ns) {
case st_namespace_rhcs:
return stonith__rhcs_metadata(agent, timeout, output);
#if HAVE_STONITH_STONITH_H
case st_namespace_lha:
return stonith__lha_metadata(agent, timeout, output);
#endif
default:
errno = EINVAL;
crm_perror(LOG_ERR,
"Agent %s not found or does not support meta-data",
agent);
break;
}
return -EINVAL;
}
static int
stonith_api_query(stonith_t * stonith, int call_options, const char *target,
stonith_key_value_t ** devices, int timeout)
{
int rc = 0, lpc = 0, max = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
xmlXPathObjectPtr xpathObj = NULL;
CRM_CHECK(devices != NULL, return -EINVAL);
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, target);
crm_xml_add(data, F_STONITH_ACTION, "off");
rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);
if (rc < 0) {
return rc;
}
xpathObj = xpath_search(output, "//@agent");
if (xpathObj) {
max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match != NULL) {
xmlChar *match_path = xmlGetNodePath(match);
crm_info("%s[%d] = %s", "//@agent", lpc, match_path);
free(match_path);
*devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID));
}
}
freeXpathObject(xpathObj);
}
free_xml(output);
free_xml(data);
return max;
}
static int
stonith_api_call(stonith_t * stonith,
int call_options,
const char *id,
const char *action, const char *victim, int timeout, xmlNode ** output)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_STONITH_DEVICE, id);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add(data, F_STONITH_TARGET, victim);
rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info,
int timeout)
{
int rc;
xmlNode *output = NULL;
rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output);
if (output && list_info) {
const char *list_str;
list_str = crm_element_value(output, "st_output");
if (list_str) {
*list_info = strdup(list_str);
}
}
if (output) {
free_xml(output);
}
return rc;
}
static int
stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout)
{
return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL);
}
static int
stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port,
int timeout)
{
return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL);
}
static int
stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action,
int timeout, int tolerance)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout);
crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance);
rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_confirm(stonith_t * stonith, int call_options, const char *target)
{
return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0);
}
static int
stonith_api_history(stonith_t * stonith, int call_options, const char *node,
stonith_history_t ** history, int timeout)
{
int rc = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
stonith_history_t *last = NULL;
*history = NULL;
if (node) {
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
}
rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output,
call_options | st_opt_sync_call, timeout);
free_xml(data);
if (rc == 0) {
xmlNode *op = NULL;
xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_TRACE);
for (op = __xml_first_child(reply); op != NULL; op = __xml_next(op)) {
stonith_history_t *kvp;
int completed;
kvp = calloc(1, sizeof(stonith_history_t));
kvp->target = crm_element_value_copy(op, F_STONITH_TARGET);
kvp->action = crm_element_value_copy(op, F_STONITH_ACTION);
kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN);
kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE);
kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME);
crm_element_value_int(op, F_STONITH_DATE, &completed);
kvp->completed = (time_t) completed;
crm_element_value_int(op, F_STONITH_STATE, &kvp->state);
if (last) {
last->next = kvp;
} else {
*history = kvp;
}
last = kvp;
}
}
free_xml(output);
return rc;
}
void stonith_history_free(stonith_history_t *history)
{
stonith_history_t *hp, *hp_old;
for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) {
free(hp->target);
free(hp->action);
free(hp->origin);
free(hp->delegate);
free(hp->client);
}
}
/*!
* \brief Deprecated (use stonith_get_namespace() instead)
*/
const char *
get_stonith_provider(const char *agent, const char *provider)
{
return stonith_namespace2text(stonith_get_namespace(agent, provider));
}
static gint
stonithlib_GCompareFunc(gconstpointer a, gconstpointer b)
{
int rc = 0;
const stonith_notify_client_t *a_client = a;
const stonith_notify_client_t *b_client = b;
CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0);
rc = strcmp(a_client->event, b_client->event);
if (rc == 0) {
if (a_client->notify == NULL || b_client->notify == NULL) {
return 0;
} else if (a_client->notify == b_client->notify) {
return 0;
} else if (((long)a_client->notify) < ((long)b_client->notify)) {
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return -1;
}
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return 1;
}
return rc;
}
xmlNode *
stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options)
{
xmlNode *op_msg = create_xml_node(NULL, "stonith_command");
CRM_CHECK(op_msg != NULL, return NULL);
CRM_CHECK(token != NULL, return NULL);
crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command");
crm_xml_add(op_msg, F_TYPE, T_STONITH_NG);
crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token);
crm_xml_add(op_msg, F_STONITH_OPERATION, op);
crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id);
crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options);
crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options);
if (data != NULL) {
add_message_xml(op_msg, F_STONITH_CALLDATA, data);
}
return op_msg;
}
static void
stonith_destroy_op_callback(gpointer data)
{
stonith_callback_client_t *blob = data;
if (blob->timer && blob->timer->ref > 0) {
g_source_remove(blob->timer->ref);
}
free(blob->timer);
free(blob);
}
static int
stonith_api_signoff(stonith_t * stonith)
{
stonith_private_t *native = stonith->st_private;
crm_debug("Disconnecting from the fencer");
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
free(native->token); native->token = NULL;
stonith->state = stonith_disconnected;
return pcmk_ok;
}
static int
stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
{
stonith_private_t *private = stonith->st_private;
if (all_callbacks) {
private->op_callback = NULL;
g_hash_table_destroy(private->stonith_op_callback_table);
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL,
stonith_destroy_op_callback);
} else if (call_id == 0) {
private->op_callback = NULL;
} else {
g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
}
return pcmk_ok;
}
static void
invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_data_t data = { 0, };
data.call_id = call_id;
data.rc = rc;
data.userdata = userdata;
callback(st, &data);
}
static void
stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc)
{
stonith_private_t *private = NULL;
stonith_callback_client_t *blob = NULL;
stonith_callback_client_t local_blob;
CRM_CHECK(stonith != NULL, return);
CRM_CHECK(stonith->st_private != NULL, return);
private = stonith->st_private;
local_blob.id = NULL;
local_blob.callback = NULL;
local_blob.user_data = NULL;
local_blob.only_success = FALSE;
if (msg != NULL) {
crm_element_value_int(msg, F_STONITH_RC, &rc);
crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
}
CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result"));
blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (blob != NULL) {
local_blob = *blob;
blob = NULL;
stonith_api_del_callback(stonith, call_id, FALSE);
} else {
crm_trace("No callback found for call %d", call_id);
local_blob.callback = NULL;
}
if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback);
} else if (private->op_callback == NULL && rc != pcmk_ok) {
crm_warn("Fencing command failed: %s", pcmk_strerror(rc));
crm_log_xml_debug(msg, "Failed fence update");
}
if (private->op_callback != NULL) {
crm_trace("Invoking global callback for call %d", call_id);
invoke_callback(stonith, call_id, rc, NULL, private->op_callback);
}
crm_trace("OP callback activated.");
}
static gboolean
stonith_async_timeout_handler(gpointer data)
{
struct timer_rec_s *timer = data;
crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME);
/* Always return TRUE, never remove the handler
* We do that in stonith_del_callback()
*/
return TRUE;
}
static void
set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id,
int timeout)
{
struct timer_rec_s *async_timer = callback->timer;
if (timeout <= 0) {
return;
}
if (!async_timer) {
async_timer = calloc(1, sizeof(struct timer_rec_s));
callback->timer = async_timer;
}
async_timer->stonith = stonith;
async_timer->call_id = call_id;
/* Allow a fair bit of grace to allow the server to tell us of a timeout
* This is only a fallback
*/
async_timer->timeout = (timeout + 60) * 1000;
if (async_timer->ref) {
g_source_remove(async_timer->ref);
}
async_timer->ref =
g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer);
}
static void
update_callback_timeout(int call_id, int timeout, stonith_t * st)
{
stonith_callback_client_t *callback = NULL;
stonith_private_t *private = st->st_private;
callback = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (!callback || !callback->allow_timeout_updates) {
return;
}
set_callback_timeout(callback, st, call_id, timeout);
}
static int
stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
{
const char *type = NULL;
struct notify_blob_s blob;
stonith_t *st = userdata;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->st_private;
blob.stonith = st;
blob.xml = string2xml(buffer);
if (blob.xml == NULL) {
crm_warn("Received malformed message from fencer: %s", buffer);
return 0;
}
/* do callbacks */
type = crm_element_value(blob.xml, F_TYPE);
crm_trace("Activating %s callbacks...", type);
if (safe_str_eq(type, T_STONITH_NG)) {
stonith_perform_callback(st, blob.xml, 0, 0);
} else if (safe_str_eq(type, T_STONITH_NOTIFY)) {
g_list_foreach(private->notify_list, stonith_send_notification, &blob);
} else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) {
int call_id = 0;
int timeout = 0;
crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout);
crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id);
update_callback_timeout(call_id, timeout, st);
} else {
crm_err("Unknown message type: %s", type);
crm_log_xml_warn(blob.xml, "BadReply");
}
free_xml(blob.xml);
return 1;
}
static int
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
{
int rc = pcmk_ok;
- stonith_private_t *native = stonith->st_private;
+ stonith_private_t *native = NULL;
+ const char *display_name = name? name : "client";
static struct ipc_client_callbacks st_callbacks = {
.dispatch = stonith_dispatch_internal,
.destroy = stonith_connection_destroy
};
- crm_trace("Connecting command channel");
+ CRM_CHECK(stonith != NULL, return -EINVAL);
+
+ native = stonith->st_private;
+ CRM_ASSERT(native != NULL);
+
+ crm_debug("Attempting fencer connection by %s with%s mainloop",
+ display_name, (stonith_fd? "out" : ""));
stonith->state = stonith_connected_command;
if (stonith_fd) {
/* No mainloop */
native->ipc = crm_ipc_new("stonith-ng", 0);
if (native->ipc && crm_ipc_connect(native->ipc)) {
*stonith_fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
- crm_perror(LOG_ERR, "Connection to fencer failed");
- rc = -ENOTCONN;
+ crm_ipc_close(native->ipc);
+ crm_ipc_destroy(native->ipc);
+ native->ipc = NULL;
}
} else {
/* With mainloop */
native->source =
mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
- crm_debug("Could not connect to the Stonith API");
rc = -ENOTCONN;
- }
-
- if (rc == pcmk_ok) {
+ } else {
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "stonith_command");
crm_xml_add(hello, F_TYPE, T_STONITH_NG);
crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(hello, F_STONITH_CLIENTNAME, name);
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
if (rc < 0) {
- crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc);
+ crm_debug("Couldn't register with the fencer: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
rc = -ECOMM;
} else if (reply == NULL) {
- crm_err("Did not receive registration reply");
+ crm_debug("Couldn't register with the fencer: no reply");
rc = -EPROTO;
} else {
const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION);
const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID);
if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
- crm_err("Invalid registration message: %s", msg_type);
- crm_log_xml_err(reply, "Bad reply");
+ crm_debug("Couldn't register with the fencer: invalid reply type '%s'",
+ (msg_type? msg_type : "(missing)"));
+ crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
- crm_err("No registration token provided");
- crm_log_xml_err(reply, "Bad reply");
+ crm_debug("Couldn't register with the fencer: no token in reply");
+ crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else {
- crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
+#if HAVE_MSGFROMIPC_TIMEOUT
+ stonith->call_timeout = MAX_IPC_DELAY;
+#endif
+ crm_debug("Connection to fencer by %s succeeded (registration token: %s)",
+ display_name, native->token);
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
}
- if (rc == pcmk_ok) {
-#if HAVE_MSGFROMIPC_TIMEOUT
- stonith->call_timeout = MAX_IPC_DELAY;
-#endif
- crm_debug("Connection to fencer successful");
- return pcmk_ok;
+ if (rc != pcmk_ok) {
+ crm_debug("Connection attempt to fencer by %s failed: %s "
+ CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc);
+ stonith->cmds->disconnect(stonith);
}
-
- crm_debug("Connection to fencer failed: %s", pcmk_strerror(rc));
- stonith->cmds->disconnect(stonith);
return rc;
}
static int
stonith_set_notification(stonith_t * stonith, const char *callback, int enabled)
{
int rc = pcmk_ok;
xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__);
stonith_private_t *native = stonith->st_private;
if (stonith->state != stonith_disconnected) {
crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY);
if (enabled) {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback);
} else {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback);
}
rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc);
rc = -ECOMM;
} else {
rc = pcmk_ok;
}
}
free_xml(notify_msg);
return rc;
}
static int
stonith_api_add_notification(stonith_t * stonith, const char *event,
void (*callback) (stonith_t * stonith, stonith_event_t * e))
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
private = stonith->st_private;
crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list));
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = callback;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
if (list_item != NULL) {
crm_warn("Callback already present");
free(new_client);
return -ENOTUNIQ;
} else {
private->notify_list = g_list_append(private->notify_list, new_client);
stonith_set_notification(stonith, event, 1);
crm_trace("Callback added (%d)", g_list_length(private->notify_list));
}
return pcmk_ok;
}
static int
stonith_api_del_notification(stonith_t * stonith, const char *event)
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
crm_debug("Removing callback for %s events", event);
private = stonith->st_private;
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = NULL;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
stonith_set_notification(stonith, event, 0);
if (list_item != NULL) {
stonith_notify_client_t *list_client = list_item->data;
private->notify_list = g_list_remove(private->notify_list, list_client);
free(list_client);
crm_trace("Removed callback");
} else {
crm_trace("Callback not present");
}
free(new_client);
return pcmk_ok;
}
static int
stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options,
void *user_data, const char *callback_name,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_client_t *blob = NULL;
stonith_private_t *private = NULL;
CRM_CHECK(stonith != NULL, return -EINVAL);
CRM_CHECK(stonith->st_private != NULL, return -EINVAL);
private = stonith->st_private;
if (call_id == 0) {
private->op_callback = callback;
} else if (call_id < 0) {
if (!(options & st_opt_report_only_success)) {
crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
invoke_callback(stonith, call_id, call_id, user_data, callback);
} else {
crm_warn("Fencer call failed: %s", pcmk_strerror(call_id));
}
return FALSE;
}
blob = calloc(1, sizeof(stonith_callback_client_t));
blob->id = callback_name;
blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE;
blob->user_data = user_data;
blob->callback = callback;
blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE;
if (timeout > 0) {
set_callback_timeout(blob, stonith, call_id, timeout);
}
g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob);
crm_trace("Added callback to %s for call %d", callback_name, call_id);
return TRUE;
}
static void
stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data)
{
int call = GPOINTER_TO_INT(key);
stonith_callback_client_t *blob = value;
crm_debug("Call %d (%s): pending", call, crm_str(blob->id));
}
void
stonith_dump_pending_callbacks(stonith_t * stonith)
{
stonith_private_t *private = stonith->st_private;
if (private->stonith_op_callback_table == NULL) {
return;
}
return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
}
/*
*/
static stonith_event_t *
xml_to_event(xmlNode * msg)
{
stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
const char *ntype = crm_element_value(msg, F_SUBTYPE);
char *data_addr = crm_strdup_printf("//%s", ntype);
xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
crm_log_xml_trace(msg, "stonith_notify");
crm_element_value_int(msg, F_STONITH_RC, &(event->result));
if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) {
event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
if (data) {
event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN);
event->action = crm_element_value_copy(data, F_STONITH_ACTION);
event->target = crm_element_value_copy(data, F_STONITH_TARGET);
event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE);
event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID);
event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME);
event->device = crm_element_value_copy(data, F_STONITH_DEVICE);
} else {
crm_err("No data for %s event", ntype);
crm_log_xml_notice(msg, "BadEvent");
}
}
free(data_addr);
return event;
}
static void
event_free(stonith_event_t * event)
{
free(event->id);
free(event->type);
free(event->message);
free(event->operation);
free(event->origin);
free(event->action);
free(event->target);
free(event->executioner);
free(event->device);
free(event->client_origin);
free(event);
}
static void
stonith_send_notification(gpointer data, gpointer user_data)
{
struct notify_blob_s *blob = user_data;
stonith_notify_client_t *entry = data;
stonith_event_t *st_event = NULL;
const char *event = NULL;
if (blob->xml == NULL) {
crm_warn("Skipping callback - NULL message");
return;
}
event = crm_element_value(blob->xml, F_SUBTYPE);
if (entry == NULL) {
crm_warn("Skipping callback - NULL callback client");
return;
} else if (entry->notify == NULL) {
crm_warn("Skipping callback - NULL callback");
return;
} else if (safe_str_neq(entry->event, event)) {
crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event);
return;
}
st_event = xml_to_event(blob->xml);
crm_trace("Invoking callback for %p/%s event...", entry, event);
entry->notify(blob->stonith, st_event);
crm_trace("Callback invoked...");
event_free(st_event);
}
int
stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data,
int call_options, int timeout)
{
int rc = 0;
int reply_id = -1;
enum crm_ipc_flags ipc_flags = crm_ipc_flags_none;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
stonith_private_t *native = stonith->st_private;
if (stonith->state == stonith_disconnected) {
return -ENOTCONN;
}
if (output_data != NULL) {
*output_data = NULL;
}
if (op == NULL) {
crm_err("No operation specified");
return -EINVAL;
}
if (call_options & st_opt_sync_call) {
ipc_flags |= crm_ipc_client_response;
}
stonith->call_id++;
/* prevent call_id from being negative (or zero) and conflicting
* with the stonith_errors enum
* use 2 because we use it as (stonith->call_id - 1) below
*/
if (stonith->call_id < 1) {
stonith->call_id = 1;
}
CRM_CHECK(native->token != NULL,;
);
op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options);
if (op_msg == NULL) {
return -EINVAL;
}
crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout);
crm_trace("Sending %s message to fencer with timeout %ds", op, timeout);
rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply);
free_xml(op_msg);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
if (!(call_options & st_opt_sync_call)) {
crm_trace("Async call %d, returning", stonith->call_id);
CRM_CHECK(stonith->call_id != 0, return -EPROTO);
free_xml(op_reply);
return stonith->call_id;
}
rc = pcmk_ok;
crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id);
if (reply_id == stonith->call_id) {
crm_trace("Synchronous reply %d received", reply_id);
if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) {
rc = -ENOMSG;
}
if ((call_options & st_opt_discard_reply) || output_data == NULL) {
crm_trace("Discarding reply");
} else {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
} else if (reply_id <= 0) {
crm_err("Received bad reply: No id set");
crm_log_xml_err(op_reply, "Bad reply");
free_xml(op_reply);
rc = -ENOMSG;
} else {
crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id);
crm_log_xml_err(op_reply, "Old reply");
free_xml(op_reply);
rc = -ENOMSG;
}
done:
if (crm_ipc_connected(native->ipc) == FALSE) {
crm_err("Fencer disconnected");
stonith->state = stonith_disconnected;
}
free_xml(op_reply);
return rc;
}
/* Not used with mainloop */
bool
stonith_dispatch(stonith_t * st)
{
gboolean stay_connected = TRUE;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->st_private;
while (crm_ipc_ready(private->ipc)) {
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
stonith_dispatch_internal(msg, strlen(msg), st);
}
if (crm_ipc_connected(private->ipc) == FALSE) {
crm_err("Connection closed");
stay_connected = FALSE;
}
}
return stay_connected;
}
static int
stonith_api_free(stonith_t * stonith)
{
int rc = pcmk_ok;
crm_trace("Destroying %p", stonith);
if (stonith->state != stonith_disconnected) {
crm_trace("Disconnecting %p first", stonith);
rc = stonith->cmds->disconnect(stonith);
}
if (stonith->state == stonith_disconnected) {
stonith_private_t *private = stonith->st_private;
crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table));
g_hash_table_destroy(private->stonith_op_callback_table);
crm_trace("Destroying %d notification clients", g_list_length(private->notify_list));
g_list_free_full(private->notify_list, free);
free(stonith->st_private);
free(stonith->cmds);
free(stonith);
} else {
crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc);
}
return rc;
}
void
stonith_api_delete(stonith_t * stonith)
{
crm_trace("Destroying %p", stonith);
if(stonith) {
stonith->cmds->free(stonith);
}
}
static int
stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
const char *namespace_s, const char *agent,
stonith_key_value_t *params, int timeout, char **output,
char **error_output)
{
/* Validation should be done directly via the agent, so we can get it from
* stonith_admin when the cluster is not running, which is important for
* higher-level tools.
*/
int rc = pcmk_ok;
/* Use a dummy node name in case the agent requires a target. We assume the
* actual target doesn't matter for validation purposes (if in practice,
* that is incorrect, we will need to allow the caller to pass the target).
*/
const char *target = "node1";
GHashTable *params_table = crm_str_table_new();
// Convert parameter list to a hash table
for (; params; params = params->next) {
// Strip out Pacemaker-implemented parameters
if (!crm_starts_with(params->key, "pcmk_")
&& strcmp(params->key, "provides")
&& strcmp(params->key, "stonith-timeout")) {
g_hash_table_insert(params_table, strdup(params->key),
strdup(params->value));
}
}
#if SUPPORT_CIBSECRETS
rc = replace_secret_params(rsc_id, params_table);
if (rc < 0) {
crm_warn("Could not replace secret parameters for validation of %s: %s",
agent, pcmk_strerror(rc));
}
#endif
if (output) {
*output = NULL;
}
if (error_output) {
*error_output = NULL;
}
switch (stonith_get_namespace(agent, namespace_s)) {
case st_namespace_rhcs:
rc = stonith__rhcs_validate(st, call_options, target, agent,
params_table, timeout, output,
error_output);
break;
#if HAVE_STONITH_STONITH_H
case st_namespace_lha:
rc = stonith__lha_validate(st, call_options, target, agent,
params_table, timeout, output,
error_output);
break;
#endif
default:
rc = -EINVAL;
errno = EINVAL;
crm_perror(LOG_ERR,
"Agent %s not found or does not support validation",
agent);
break;
}
g_hash_table_destroy(params_table);
return rc;
}
stonith_t *
stonith_api_new(void)
{
stonith_t *new_stonith = NULL;
stonith_private_t *private = NULL;
new_stonith = calloc(1, sizeof(stonith_t));
private = calloc(1, sizeof(stonith_private_t));
new_stonith->st_private = private;
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL, stonith_destroy_op_callback);
private->notify_list = NULL;
new_stonith->call_id = 1;
new_stonith->state = stonith_disconnected;
new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t));
/* *INDENT-OFF* */
new_stonith->cmds->free = stonith_api_free;
new_stonith->cmds->connect = stonith_api_signon;
new_stonith->cmds->disconnect = stonith_api_signoff;
new_stonith->cmds->list = stonith_api_list;
new_stonith->cmds->monitor = stonith_api_monitor;
new_stonith->cmds->status = stonith_api_status;
new_stonith->cmds->fence = stonith_api_fence;
new_stonith->cmds->confirm = stonith_api_confirm;
new_stonith->cmds->history = stonith_api_history;
new_stonith->cmds->list_agents = stonith_api_device_list;
new_stonith->cmds->metadata = stonith_api_device_metadata;
new_stonith->cmds->query = stonith_api_query;
new_stonith->cmds->remove_device = stonith_api_remove_device;
new_stonith->cmds->register_device = stonith_api_register_device;
new_stonith->cmds->remove_level = stonith_api_remove_level;
new_stonith->cmds->remove_level_full = stonith_api_remove_level_full;
new_stonith->cmds->register_level = stonith_api_register_level;
new_stonith->cmds->register_level_full = stonith_api_register_level_full;
new_stonith->cmds->remove_callback = stonith_api_del_callback;
new_stonith->cmds->register_callback = stonith_api_add_callback;
new_stonith->cmds->remove_notification = stonith_api_del_notification;
new_stonith->cmds->register_notification = stonith_api_add_notification;
new_stonith->cmds->validate = stonith_api_validate;
/* *INDENT-ON* */
return new_stonith;
}
+/*!
+ * \brief Make a blocking connection attempt to the fencer
+ *
+ * \param[in,out] st Fencer API object
+ * \param[in] name Client name to use with fencer
+ * \param[in] max_attempts Return error if this many attempts fail
+ *
+ * \return pcmk_ok on success, result of last attempt otherwise
+ */
+int
+stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts)
+{
+ int rc = -EINVAL; // if max_attempts is not positive
+
+ for (int attempt = 1; attempt <= max_attempts; attempt++) {
+ rc = st->cmds->connect(st, name, NULL);
+ if (rc == pcmk_ok) {
+ return pcmk_ok;
+ } else if (attempt < max_attempts) {
+ crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s "
+ CRM_XS " rc=%d",
+ attempt, max_attempts, pcmk_strerror(rc), rc);
+ sleep(2);
+ }
+ }
+ crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d",
+ pcmk_strerror(rc), rc);
+ return rc;
+}
+
stonith_key_value_t *
stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
{
stonith_key_value_t *p, *end;
p = calloc(1, sizeof(stonith_key_value_t));
if (key) {
p->key = strdup(key);
}
if (value) {
p->value = strdup(value);
}
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
{
stonith_key_value_t *p;
while (head) {
p = head->next;
if (keys) {
free(head->key);
}
if (values) {
free(head->value);
}
free(head);
head = p;
}
}
#define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON)
#define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __FUNCTION__, args)
int
stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
{
- char *name = NULL;
- const char *action = "reboot";
-
- int rc = -EPROTO;
- stonith_t *st = NULL;
- enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
+ int rc = pcmk_ok;
+ stonith_t *st = stonith_api_new();
+ const char *action = off? "off" : "reboot";
api_log_open();
- st = stonith_api_new();
- if (st) {
- rc = st->cmds->connect(st, "stonith-api", NULL);
- if(rc != pcmk_ok) {
- api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
- }
+ if (st == NULL) {
+ api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s",
+ action, nodeid, uname);
+ return -EPROTO;
}
- if (uname != NULL) {
- name = strdup(uname);
-
- } else if (nodeid > 0) {
- opts |= st_opt_cs_nodeid;
- name = crm_itoa(nodeid);
- }
-
- if (off) {
- action = "off";
- }
-
- if (rc == pcmk_ok) {
+ rc = st->cmds->connect(st, "stonith-api", NULL);
+ if (rc != pcmk_ok) {
+ api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)",
+ action, nodeid, uname, pcmk_strerror(rc), rc);
+ } else {
+ char *name = NULL;
+ enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
+
+ if (uname != NULL) {
+ name = strdup(uname);
+ } else if (nodeid > 0) {
+ opts |= st_opt_cs_nodeid;
+ name = crm_itoa(nodeid);
+ }
rc = st->cmds->fence(st, opts, name, action, timeout, 0);
- if(rc != pcmk_ok) {
- api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
+ free(name);
+
+ if (rc != pcmk_ok) {
+ api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)",
+ action, nodeid, uname, pcmk_strerror(rc), rc);
} else {
- api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action);
+ api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action);
}
}
- if (st) {
- st->cmds->disconnect(st);
- stonith_api_delete(st);
- }
-
- free(name);
+ stonith_api_delete(st);
return rc;
}
time_t
stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
{
- int rc = 0;
- char *name = NULL;
-
+ int rc = pcmk_ok;
time_t when = 0;
- stonith_t *st = NULL;
+ stonith_t *st = stonith_api_new();
stonith_history_t *history = NULL, *hp = NULL;
- enum stonith_call_options opts = st_opt_sync_call;
-
- st = stonith_api_new();
- if (st) {
- rc = st->cmds->connect(st, "stonith-api", NULL);
- if(rc != pcmk_ok) {
- api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
- }
- }
-
- if (uname != NULL) {
- name = strdup(uname);
- } else if (nodeid > 0) {
- opts |= st_opt_cs_nodeid;
- name = crm_itoa(nodeid);
+ if (st == NULL) {
+ api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: "
+ "API initialization failed", nodeid, uname);
+ return when;
}
- if (st && rc == pcmk_ok) {
+ rc = st->cmds->connect(st, "stonith-api", NULL);
+ if (rc != pcmk_ok) {
+ api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
+ } else {
int entries = 0;
int progress = 0;
int completed = 0;
-
+ char *name = NULL;
+ enum stonith_call_options opts = st_opt_sync_call;
+
+ if (uname != NULL) {
+ name = strdup(uname);
+ } else if (nodeid > 0) {
+ opts |= st_opt_cs_nodeid;
+ name = crm_itoa(nodeid);
+ }
rc = st->cmds->history(st, opts, name, &history, 120);
+ free(name);
for (hp = history; hp; hp = hp->next) {
entries++;
if (in_progress) {
progress++;
if (hp->state != st_done && hp->state != st_failed) {
when = time(NULL);
}
} else if (hp->state == st_done) {
completed++;
if (hp->completed > when) {
when = hp->completed;
}
}
}
stonith_history_free(history);
if(rc == pcmk_ok) {
api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed);
} else {
api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc);
}
}
- if (st) {
- st->cmds->disconnect(st);
- stonith_api_delete(st);
- }
+ stonith_api_delete(st);
if(when) {
api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when);
}
- free(name);
return when;
}
bool
stonith_agent_exists(const char *agent, int timeout)
{
stonith_t *st = NULL;
stonith_key_value_t *devices = NULL;
stonith_key_value_t *dIter = NULL;
bool rc = FALSE;
if (agent == NULL) {
return rc;
}
st = stonith_api_new();
st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout);
for (dIter = devices; dIter != NULL; dIter = dIter->next) {
if (crm_str_eq(dIter->value, agent, TRUE)) {
rc = TRUE;
break;
}
}
stonith_key_value_freeall(devices, 1, 1);
stonith_api_delete(st);
return rc;
}
const char *
stonith_action_str(const char *action)
{
if (action == NULL) {
return "fencing";
} else if (!strcmp(action, "on")) {
return "unfencing";
} else if (!strcmp(action, "off")) {
return "turning off";
} else {
return action;
}
}
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index e101b626b7..bed079611c 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,4415 +1,4414 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* crm_ends_with_ext */
#include
#include
#include
#include
#include
#include
#include
#include
#include <../lib/pengine/unpack.h>
#include
#include
static void clean_up_connections(void);
static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static gboolean mon_refresh_display(gpointer user_data);
static int cib_connect(gboolean full);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void kick_refresh(gboolean data_updated);
static char *get_node_display_name(node_t *node);
/*
* Definitions indicating which items to print
*/
#define mon_show_times (0x0001U)
#define mon_show_stack (0x0002U)
#define mon_show_dc (0x0004U)
#define mon_show_count (0x0008U)
#define mon_show_nodes (0x0010U)
#define mon_show_resources (0x0020U)
#define mon_show_attributes (0x0040U)
#define mon_show_failcounts (0x0080U)
#define mon_show_operations (0x0100U)
#define mon_show_tickets (0x0200U)
#define mon_show_bans (0x0400U)
#define mon_show_fence_history (0x0800U)
#define mon_show_headers (mon_show_times | mon_show_stack | mon_show_dc \
| mon_show_count)
#define mon_show_default (mon_show_headers | mon_show_nodes \
| mon_show_resources)
#define mon_show_all (mon_show_default | mon_show_attributes \
| mon_show_failcounts | mon_show_operations \
| mon_show_tickets | mon_show_bans \
| mon_show_fence_history)
static unsigned int show = mon_show_default;
/*
* Definitions indicating how to output
*/
enum mon_output_format_e {
mon_output_none,
mon_output_monitor,
mon_output_plain,
mon_output_console,
mon_output_xml,
mon_output_html,
mon_output_cgi
} output_format = mon_output_console;
static char *output_filename = NULL; /* if sending output to a file, its name */
/* other globals */
static char *pid_file = NULL;
static gboolean group_by_node = FALSE;
static gboolean inactive_resources = FALSE;
static int reconnect_msec = 5000;
static gboolean daemonize = FALSE;
static GMainLoop *mainloop = NULL;
static guint timer_id = 0;
static mainloop_timer_t *refresh_timer = NULL;
static pe_working_set_t *mon_data_set = NULL;
static GList *attr_list = NULL;
static const char *external_agent = NULL;
static const char *external_recipient = NULL;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static gboolean one_shot = FALSE;
static gboolean has_warnings = FALSE;
static gboolean print_timing = FALSE;
static gboolean watch_fencing = FALSE;
static gboolean fence_history = FALSE;
static gboolean fence_full_history = FALSE;
static gboolean fence_connect = FALSE;
static int fence_history_level = 1;
static gboolean print_brief = FALSE;
static gboolean print_pending = TRUE;
static gboolean print_clone_detail = FALSE;
#if CURSES_ENABLED
static gboolean curses_console_initialized = FALSE;
#endif
/* FIXME allow, detect, and correctly interpret glob pattern or regex? */
const char *print_neg_location_prefix = "";
/* Never display node attributes whose name starts with one of these prefixes */
#define FILTER_STR { CRM_FAIL_COUNT_PREFIX, CRM_LAST_FAILURE_PREFIX, \
"shutdown", "terminate", "standby", "probe_complete", \
"#", NULL }
long last_refresh = 0;
crm_trigger_t *refresh_trigger = NULL;
/* Define exit codes for monitoring-compatible output
* For nagios plugins, the possibilities are
* OK=0, WARN=1, CRIT=2, and UNKNOWN=3
*/
#define MON_STATUS_WARN CRM_EX_ERROR
#define MON_STATUS_CRIT CRM_EX_INVALID_PARAM
#define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE
/* Convenience macro for prettifying output (e.g. "node" vs "nodes") */
#define s_if_plural(i) (((i) == 1)? "" : "s")
#if CURSES_ENABLED
# define print_dot() if (output_format == mon_output_console) { \
printw("."); \
clrtoeol(); \
refresh(); \
} else { \
fprintf(stdout, "."); \
}
#else
# define print_dot() fprintf(stdout, ".");
#endif
#if CURSES_ENABLED
# define print_as(fmt, args...) if (output_format == mon_output_console) { \
printw(fmt, ##args); \
clrtoeol(); \
refresh(); \
} else { \
fprintf(stdout, fmt, ##args); \
}
#else
# define print_as(fmt, args...) fprintf(stdout, fmt, ##args);
#endif
static void
blank_screen(void)
{
#if CURSES_ENABLED
int lpc = 0;
for (lpc = 0; lpc < LINES; lpc++) {
move(lpc, 0);
clrtoeol();
}
move(0, 0);
refresh();
#endif
}
static gboolean
mon_timer_popped(gpointer data)
{
int rc = pcmk_ok;
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
if (timer_id > 0) {
g_source_remove(timer_id);
timer_id = 0;
}
print_as("Reconnecting...\n");
rc = cib_connect(TRUE);
if (rc != pcmk_ok) {
timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
}
return FALSE;
}
static void
mon_cib_connection_destroy(gpointer user_data)
{
print_as("Connection to the cluster-daemons terminated\n");
if (refresh_timer != NULL) {
/* we'll trigger a refresh after reconnect */
mainloop_timer_stop(refresh_timer);
}
if (timer_id) {
/* we'll trigger a new reconnect-timeout at the end */
g_source_remove(timer_id);
timer_id = 0;
}
if (st) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
if (st->state != stonith_disconnected) {
st->cmds->disconnect(st);
}
}
if (cib) {
cib->cmds->signoff(cib);
timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
}
return;
}
/*
* Mainloop signal handler.
*/
static void
mon_shutdown(int nsig)
{
clean_up(CRM_EX_OK);
}
#if CURSES_ENABLED
static sighandler_t ncurses_winch_handler;
static void
mon_winresize(int nsig)
{
static int not_done;
int lines = 0, cols = 0;
if (!not_done++) {
if (ncurses_winch_handler)
/* the original ncurses WINCH signal handler does the
* magic of retrieving the new window size;
* otherwise, we'd have to use ioctl or tgetent */
(*ncurses_winch_handler) (SIGWINCH);
getmaxyx(stdscr, lines, cols);
resizeterm(lines, cols);
mainloop_set_trigger(refresh_trigger);
}
not_done--;
}
#endif
static int
cib_connect(gboolean full)
{
int rc = pcmk_ok;
static gboolean need_pass = TRUE;
CRM_CHECK(cib != NULL, return -EINVAL);
if (getenv("CIB_passwd") != NULL) {
need_pass = FALSE;
}
if ((fence_connect) && (st == NULL)) {
st = stonith_api_new();
}
if ((fence_connect) && (st->state == stonith_disconnected)) {
- crm_trace("Connecting to stonith");
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
if (watch_fencing) {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_event);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
} else {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_display);
st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
}
}
}
if (cib->state != cib_connected_query && cib->state != cib_connected_command) {
crm_trace("Connecting to the CIB");
if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) {
need_pass = FALSE;
print_as("Password:");
}
rc = cib->cmds->signon(cib, crm_system_name, cib_query);
if (rc != pcmk_ok) {
return rc;
}
rc = cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call);
if (rc == pcmk_ok) {
mon_refresh_display(NULL);
}
if (rc == pcmk_ok && full) {
if (rc == pcmk_ok) {
rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
if (rc == -EPROTONOSUPPORT) {
print_as
("Notification setup not supported, won't be able to reconnect after failure");
if (output_format == mon_output_console) {
sleep(2);
}
rc = pcmk_ok;
}
}
if (rc == pcmk_ok) {
cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
}
if (rc != pcmk_ok) {
print_as("Notification setup failed, could not monitor CIB actions");
if (output_format == mon_output_console) {
sleep(2);
}
clean_up_connections();
}
}
}
return rc;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"quiet", 0, 0, 'Q', "\tDisplay only essential output" },
{"-spacer-", 1, 0, '-', "\nModes (mutually exclusive):"},
{"as-html", 1, 0, 'h', "\tWrite cluster status to the named html file"},
{"as-xml", 0, 0, 'X', "\t\tWrite cluster status as xml to stdout. This will enable one-shot mode."},
{"web-cgi", 0, 0, 'w', "\t\tWeb mode with output suitable for CGI (preselected when run as *.cgi)"},
{"simple-status", 0, 0, 's', "\tDisplay the cluster status once as a simple one line output (suitable for nagios)"},
{"-spacer-", 1, 0, '-', "\nDisplay Options:"},
{"group-by-node", 0, 0, 'n', "\tGroup resources by node" },
{"inactive", 0, 0, 'r', "\t\tDisplay inactive resources" },
{"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"},
{"operations", 0, 0, 'o', "\tDisplay resource operation history" },
{"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" },
{"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"},
{"watch-fencing", 0, 0, 'W', "\tListen for fencing events. For use with --external-agent"},
{"fence-history", 2, 0, 'm', "Show fence history\n"
"\t\t\t\t\t0=off, 1=failures and pending (default without option),\n"
"\t\t\t\t\t2=add successes (default without value for option),\n"
"\t\t\t\t\t3=show full history without reduction to most recent of each flavor"},
{"neg-locations", 2, 0, 'L', "Display negative location constraints [optionally filtered by id prefix]"},
{"show-node-attributes", 0, 0, 'A', "Display node attributes" },
{"hide-headers", 0, 0, 'D', "\tHide all headers" },
{"show-detail", 0, 0, 'R', "\tShow more details (node IDs, individual clone instances)" },
{"brief", 0, 0, 'b', "\t\tBrief output" },
{"pending", 0, 0, 'j', "\t\tDisplay pending state if 'record-pending' is enabled", pcmk_option_hidden},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"interval", 1, 0, 'i', "\tUpdate frequency in seconds" },
{"one-shot", 0, 0, '1', "\t\tDisplay the cluster status once on the console and exit"},
{"disable-ncurses",0, 0, 'N', "\tDisable the use of ncurses", !CURSES_ENABLED},
{"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"},
{"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"},
{"external-agent", 1, 0, 'E', "A program to run when resource operations take place."},
{"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."},
{"xml-file", 1, 0, 'x', NULL, pcmk_option_hidden},
{"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', "Display the cluster status on the console with updates as they occur:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Display the cluster status on the console just once then exit:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon -1", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Display your cluster status, group resources by node, and include inactive resources in the list:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --group-by-node --inactive", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --daemonize --as-html /path/to/docroot/filename.html", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Start crm_mon and export the current cluster status as xml to stdout, then exit.:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --as-xml", pcmk_option_example},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
#if CURSES_ENABLED
static const char *
get_option_desc(char c)
{
int lpc;
for (lpc = 0; long_options[lpc].name != NULL; lpc++) {
if (long_options[lpc].name[0] == '-')
continue;
if (long_options[lpc].val == c) {
static char *buf = NULL;
const char *rv;
char *nl;
/* chop off tabs and cut at newline */
free(buf); /* free string from last usage */
buf = strdup(long_options[lpc].desc);
rv = buf; /* make a copy to keep buf pointer unaltered
for freeing when we come by next time.
Like this the result stays valid until
the next call.
*/
while(isspace(rv[0])) {
rv++;
}
nl = strchr(rv, '\n');
if (nl) {
*nl = '\0';
}
return rv;
}
}
return NULL;
}
#define print_option_help(option, condition) \
print_as("%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option));
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused)
{
int c;
gboolean config_mode = FALSE;
while (1) {
/* Get user input */
c = getchar();
switch (c) {
case 'm':
if (!fence_history_level) {
fence_history = TRUE;
fence_connect = TRUE;
if (st == NULL) {
mon_cib_connection_destroy(NULL);
}
}
show ^= mon_show_fence_history;
break;
case 'c':
show ^= mon_show_tickets;
break;
case 'f':
show ^= mon_show_failcounts;
break;
case 'n':
group_by_node = ! group_by_node;
break;
case 'o':
show ^= mon_show_operations;
if ((show & mon_show_operations) == 0) {
print_timing = 0;
}
break;
case 'r':
inactive_resources = ! inactive_resources;
break;
case 'R':
print_clone_detail = ! print_clone_detail;
break;
case 't':
print_timing = ! print_timing;
if (print_timing) {
show |= mon_show_operations;
}
break;
case 'A':
show ^= mon_show_attributes;
break;
case 'L':
show ^= mon_show_bans;
break;
case 'D':
/* If any header is shown, clear them all, otherwise set them all */
if (show & mon_show_headers) {
show &= ~mon_show_headers;
} else {
show |= mon_show_headers;
}
break;
case 'b':
print_brief = ! print_brief;
break;
case 'j':
print_pending = ! print_pending;
break;
case '?':
config_mode = TRUE;
break;
default:
goto refresh;
}
if (!config_mode)
goto refresh;
blank_screen();
print_as("Display option change mode\n");
print_as("\n");
print_option_help('c', show & mon_show_tickets);
print_option_help('f', show & mon_show_failcounts);
print_option_help('n', group_by_node);
print_option_help('o', show & mon_show_operations);
print_option_help('r', inactive_resources);
print_option_help('t', print_timing);
print_option_help('A', show & mon_show_attributes);
print_option_help('L', show & mon_show_bans);
print_option_help('D', (show & mon_show_headers) == 0);
print_option_help('R', print_clone_detail);
print_option_help('b', print_brief);
print_option_help('j', print_pending);
print_option_help('m', (show & mon_show_fence_history));
print_as("\n");
print_as("Toggle fields via field letter, type any other key to return");
}
refresh:
mon_refresh_display(NULL);
return TRUE;
}
#endif
// Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag
static void
avoid_zombies()
{
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
if (sigemptyset(&sa.sa_mask) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno));
return;
}
sa.sa_handler = SIG_IGN;
sa.sa_flags = SA_RESTART|SA_NOCLDWAIT;
if (sigaction(SIGCHLD, &sa, NULL) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno));
}
}
int
main(int argc, char **argv)
{
int flag;
int argerr = 0;
int option_index = 0;
int rc = pcmk_ok;
pid_file = strdup("/tmp/ClusterMon.pid");
crm_log_cli_init("crm_mon");
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
// Avoid needing to wait for subprocesses forked for -E/--external-agent
avoid_zombies();
if (crm_ends_with_ext(argv[0], ".cgi") == TRUE) {
output_format = mon_output_cgi;
one_shot = TRUE;
}
/* to enable stonith-connection when called via some application like pcs
* set environment-variable FENCE_HISTORY to desired level
* so you don't have to modify this application
*/
/* fence_history_level = crm_atoi(getenv("FENCE_HISTORY"), "0"); */
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'Q':
show &= ~mon_show_times;
break;
case 'i':
reconnect_msec = crm_get_msec(optarg);
break;
case 'n':
group_by_node = TRUE;
break;
case 'r':
inactive_resources = TRUE;
break;
case 'W':
watch_fencing = TRUE;
fence_connect = TRUE;
break;
case 'm':
fence_history_level = crm_atoi(optarg, "2");
break;
case 'd':
daemonize = TRUE;
break;
case 't':
print_timing = TRUE;
show |= mon_show_operations;
break;
case 'o':
show |= mon_show_operations;
break;
case 'f':
show |= mon_show_failcounts;
break;
case 'A':
show |= mon_show_attributes;
break;
case 'L':
show |= mon_show_bans;
print_neg_location_prefix = optarg? optarg : "";
break;
case 'D':
show &= ~mon_show_headers;
break;
case 'b':
print_brief = TRUE;
break;
case 'j':
print_pending = TRUE;
break;
case 'R':
print_clone_detail = TRUE;
break;
case 'c':
show |= mon_show_tickets;
break;
case 'p':
free(pid_file);
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
pid_file = strdup(optarg);
break;
case 'x':
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
setenv("CIB_file", optarg, 1);
one_shot = TRUE;
break;
case 'h':
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
argerr += (output_format != mon_output_console);
output_format = mon_output_html;
output_filename = strdup(optarg);
umask(S_IWGRP | S_IWOTH);
break;
case 'X':
argerr += (output_format != mon_output_console);
output_format = mon_output_xml;
one_shot = TRUE;
break;
case 'w':
/* do not allow argv[0] and argv[1...] redundancy */
argerr += (output_format != mon_output_console);
output_format = mon_output_cgi;
one_shot = TRUE;
break;
case 's':
argerr += (output_format != mon_output_console);
output_format = mon_output_monitor;
one_shot = TRUE;
break;
case 'E':
external_agent = optarg;
break;
case 'e':
external_recipient = optarg;
break;
case '1':
one_shot = TRUE;
break;
case 'N':
if (output_format == mon_output_console) {
output_format = mon_output_plain;
}
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
if (watch_fencing) {
/* don't moan as fence_history_level == 1 is default */
fence_history_level = 0;
}
/* create the cib-object early to be able to do further
* decisions based on the cib-source
*/
cib = cib_new();
if (cib == NULL) {
rc = -EINVAL;
} else {
switch (cib->variant) {
case cib_native:
/* cib & fencing - everything available */
break;
case cib_file:
/* Don't try to connect to fencing as we
* either don't have a running cluster or
* the fencing-information would possibly
* not match the cib data from a file.
* As we don't expect cib-updates coming
* in enforce one-shot. */
fence_history_level = 0;
one_shot = TRUE;
break;
case cib_remote:
/* updates coming in but no fencing */
fence_history_level = 0;
break;
case cib_undefined:
case cib_database:
default:
/* something is odd */
rc = -EINVAL;
crm_err("Invalid cib-source");
break;
}
}
switch (fence_history_level) {
case 3:
fence_full_history = TRUE;
/* fall through to next lower level */
case 2:
show |= mon_show_fence_history;
/* fall through to next lower level */
case 1:
fence_history = TRUE;
fence_connect = TRUE;
break;
default:
break;
}
/* Extra sanity checks when in CGI mode */
if (output_format == mon_output_cgi) {
argerr += (optind < argc);
argerr += (output_filename != NULL);
argerr += ((cib) && (cib->variant == cib_file));
argerr += (external_agent != NULL);
argerr += (daemonize == TRUE); /* paranoia */
} else if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
return clean_up(CRM_EX_USAGE);
}
/* XML output always prints everything */
if (output_format == mon_output_xml) {
show = mon_show_all;
print_timing = TRUE;
}
if (one_shot) {
if (output_format == mon_output_console) {
output_format = mon_output_plain;
}
} else if (daemonize) {
if ((output_format == mon_output_console) || (output_format == mon_output_plain)) {
output_format = mon_output_none;
}
crm_enable_stderr(FALSE);
if ((output_format != mon_output_html)
&& !external_agent) {
printf ("Looks like you forgot to specify one or more of: "
"--as-html, --external-agent\n");
return clean_up(CRM_EX_USAGE);
}
if (cib) {
/* to be on the safe side don't have cib-object around
* when we are forking
*/
cib_delete(cib);
cib = NULL;
crm_make_daemon(crm_system_name, TRUE, pid_file);
cib = cib_new();
if (cib == NULL) {
rc = -EINVAL;
}
/* otherwise assume we've got the same cib-object we've just destroyed
* in our parent
*/
}
} else if (output_format == mon_output_console) {
#if CURSES_ENABLED
initscr();
cbreak();
noecho();
crm_enable_stderr(FALSE);
curses_console_initialized = TRUE;
#else
one_shot = TRUE;
output_format = mon_output_plain;
printf("Defaulting to one-shot mode\n");
printf("You need to have curses available at compile time to enable console mode\n");
#endif
}
crm_info("Starting %s", crm_system_name);
if (cib) {
do {
if (!one_shot) {
print_as("Waiting until cluster is available on this node ...\n");
}
rc = cib_connect(!one_shot);
if (one_shot) {
break;
} else if (rc != pcmk_ok) {
sleep(reconnect_msec / 1000);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
} else {
if (output_format == mon_output_html) {
print_as("Writing html to %s ...\n", output_filename);
}
}
} while (rc == -ENOTCONN);
}
if (rc != pcmk_ok) {
if (output_format == mon_output_monitor) {
printf("CLUSTER CRIT: Connection to cluster failed: %s\n",
pcmk_strerror(rc));
return clean_up(MON_STATUS_CRIT);
} else {
if (rc == -ENOTCONN) {
print_as("\nError: cluster is not available on this node\n");
} else {
print_as("\nConnection to cluster failed: %s\n",
pcmk_strerror(rc));
}
}
if (output_format == mon_output_console) {
sleep(2);
}
return clean_up(crm_errno2exit(rc));
}
if (one_shot) {
return clean_up(CRM_EX_OK);
}
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
g_io_add_watch(g_io_channel_unix_new(STDIN_FILENO), G_IO_IN, detect_user_input, NULL);
}
#endif
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_loop_unref(mainloop);
crm_info("Exiting %s", crm_system_name);
return clean_up(CRM_EX_OK);
}
#define mon_warn(fmt...) do { \
if (!has_warnings) { \
print_as("CLUSTER WARN:"); \
} else { \
print_as(","); \
} \
print_as(fmt); \
has_warnings = TRUE; \
} while(0)
static int
count_resources(pe_working_set_t * data_set, resource_t * rsc)
{
int count = 0;
GListPtr gIter = NULL;
if (rsc == NULL) {
gIter = data_set->resources;
} else if (rsc->children) {
gIter = rsc->children;
} else {
return is_not_set(rsc->flags, pe_rsc_orphan);
}
for (; gIter != NULL; gIter = gIter->next) {
count += count_resources(data_set, gIter->data);
}
return count;
}
/*!
* \internal
* \brief Print one-line status suitable for use with monitoring software
*
* \param[in] data_set Working set of CIB state
* \param[in] history List of stonith actions
*
* \note This function's output (and the return code when the program exits)
* should conform to https://www.monitoring-plugins.org/doc/guidelines.html
*/
static void
print_simple_status(pe_working_set_t * data_set,
stonith_history_t *history)
{
GListPtr gIter = NULL;
int nodes_online = 0;
int nodes_standby = 0;
int nodes_maintenance = 0;
if (data_set->dc_node == NULL) {
mon_warn(" No DC");
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node->details->standby && node->details->online) {
nodes_standby++;
} else if (node->details->maintenance && node->details->online) {
nodes_maintenance++;
} else if (node->details->online) {
nodes_online++;
} else {
mon_warn(" offline node: %s", node->details->uname);
}
}
if (!has_warnings) {
int nresources = count_resources(data_set, NULL);
print_as("CLUSTER OK: %d node%s online", nodes_online, s_if_plural(nodes_online));
if (nodes_standby > 0) {
print_as(", %d standby node%s", nodes_standby, s_if_plural(nodes_standby));
}
if (nodes_maintenance > 0) {
print_as(", %d maintenance node%s", nodes_maintenance, s_if_plural(nodes_maintenance));
}
print_as(", %d resource%s configured", nresources, s_if_plural(nresources));
}
print_as("\n");
}
/*!
* \internal
* \brief Print a [name]=[value][units] pair, optionally using time string
*
* \param[in] stream File stream to display output to
* \param[in] name Name to display
* \param[in] value Value to display (or NULL to convert time instead)
* \param[in] units Units to display (or NULL for no units)
* \param[in] epoch_time Epoch time to convert if value is NULL
*/
static void
print_nvpair(FILE *stream, const char *name, const char *value,
const char *units, time_t epoch_time)
{
/* print name= */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" %s=", name);
break;
case mon_output_html:
case mon_output_cgi:
case mon_output_xml:
fprintf(stream, " %s=", name);
break;
default:
break;
}
/* If we have a value (and optionally units), print it */
if (value) {
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("%s%s", value, (units? units : ""));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "%s%s", value, (units? units : ""));
break;
case mon_output_xml:
fprintf(stream, "\"%s%s\"", value, (units? units : ""));
break;
default:
break;
}
/* Otherwise print user-friendly time string */
} else {
static char empty_str[] = "";
char *c, *date_str = asctime(localtime(&epoch_time));
for (c = (date_str != NULL) ? date_str : empty_str; *c != '\0'; ++c) {
if (*c == '\n') {
*c = '\0';
break;
}
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("'%s'", date_str);
break;
case mon_output_html:
case mon_output_cgi:
case mon_output_xml:
fprintf(stream, "\"%s\"", date_str);
break;
default:
break;
}
}
}
/*!
* \internal
* \brief Print whatever is needed to start a node section
*
* \param[in] stream File stream to display output to
* \param[in] node Node to print
*/
static void
print_node_start(FILE *stream, node_t *node)
{
char *node_name;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node);
print_as("* Node %s:\n", node_name);
free(node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node);
fprintf(stream, " Node: %s
\n \n", node_name);
free(node_name);
break;
case mon_output_xml:
fprintf(stream, " \n", node->details->uname);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever is needed to end a node section
*
* \param[in] stream File stream to display output to
*/
static void
print_node_end(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print resources section heading appropriate to options
*
* \param[in] stream File stream to display output to
*/
static void
print_resources_heading(FILE *stream)
{
const char *heading;
if (group_by_node) {
/* Active resources have already been printed by node */
heading = (inactive_resources? "Inactive resources" : NULL);
} else if (inactive_resources) {
heading = "Full list of resources";
} else {
heading = "Active resources";
}
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n%s:\n\n", heading);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n %s
\n", heading);
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever resource section closing is appropriate
*
* \param[in] stream File stream to display output to
*/
static void
print_resources_closing(FILE *stream, gboolean printed_heading)
{
const char *heading;
/* What type of resources we did or did not display */
if (group_by_node) {
heading = "inactive ";
} else if (inactive_resources) {
heading = "";
} else {
heading = "active ";
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (!printed_heading) {
print_as("\nNo %sresources\n\n", heading);
}
break;
case mon_output_html:
case mon_output_cgi:
if (!printed_heading) {
fprintf(stream, "
\n No %sresources
\n", heading);
}
break;
case mon_output_xml:
fprintf(stream, " %s\n",
(printed_heading? "" : ""));
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever resource section(s) are appropriate
*
* \param[in] stream File stream to display output to
* \param[in] data_set Cluster state to display
* \param[in] print_opts Bitmask of pe_print_options
*/
static void
print_resources(FILE *stream, pe_working_set_t *data_set, int print_opts)
{
GListPtr rsc_iter;
const char *prefix = NULL;
gboolean printed_heading = FALSE;
gboolean brief_output = print_brief;
/* If we already showed active resources by node, and
* we're not showing inactive resources, we have nothing to do
*/
if (group_by_node && !inactive_resources) {
return;
}
/* XML uses an indent, and ignores brief option for resources */
if (output_format == mon_output_xml) {
prefix = " ";
brief_output = FALSE;
}
/* If we haven't already printed resources grouped by node,
* and brief output was requested, print resource summary */
if (brief_output && !group_by_node) {
print_resources_heading(stream);
printed_heading = TRUE;
print_rscs_brief(data_set->resources, NULL, print_opts, stream,
inactive_resources);
}
/* For each resource, display it if appropriate */
for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) {
resource_t *rsc = (resource_t *) rsc_iter->data;
/* Complex resources may have some sub-resources active and some inactive */
gboolean is_active = rsc->fns->active(rsc, TRUE);
gboolean partially_active = rsc->fns->active(rsc, FALSE);
/* Skip inactive orphans (deleted but still in CIB) */
if (is_set(rsc->flags, pe_rsc_orphan) && !is_active) {
continue;
/* Skip active resources if we already displayed them by node */
} else if (group_by_node) {
if (is_active) {
continue;
}
/* Skip primitives already counted in a brief summary */
} else if (brief_output && (rsc->variant == pe_native)) {
continue;
/* Skip resources that aren't at least partially active,
* unless we're displaying inactive resources
*/
} else if (!partially_active && !inactive_resources) {
continue;
}
/* Print this resource */
if (printed_heading == FALSE) {
print_resources_heading(stream);
printed_heading = TRUE;
}
rsc->fns->print(rsc, prefix, print_opts, stream);
}
print_resources_closing(stream, printed_heading);
}
/*!
* \internal
* \brief Print heading for resource history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node that ran this resource
* \param[in] rsc Resource to print
* \param[in] rsc_id ID of resource to print
* \param[in] all Whether to print every resource or just failed ones
*/
static void
print_rsc_history_start(FILE *stream, pe_working_set_t *data_set, node_t *node,
resource_t *rsc, const char *rsc_id, gboolean all)
{
time_t last_failure = 0;
int failcount = rsc?
pe_get_failcount(node, rsc, &last_failure, pe_fc_default,
NULL, data_set)
: 0;
if (!all && !failcount && (last_failure <= 0)) {
return;
}
/* Print resource ID */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" %s:", rsc_id);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s:", rsc_id);
break;
case mon_output_xml:
fprintf(stream, " 0)) {
/* Print migration threshold */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_xml:
fprintf(stream, " orphan=\"false\" migration-threshold=\"%d\"",
rsc->migration_threshold);
break;
default:
break;
}
/* Print fail count if any */
if (failcount > 0) {
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_xml:
fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=\"%d\"",
failcount);
break;
default:
break;
}
}
/* Print last failure time if any */
if (last_failure > 0) {
print_nvpair(stream, CRM_LAST_FAILURE_PREFIX, NULL, NULL,
last_failure);
}
}
/* End the heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "\n \n");
break;
case mon_output_xml:
fprintf(stream, ">\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print closing for resource history
*
* \param[in] stream File stream to display output to
*/
static void
print_rsc_history_end(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print operation history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node this operation is for
* \param[in] xml_op Root of XML tree describing this operation
* \param[in] task Task parsed from this operation's XML
* \param[in] interval_ms_s Interval parsed from this operation's XML
* \param[in] rc Return code parsed from this operation's XML
*/
static void
print_op_history(FILE *stream, pe_working_set_t *data_set, node_t *node,
xmlNode *xml_op, const char *task, const char *interval_ms_s,
int rc)
{
const char *value = NULL;
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
/* Begin the operation description */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" + (%s) %s:", call, task);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " (%s) %s:", call, task);
break;
case mon_output_xml:
fprintf(stream, " 0) {
print_nvpair(stream, attr, NULL, NULL, int_value);
}
}
attr = XML_RSC_OP_LAST_RUN;
value = crm_element_value(xml_op, attr);
if (value) {
int_value = crm_parse_int(value, NULL);
if (int_value > 0) {
print_nvpair(stream, attr, NULL, NULL, int_value);
}
}
attr = XML_RSC_OP_T_EXEC;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(stream, attr, value, "ms", 0);
}
attr = XML_RSC_OP_T_QUEUE;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(stream, attr, value, "ms", 0);
}
}
/* End the operation description */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_xml:
fprintf(stream, " rc=\"%d\" rc_text=\"%s\" />\n", rc, services_ocf_exitcode_str(rc));
break;
default:
break;
}
}
/*!
* \internal
* \brief Print resource operation/failure history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node that ran this resource
* \param[in] rsc_entry Root of XML tree describing resource status
* \param[in] operations Whether to print operations or just failcounts
*/
static void
print_rsc_history(FILE *stream, pe_working_set_t *data_set, node_t *node,
xmlNode *rsc_entry, gboolean operations)
{
GListPtr gIter = NULL;
GListPtr op_list = NULL;
gboolean printed = FALSE;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
resource_t *rsc = pe_find_resource(data_set->resources, rsc_id);
xmlNode *rsc_op = NULL;
/* If we're not showing operations, just print the resource failure summary */
if (operations == FALSE) {
print_rsc_history_start(stream, data_set, node, rsc, rsc_id, FALSE);
print_rsc_history_end(stream);
return;
}
/* Create a list of this resource's operations */
for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_append(op_list, rsc_op);
}
}
op_list = g_list_sort(op_list, sort_op_by_callid);
/* Print each operation */
for (gIter = op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *xml_op = (xmlNode *) gIter->data;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *interval_ms_s = crm_element_value(xml_op,
XML_LRM_ATTR_INTERVAL_MS);
const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC);
int rc = crm_parse_int(op_rc, "0");
/* Display 0-interval monitors as "probe" */
if (safe_str_eq(task, CRMD_ACTION_STATUS)
&& ((interval_ms_s == NULL) || safe_str_eq(interval_ms_s, "0"))) {
task = "probe";
}
/* Ignore notifies and some probes */
if (safe_str_eq(task, CRMD_ACTION_NOTIFY) || (safe_str_eq(task, "probe") && (rc == 7))) {
continue;
}
/* If this is the first printed operation, print heading for resource */
if (printed == FALSE) {
printed = TRUE;
print_rsc_history_start(stream, data_set, node, rsc, rsc_id, TRUE);
}
/* Print the operation */
print_op_history(stream, data_set, node, xml_op, task, interval_ms_s,
rc);
}
/* Free the list we created (no need to free the individual items) */
g_list_free(op_list);
/* If we printed anything, close the resource */
if (printed) {
print_rsc_history_end(stream);
}
}
/*!
* \internal
* \brief Print node operation/failure history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node_state Root of XML tree describing node status
* \param[in] operations Whether to print operations or just failcounts
*/
static void
print_node_history(FILE *stream, pe_working_set_t *data_set,
xmlNode *node_state, gboolean operations)
{
node_t *node = pe_find_node_id(data_set->nodes, ID(node_state));
xmlNode *lrm_rsc = NULL;
xmlNode *rsc_entry = NULL;
if (node && node->details && node->details->online) {
print_node_start(stream, node);
lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
/* Print history of each of the node's resources */
for (rsc_entry = __xml_first_child(lrm_rsc); rsc_entry != NULL;
rsc_entry = __xml_next(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
print_rsc_history(stream, data_set, node, rsc_entry, operations);
}
}
print_node_end(stream);
}
}
/*!
* \internal
* \brief Print extended information about an attribute if appropriate
*
* \param[in] data_set Working set of CIB state
*
* \return TRUE if extended information was printed, FALSE otherwise
* \note Currently, extended information is only supported for ping/pingd
* resources, for which a message will be printed if connectivity is lost
* or degraded.
*/
static gboolean
print_attr_msg(FILE *stream, node_t * node, GListPtr rsc_list, const char *attrname, const char *attrvalue)
{
GListPtr gIter = NULL;
for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
const char *type = g_hash_table_lookup(rsc->meta, "type");
if (rsc->children != NULL) {
if (print_attr_msg(stream, node, rsc->children, attrname, attrvalue)) {
return TRUE;
}
}
if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) {
const char *name = g_hash_table_lookup(rsc->parameters, "name");
if (name == NULL) {
name = "pingd";
}
/* To identify the resource with the attribute name. */
if (safe_str_eq(name, attrname)) {
int host_list_num = 0;
int expected_score = 0;
int value = crm_parse_int(attrvalue, "0");
const char *hosts = g_hash_table_lookup(rsc->parameters, "host_list");
const char *multiplier = g_hash_table_lookup(rsc->parameters, "multiplier");
if(hosts) {
char **host_list = g_strsplit(hosts, " ", 0);
host_list_num = g_strv_length(host_list);
g_strfreev(host_list);
}
/* pingd multiplier is the same as the default value. */
expected_score = host_list_num * crm_parse_int(multiplier, "1");
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (value <= 0) {
print_as("\t: Connectivity is lost");
} else if (value < expected_score) {
print_as("\t: Connectivity is degraded (Expected=%d)", expected_score);
}
break;
case mon_output_html:
case mon_output_cgi:
if (value <= 0) {
fprintf(stream, " (connectivity is lost)");
} else if (value < expected_score) {
fprintf(stream, " (connectivity is degraded -- expected %d)",
expected_score);
}
break;
case mon_output_xml:
fprintf(stream, " expected=\"%d\"", expected_score);
break;
default:
break;
}
return TRUE;
}
}
}
return FALSE;
}
static int
compare_attribute(gconstpointer a, gconstpointer b)
{
int rc;
rc = strcmp((const char *)a, (const char *)b);
return rc;
}
static void
create_attr_list(gpointer name, gpointer value, gpointer data)
{
int i;
const char *filt_str[] = FILTER_STR;
CRM_CHECK(name != NULL, return);
/* filtering automatic attributes */
for (i = 0; filt_str[i] != NULL; i++) {
if (g_str_has_prefix(name, filt_str[i])) {
return;
}
}
attr_list = g_list_insert_sorted(attr_list, name, compare_attribute);
}
/* structure for passing multiple user data to g_list_foreach() */
struct mon_attr_data {
FILE *stream;
node_t *node;
};
static void
print_node_attribute(gpointer name, gpointer user_data)
{
const char *value = NULL;
struct mon_attr_data *data = (struct mon_attr_data *) user_data;
value = pe_node_attribute_raw(data->node, name);
/* Print attribute name and value */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" + %-32s\t: %-10s", (char *)name, value);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, " %s: %s",
(char *)name, value);
break;
case mon_output_xml:
fprintf(data->stream,
" stream, data->node, data->node->details->running_rsc,
name, value);
/* Close out the attribute */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, "\n");
break;
case mon_output_xml:
fprintf(data->stream, " />\n");
break;
default:
break;
}
}
static void
print_node_summary(FILE *stream, pe_working_set_t * data_set, gboolean operations)
{
xmlNode *node_state = NULL;
xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
/* Print heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (operations) {
print_as("\nOperations:\n");
} else {
print_as("\nMigration Summary:\n");
}
break;
case mon_output_html:
case mon_output_cgi:
if (operations) {
fprintf(stream, "
\n Operations
\n");
} else {
fprintf(stream, "
\n Migration Summary
\n");
}
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each node in the CIB status */
for (node_state = __xml_first_child(cib_status); node_state != NULL;
node_state = __xml_next(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
print_node_history(stream, data_set, node_state, operations);
}
}
/* Close section */
switch (output_format) {
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
static void
print_ticket(gpointer name, gpointer value, gpointer data)
{
ticket_t *ticket = (ticket_t *) value;
FILE *stream = (FILE *) data;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("* %s:\t%s%s", ticket->id,
(ticket->granted? "granted" : "revoked"),
(ticket->standby? " [standby]" : ""));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s: %s%s", ticket->id,
(ticket->granted? "granted" : "revoked"),
(ticket->standby? " [standby]" : ""));
break;
case mon_output_xml:
fprintf(stream, " id, (ticket->granted? "granted" : "revoked"),
(ticket->standby? "true" : "false"));
break;
default:
break;
}
if (ticket->last_granted > -1) {
print_nvpair(stdout, "last-granted", NULL, NULL, ticket->last_granted);
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "\n");
break;
case mon_output_xml:
fprintf(stream, " />\n");
break;
default:
break;
}
}
static void
print_cluster_tickets(FILE *stream, pe_working_set_t * data_set)
{
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nTickets:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Tickets
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each ticket */
g_hash_table_foreach(data_set->tickets, print_ticket, stream);
/* Close section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Return human-friendly string representing node name
*
* The returned string will be in the format
* uname[@hostUname] [(nodeID)]
* "@hostUname" will be printed if the node is a guest node.
* "(nodeID)" will be printed if the node ID is different from the node uname,
* and detailed output has been requested.
*
* \param[in] node Node to represent
* \return Newly allocated string with representation of node name
* \note It is the caller's responsibility to free the result with free().
*/
static char *
get_node_display_name(node_t *node)
{
char *node_name;
const char *node_host = NULL;
const char *node_id = NULL;
int name_len;
CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL));
/* Host is displayed only if this is a guest node */
if (pe__is_guest_node(node)) {
node_t *host_node = pe__current_node(node->details->remote_rsc);
if (host_node && host_node->details) {
node_host = host_node->details->uname;
}
if (node_host == NULL) {
node_host = ""; /* so we at least get "uname@" to indicate guest */
}
}
/* Node ID is displayed if different from uname and detail is requested */
if (print_clone_detail && safe_str_neq(node->details->uname, node->details->id)) {
node_id = node->details->id;
}
/* Determine name length */
name_len = strlen(node->details->uname) + 1;
if (node_host) {
name_len += strlen(node_host) + 1; /* "@node_host" */
}
if (node_id) {
name_len += strlen(node_id) + 3; /* + " (node_id)" */
}
/* Allocate and populate display name */
node_name = malloc(name_len);
CRM_ASSERT(node_name != NULL);
strcpy(node_name, node->details->uname);
if (node_host) {
strcat(node_name, "@");
strcat(node_name, node_host);
}
if (node_id) {
strcat(node_name, " (");
strcat(node_name, node_id);
strcat(node_name, ")");
}
return node_name;
}
/*!
* \internal
* \brief Print a negative location constraint
*
* \param[in] stream File stream to display output to
* \param[in] node Node affected by constraint
* \param[in] location Constraint to print
*/
static void
print_ban(FILE *stream, pe_node_t *node, pe__location_t *location)
{
char *node_name = NULL;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node);
print_as(" %s\tprevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node);
fprintf(stream, " %s prevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_xml:
fprintf(stream,
" \n",
location->id, location->rsc_lh->id, node->details->uname, node->weight,
((location->role_filter == RSC_ROLE_MASTER)? "true" : "false"));
break;
default:
break;
}
free(node_name);
}
/*!
* \internal
* \brief Print section for negative location constraints
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set corresponding to CIB status to display
*/
static void
print_neg_locations(FILE *stream, pe_working_set_t *data_set)
{
GListPtr gIter, gIter2;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nNegative Location Constraints:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Negative Location Constraints
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each ban */
for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
pe__location_t *location = gIter->data;
if (!g_str_has_prefix(location->id, print_neg_location_prefix))
continue;
for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) {
node_t *node = (node_t *) gIter2->data;
if (node->weight < 0) {
print_ban(stream, node, location);
}
}
}
/* Close section */
switch (output_format) {
case mon_output_cgi:
case mon_output_html:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
static void
crm_mon_get_parameters(resource_t *rsc, pe_working_set_t * data_set)
{
get_rsc_attributes(rsc->parameters, rsc, NULL, data_set);
crm_trace("Beekhof: unpacked params for %s (%d)", rsc->id, g_hash_table_size(rsc->parameters));
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
crm_mon_get_parameters(gIter->data, data_set);
}
}
}
/*!
* \internal
* \brief Print node attributes section
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_node_attributes(FILE *stream, pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nNode Attributes:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Node Attributes
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Unpack all resource parameters (it would be more efficient to do this
* only when needed for the first time in print_attr_msg())
*/
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
crm_mon_get_parameters(gIter->data, data_set);
}
/* Display each node's attributes */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
struct mon_attr_data data;
data.stream = stream;
data.node = (node_t *) gIter->data;
if (data.node && data.node->details && data.node->details->online) {
print_node_start(stream, data.node);
g_hash_table_foreach(data.node->details->attrs, create_attr_list, NULL);
g_list_foreach(attr_list, print_node_attribute, &data);
g_list_free(attr_list);
attr_list = NULL;
print_node_end(stream);
}
}
/* Print section footer */
switch (output_format) {
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Return resource display options corresponding to command-line choices
*
* \return Bitmask of pe_print_options suitable for resource print functions
*/
static int
get_resource_display_options(void)
{
int print_opts;
/* Determine basic output format */
switch (output_format) {
case mon_output_console:
print_opts = pe_print_ncurses;
break;
case mon_output_html:
case mon_output_cgi:
print_opts = pe_print_html;
break;
case mon_output_xml:
print_opts = pe_print_xml;
break;
default:
print_opts = pe_print_printf;
break;
}
/* Add optional display elements */
if (print_pending) {
print_opts |= pe_print_pending;
}
if (print_clone_detail) {
print_opts |= pe_print_clone_details|pe_print_implicit;
}
if (!inactive_resources) {
print_opts |= pe_print_clone_active;
}
if (print_brief) {
print_opts |= pe_print_brief;
}
return print_opts;
}
/*!
* \internal
* \brief Print header for cluster summary if needed
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_header(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Cluster Summary
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print footer for cluster summary if needed
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_footer(FILE *stream)
{
switch (output_format) {
case mon_output_cgi:
case mon_output_html:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print times the display was last updated and CIB last changed
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_times(FILE *stream, pe_working_set_t *data_set)
{
const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN);
const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER);
const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT);
const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG);
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Last updated: %s", crm_now_string());
print_as((user || client || origin)? "\n" : "\t\t");
print_as("Last change: %s", last_written ? last_written : "");
if (user) {
print_as(" by %s", user);
}
if (client) {
print_as(" via %s", client);
}
if (origin) {
print_as(" on %s", origin);
}
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Last updated: %s
\n", crm_now_string());
fprintf(stream, " Last change: %s", last_written ? last_written : "");
if (user) {
fprintf(stream, " by %s", user);
}
if (client) {
fprintf(stream, " via %s", client);
}
if (origin) {
fprintf(stream, " on %s", origin);
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n", crm_now_string());
fprintf(stream, " \n",
last_written ? last_written : "", user ? user : "",
client ? client : "", origin ? origin : "");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster stack
*
* \param[in] stream File stream to display output to
* \param[in] stack_s Stack name
*/
static void
print_cluster_stack(FILE *stream, const char *stack_s)
{
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Stack: %s\n", stack_s);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Stack: %s
\n", stack_s);
break;
case mon_output_xml:
fprintf(stream, " \n", stack_s);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print current DC and its version
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_dc(FILE *stream, pe_working_set_t *data_set)
{
node_t *dc = data_set->dc_node;
xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']",
data_set->input, LOG_DEBUG);
const char *dc_version_s = dc_version?
crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)
: NULL;
const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM);
char *dc_name = dc? get_node_display_name(dc) : NULL;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Current DC: ");
if (dc) {
print_as("%s (version %s) - partition %s quorum\n",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum) ? "with" : "WITHOUT"));
} else {
print_as("NONE\n");
}
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Current DC: ");
if (dc) {
fprintf(stream, "%s (version %s) - partition %s quorum",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum)? "with" : "WITHOUT"));
} else {
fprintf(stream, "NONE");
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " details->uname, dc->details->id,
(crm_is_true(quorum) ? "true" : "false"));
} else {
fprintf(stream, "present=\"false\"");
}
fprintf(stream, " />\n");
break;
default:
break;
}
free(dc_name);
}
/*!
* \internal
* \brief Print counts of configured nodes and resources
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
* \param[in] stack_s Stack name
*/
static void
print_cluster_counts(FILE *stream, pe_working_set_t *data_set, const char *stack_s)
{
int nnodes = g_list_length(data_set->nodes);
int nresources = count_resources(data_set, NULL);
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n%d node%s configured\n", nnodes, s_if_plural(nnodes));
print_as("%d resource%s configured",
nresources, s_if_plural(nresources));
if(data_set->disabled_resources || data_set->blocked_resources) {
print_as(" (");
if (data_set->disabled_resources) {
print_as("%d DISABLED", data_set->disabled_resources);
}
if (data_set->disabled_resources && data_set->blocked_resources) {
print_as(", ");
}
if (data_set->blocked_resources) {
print_as("%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
print_as(")");
}
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %d node%s configured
\n",
nnodes, s_if_plural(nnodes));
fprintf(stream, " %d resource%s configured",
nresources, s_if_plural(nresources));
if (data_set->disabled_resources || data_set->blocked_resources) {
fprintf(stream, " (");
if (data_set->disabled_resources) {
fprintf(stream, "%d DISABLED",
data_set->disabled_resources);
}
if (data_set->disabled_resources && data_set->blocked_resources) {
fprintf(stream, ", ");
}
if (data_set->blocked_resources) {
fprintf(stream,
"%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
fprintf(stream, ")");
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream,
" \n",
g_list_length(data_set->nodes));
fprintf(stream,
" \n",
count_resources(data_set, NULL),
data_set->disabled_resources, data_set->blocked_resources);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster-wide options
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*
* \note Currently this is only implemented for HTML and XML output, and
* prints only a few options. If there is demand, more could be added.
*/
static void
print_cluster_options(FILE *stream, pe_working_set_t *data_set)
{
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
print_as("\n *** Resource management is DISABLED ***");
print_as("\n The cluster will not attempt to start, stop or recover services");
print_as("\n");
}
break;
case mon_output_html:
fprintf(stream, "
\n Config Options
\n");
fprintf(stream, " \n");
fprintf(stream, " STONITH of failed nodes | %s |
\n",
is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled");
fprintf(stream, " Cluster is | %ssymmetric |
\n",
is_set(data_set->flags, pe_flag_symmetric_cluster)? "" : "a");
fprintf(stream, " No Quorum Policy | ");
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(stream, "Freeze resources");
break;
case no_quorum_stop:
fprintf(stream, "Stop ALL resources");
break;
case no_quorum_ignore:
fprintf(stream, "Ignore");
break;
case no_quorum_suicide:
fprintf(stream, "Suicide");
break;
}
fprintf(stream, " |
\n");
fprintf(stream, " Resource management | ");
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
fprintf(stream, "DISABLED (the cluster will "
"not attempt to start, stop or recover services)");
} else {
fprintf(stream, "enabled");
}
fprintf(stream, " |
\n");
fprintf(stream, "
\n \n");
break;
case mon_output_xml:
fprintf(stream, " flags, pe_flag_stonith_enabled)?
"true" : "false");
fprintf(stream, " symmetric-cluster=\"%s\"",
is_set(data_set->flags, pe_flag_symmetric_cluster)?
"true" : "false");
fprintf(stream, " no-quorum-policy=\"");
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(stream, "freeze");
break;
case no_quorum_stop:
fprintf(stream, "stop");
break;
case no_quorum_ignore:
fprintf(stream, "ignore");
break;
case no_quorum_suicide:
fprintf(stream, "suicide");
break;
}
fprintf(stream, "\"");
fprintf(stream, " maintenance-mode=\"%s\"",
is_set(data_set->flags, pe_flag_maintenance_mode)?
"true" : "false");
fprintf(stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Get the name of the stack in use (or "unknown" if not available)
*
* \param[in] data_set Working set of CIB state
*
* \return String representing stack name
*/
static const char *
get_cluster_stack(pe_working_set_t *data_set)
{
xmlNode *stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']",
data_set->input, LOG_DEBUG);
return stack? crm_element_value(stack, XML_NVPAIR_ATTR_VALUE) : "unknown";
}
/*!
* \internal
* \brief Print a summary of cluster-wide information
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_summary(FILE *stream, pe_working_set_t *data_set)
{
const char *stack_s = get_cluster_stack(data_set);
gboolean header_printed = FALSE;
if (show & mon_show_stack) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_stack(stream, stack_s);
}
/* Always print DC if none, even if not requested */
if ((data_set->dc_node == NULL) || (show & mon_show_dc)) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_dc(stream, data_set);
}
if (show & mon_show_times) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_times(stream, data_set);
}
if (is_set(data_set->flags, pe_flag_maintenance_mode)
|| data_set->disabled_resources
|| data_set->blocked_resources
|| is_set(show, mon_show_count)) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_counts(stream, data_set, stack_s);
}
/* There is not a separate option for showing cluster options, so show with
* stack for now; a separate option could be added if there is demand
*/
if (show & mon_show_stack) {
print_cluster_options(stream, data_set);
}
if (header_printed) {
print_cluster_summary_footer(stream);
}
}
/*!
* \internal
* \brief Print a failed action
*
* \param[in] stream File stream to display output to
* \param[in] xml_op Root of XML tree describing failed action
*/
static void
print_failed_action(FILE *stream, xmlNode *xml_op)
{
const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
const char *op_key_attr = "op_key";
const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE);
const char *node = crm_element_value(xml_op, XML_ATTR_UNAME);
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
int rc = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), "0");
int status = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), "0");
char *exit_reason_cleaned;
/* If no op_key was given, use id instead */
if (op_key == NULL) {
op_key = ID(xml_op);
op_key_attr = "id";
}
/* If no exit reason was given, use "none" */
if (exit_reason == NULL) {
exit_reason = "none";
}
/* Print common action information */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("* %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_xml:
exit_reason_cleaned = crm_xml_escape(exit_reason);
fprintf(stream, " \n");
break;
case mon_output_xml:
fprintf(stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a section for failed actions
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_failed_actions(FILE *stream, pe_working_set_t *data_set)
{
xmlNode *xml_op = NULL;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nFailed Resource Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream,
"
\n Failed Resource Actions
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each failed action */
for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
xml_op = __xml_next(xml_op)) {
print_failed_action(stream, xml_op);
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Reduce the stonith-history
* for successful actions we keep the last of every action-type & target
* for failed actions we record as well who had failed
* for actions in progress we keep full track
*
* \param[in] history List of stonith actions
*
*/
static stonith_history_t *
reduce_stonith_history(stonith_history_t *history)
{
stonith_history_t *new = NULL, *hp, *np, *tmp;
for (hp = history; hp; ) {
for (np = new; np; np = np->next) {
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* action not in progress */
if (safe_str_eq(hp->target, np->target) &&
safe_str_eq(hp->action, np->action) &&
(hp->state == np->state)) {
if ((hp->state == st_done) ||
safe_str_eq(hp->delegate, np->delegate)) {
/* replace or purge */
if (hp->completed < np->completed) {
/* purge older hp */
tmp = hp->next;
hp->next = NULL;
stonith_history_free(hp);
hp = tmp;
break;
}
/* damn single linked list */
free(hp->target);
free(hp->action);
free(np->origin);
np->origin = hp->origin;
free(np->delegate);
np->delegate = hp->delegate;
free(np->client);
np->client = hp->client;
np->completed = hp->completed;
tmp = hp;
hp = hp->next;
free(tmp);
break;
}
}
if (np->next) {
continue;
}
}
np = 0; /* let outer loop progress hp */
break;
}
/* simply move hp from history to new */
if (np == NULL) {
tmp = hp->next;
hp->next = new;
new = hp;
hp = tmp;
}
}
return new;
}
/*!
* \internal
* \brief Sort the stonith-history
* sort by competed most current on the top
* pending actions lacking a completed-stamp are gathered at the top
*
* \param[in] history List of stonith actions
*
*/
static stonith_history_t *
sort_stonith_history(stonith_history_t *history)
{
stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp;
for (hp = history; hp; ) {
tmp = hp->next;
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* sort into new */
if ((!new) || (hp->completed > new->completed)) {
hp->next = new;
new = hp;
} else {
np = new;
do {
if ((!np->next) || (hp->completed > np->next->completed)) {
hp->next = np->next;
np->next = hp;
break;
}
np = np->next;
} while (1);
}
} else {
/* put into pending */
hp->next = pending;
pending = hp;
}
hp = tmp;
}
/* pending actions don't have a completed-stamp so make them go front */
if (pending) {
stonith_history_t *last_pending = pending;
while (last_pending->next) {
last_pending = last_pending->next;
}
last_pending->next = new;
new = pending;
}
return new;
}
/*!
* \internal
* \brief Print a stonith action
*
* \param[in] stream File stream to display output to
* \param[in] event stonith event
*/
static void
print_stonith_action(FILE *stream, stonith_history_t *event)
{
const char *action_s = stonith_action_str(event->action);
char *run_at_s = ctime(&event->completed);
if ((run_at_s) && (run_at_s[0] != 0)) {
run_at_s[strlen(run_at_s)-1] = 0; /* Overwrite the newline */
}
switch(output_format) {
case mon_output_xml:
fprintf(stream, " target, event->action);
switch(event->state) {
case st_done:
fprintf(stream, " state=\"success\"");
break;
case st_failed:
fprintf(stream, " state=\"failed\"");
break;
default:
fprintf(stream, " state=\"pending\"");
}
fprintf(stream, " origin=\"%s\" client=\"%s\"",
event->origin, event->client);
if (event->delegate) {
fprintf(stream, " delegate=\"%s\"", event->delegate);
}
switch(event->state) {
case st_done:
case st_failed:
fprintf(stream, " completed=\"%s\"", run_at_s?run_at_s:"");
break;
default:
break;
}
fprintf(stream, " />\n");
break;
case mon_output_plain:
case mon_output_console:
switch(event->state) {
case st_done:
print_as("* %s of %s successful: delegate=%s, client=%s, origin=%s,\n"
" %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-successful",
run_at_s?run_at_s:"");
break;
case st_failed:
print_as("* %s of %s failed: delegate=%s, client=%s, origin=%s,\n"
" %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-failed",
run_at_s?run_at_s:"");
break;
default:
print_as("* %s of %s pending: client=%s, origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
case mon_output_html:
case mon_output_cgi:
switch(event->state) {
case st_done:
fprintf(stream, " %s of %s successful: delegate=%s, "
"client=%s, origin=%s, %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-successful",
run_at_s?run_at_s:"");
break;
case st_failed:
fprintf(stream, " %s of %s failed: delegate=%s, "
"client=%s, origin=%s, %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-failed",
run_at_s?run_at_s:"");
break;
default:
fprintf(stream, " %s of %s pending: client=%s, "
"origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
default:
/* no support for fence history for other formats so far */
break;
}
}
/*!
* \internal
* \brief Print a section for failed stonith actions
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_failed_stonith_actions(FILE *stream, stonith_history_t *history)
{
stonith_history_t *hp;
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
break;
}
}
if (!hp) {
return;
}
/* Print section heading */
switch (output_format) {
/* no need to take care of xml in here as xml gets full
* history anyway
*/
case mon_output_plain:
case mon_output_console:
print_as("\nFailed Fencing Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Failed Fencing Actions
\n \n");
break;
default:
break;
}
/* Print each failed stonith action */
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
print_stonith_action(stream, hp);
}
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print pending stonith actions
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_stonith_pending(FILE *stream, stonith_history_t *history)
{
/* xml-output always shows the full history
* so we'll never have to show pending-actions
* separately
*/
if (history && (history->state != st_failed) &&
(history->state != st_done)) {
stonith_history_t *hp;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nPending Fencing Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Pending Fencing Actions
\n \n");
break;
default:
break;
}
for (hp = history; hp; hp = hp->next) {
if ((hp->state == st_failed) || (hp->state == st_done)) {
break;
}
print_stonith_action(stream, hp);
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
default:
break;
}
}
}
/*!
* \internal
* \brief Print a section for stonith-history
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_stonith_history(FILE *stream, stonith_history_t *history)
{
stonith_history_t *hp;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nFencing History:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Fencing History
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
for (hp = history; hp; hp = hp->next) {
if ((hp->state != st_failed) || (output_format == mon_output_xml)) {
print_stonith_action(stream, hp);
}
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster status to screen
*
* This uses the global display preferences set by command-line options
* to display cluster status in a human-friendly way.
*
* \param[in] data_set Working set of CIB state
* \param[in] stonith_history List of stonith actions
*/
static void
print_status(pe_working_set_t * data_set,
stonith_history_t *stonith_history)
{
GListPtr gIter = NULL;
int print_opts = get_resource_display_options();
/* space-separated lists of node names */
char *online_nodes = NULL;
char *online_remote_nodes = NULL;
char *online_guest_nodes = NULL;
char *offline_nodes = NULL;
char *offline_remote_nodes = NULL;
if (output_format == mon_output_console) {
blank_screen();
}
print_cluster_summary(stdout, data_set);
print_as("\n");
/* Gather node information (and print if in bad state or grouping by node) */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_mode = NULL;
char *node_name = get_node_display_name(node);
/* Get node mode */
if (node->details->unclean) {
if (node->details->online) {
node_mode = "UNCLEAN (online)";
} else if (node->details->pending) {
node_mode = "UNCLEAN (pending)";
} else {
node_mode = "UNCLEAN (offline)";
}
} else if (node->details->pending) {
node_mode = "pending";
} else if (node->details->standby_onfail && node->details->online) {
node_mode = "standby (on-fail)";
} else if (node->details->standby) {
if (node->details->online) {
if (node->details->running_rsc) {
node_mode = "standby (with active resources)";
} else {
node_mode = "standby";
}
} else {
node_mode = "OFFLINE (standby)";
}
} else if (node->details->maintenance) {
if (node->details->online) {
node_mode = "maintenance";
} else {
node_mode = "OFFLINE (maintenance)";
}
} else if (node->details->online) {
node_mode = "online";
if (group_by_node == FALSE) {
if (pe__is_guest_node(node)) {
online_guest_nodes = add_list_element(online_guest_nodes, node_name);
} else if (pe__is_remote_node(node)) {
online_remote_nodes = add_list_element(online_remote_nodes, node_name);
} else {
online_nodes = add_list_element(online_nodes, node_name);
}
free(node_name);
continue;
}
} else {
node_mode = "OFFLINE";
if (group_by_node == FALSE) {
if (pe__is_remote_node(node)) {
offline_remote_nodes = add_list_element(offline_remote_nodes, node_name);
} else if (pe__is_guest_node(node)) {
/* ignore offline guest nodes */
} else {
offline_nodes = add_list_element(offline_nodes, node_name);
}
free(node_name);
continue;
}
}
/* If we get here, node is in bad state, or we're grouping by node */
/* Print the node name and status */
if (pe__is_guest_node(node)) {
print_as("Guest");
} else if (pe__is_remote_node(node)) {
print_as("Remote");
}
print_as("Node %s: %s\n", node_name, node_mode);
/* If we're grouping by node, print its resources */
if (group_by_node) {
if (print_brief) {
print_rscs_brief(node->details->running_rsc, "\t", print_opts | pe_print_rsconly,
stdout, FALSE);
} else {
GListPtr gIter2 = NULL;
for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout);
}
}
}
free(node_name);
}
/* If we're not grouping by node, summarize nodes by status */
if (online_nodes) {
print_as("Online: [%s ]\n", online_nodes);
free(online_nodes);
}
if (offline_nodes) {
print_as("OFFLINE: [%s ]\n", offline_nodes);
free(offline_nodes);
}
if (online_remote_nodes) {
print_as("RemoteOnline: [%s ]\n", online_remote_nodes);
free(online_remote_nodes);
}
if (offline_remote_nodes) {
print_as("RemoteOFFLINE: [%s ]\n", offline_remote_nodes);
free(offline_remote_nodes);
}
if (online_guest_nodes) {
print_as("GuestOnline: [%s ]\n", online_guest_nodes);
free(online_guest_nodes);
}
/* Print resources section, if needed */
print_resources(stdout, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stdout, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stdout, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stdout, data_set);
}
/* Print failed stonith actions */
if (fence_history) {
print_failed_stonith_actions(stdout, stonith_history);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stdout, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stdout, data_set);
}
/* Print stonith history */
if (fence_history) {
if (show & mon_show_fence_history) {
print_stonith_history(stdout, stonith_history);
} else {
print_stonith_pending(stdout, stonith_history);
}
}
#if CURSES_ENABLED
if (output_format == mon_output_console) {
refresh();
}
#endif
}
/*!
* \internal
* \brief Print cluster status in XML format
*
* \param[in] data_set Working set of CIB state
*/
static void
print_xml_status(pe_working_set_t * data_set,
stonith_history_t *stonith_history)
{
FILE *stream = stdout;
GListPtr gIter = NULL;
int print_opts = get_resource_display_options();
fprintf(stream, "\n");
fprintf(stream, "\n", VERSION);
print_cluster_summary(stream, data_set);
/*** NODES ***/
fprintf(stream, " \n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_type = "unknown";
switch (node->details->type) {
case node_member:
node_type = "member";
break;
case node_remote:
node_type = "remote";
break;
case node_ping:
node_type = "ping";
break;
}
fprintf(stream, " details->uname);
fprintf(stream, "id=\"%s\" ", node->details->id);
fprintf(stream, "online=\"%s\" ", node->details->online ? "true" : "false");
fprintf(stream, "standby=\"%s\" ", node->details->standby ? "true" : "false");
fprintf(stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false");
fprintf(stream, "maintenance=\"%s\" ", node->details->maintenance ? "true" : "false");
fprintf(stream, "pending=\"%s\" ", node->details->pending ? "true" : "false");
fprintf(stream, "unclean=\"%s\" ", node->details->unclean ? "true" : "false");
fprintf(stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false");
fprintf(stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false");
fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false");
fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc));
fprintf(stream, "type=\"%s\" ", node_type);
if (pe__is_guest_node(node)) {
fprintf(stream, "id_as_resource=\"%s\" ", node->details->remote_rsc->container->id);
}
if (group_by_node) {
GListPtr lpc2 = NULL;
fprintf(stream, ">\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
rsc->fns->print(rsc, " ", print_opts | pe_print_rsconly, stream);
}
fprintf(stream, " \n");
} else {
fprintf(stream, "/>\n");
}
}
fprintf(stream, " \n");
/* Print resources section, if needed */
print_resources(stream, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stream, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stream, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stream, data_set);
}
/* Print stonith history */
if (fence_history) {
print_stonith_history(stdout, stonith_history);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stream, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stream, data_set);
}
fprintf(stream, "\n");
fflush(stream);
fclose(stream);
}
/*!
* \internal
* \brief Print cluster status in HTML format (with HTTP headers if CGI)
*
* \param[in] data_set Working set of CIB state
* \param[in] filename Name of file to write HTML to (ignored if CGI)
*
* \return 0 on success, -1 on error
*/
static int
print_html_status(pe_working_set_t * data_set,
const char *filename,
stonith_history_t *stonith_history)
{
FILE *stream;
GListPtr gIter = NULL;
char *filename_tmp = NULL;
int print_opts = get_resource_display_options();
if (output_format == mon_output_cgi) {
stream = stdout;
fprintf(stream, "Content-Type: text/html\n\n");
} else {
filename_tmp = crm_concat(filename, "tmp", '.');
stream = fopen(filename_tmp, "w");
if (stream == NULL) {
crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp);
free(filename_tmp);
return -1;
}
}
fprintf(stream, "\n");
fprintf(stream, " \n");
fprintf(stream, " Cluster status\n");
fprintf(stream, " \n", reconnect_msec / 1000);
fprintf(stream, " \n");
fprintf(stream, "\n");
print_cluster_summary(stream, data_set);
/*** NODE LIST ***/
fprintf(stream, "
\n Node List
\n");
fprintf(stream, "\n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
char *node_name = get_node_display_name(node);
fprintf(stream, "- Node: %s: ", node_name);
if (node->details->standby_onfail && node->details->online) {
fprintf(stream, "standby (on-fail)\n");
} else if (node->details->standby && node->details->online) {
fprintf(stream, "standby%s\n",
node->details->running_rsc?" (with active resources)":"");
} else if (node->details->standby) {
fprintf(stream, "OFFLINE (standby)\n");
} else if (node->details->maintenance && node->details->online) {
fprintf(stream, "maintenance\n");
} else if (node->details->maintenance) {
fprintf(stream, "OFFLINE (maintenance)\n");
} else if (node->details->online) {
fprintf(stream, "online\n");
} else {
fprintf(stream, "OFFLINE\n");
}
if (print_brief && group_by_node) {
fprintf(stream, "
\n");
print_rscs_brief(node->details->running_rsc, NULL, print_opts | pe_print_rsconly,
stream, FALSE);
fprintf(stream, "
\n");
} else if (group_by_node) {
GListPtr lpc2 = NULL;
fprintf(stream, "\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
fprintf(stream, "- ");
rsc->fns->print(rsc, NULL, print_opts | pe_print_rsconly, stream);
fprintf(stream, "
\n");
}
fprintf(stream, "
\n");
}
fprintf(stream, " \n");
free(node_name);
}
fprintf(stream, "
\n");
/* Print resources section, if needed */
print_resources(stream, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stream, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stream, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stream, data_set);
}
/* Print failed stonith actions */
if (fence_history) {
print_failed_stonith_actions(stream, stonith_history);
}
/* Print stonith history */
if (fence_history) {
if (show & mon_show_fence_history) {
print_stonith_history(stream, stonith_history);
} else {
print_stonith_pending(stdout, stonith_history);
}
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stream, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stream, data_set);
}
fprintf(stream, "\n");
fprintf(stream, "\n");
fflush(stream);
fclose(stream);
if (output_format != mon_output_cgi) {
if (rename(filename_tmp, filename) != 0) {
crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename);
}
free(filename_tmp);
}
return 0;
}
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
int status, const char *desc)
{
pid_t pid;
/*setenv needs chars, these are ints */
char *rc_s = crm_itoa(rc);
char *status_s = crm_itoa(status);
char *target_rc_s = crm_itoa(target_rc);
crm_debug("Sending external notification to '%s' via '%s'", external_recipient, external_agent);
if(rsc) {
setenv("CRM_notify_rsc", rsc, 1);
}
if (external_recipient) {
setenv("CRM_notify_recipient", external_recipient, 1);
}
setenv("CRM_notify_node", node, 1);
setenv("CRM_notify_task", task, 1);
setenv("CRM_notify_desc", desc, 1);
setenv("CRM_notify_rc", rc_s, 1);
setenv("CRM_notify_target_rc", target_rc_s, 1);
setenv("CRM_notify_status", status_s, 1);
pid = fork();
if (pid == -1) {
crm_perror(LOG_ERR, "notification fork() failed.");
}
if (pid == 0) {
/* crm_debug("notification: I am the child. Executing the nofitication program."); */
execl(external_agent, external_agent, NULL);
exit(CRM_EX_ERROR);
}
crm_trace("Finished running custom notification program '%s'.", external_agent);
free(target_rc_s);
free(status_s);
free(rc_s);
return 0;
}
static void
handle_rsc_op(xmlNode * xml, const char *node_id)
{
int rc = -1;
int status = -1;
int target_rc = -1;
gboolean notify = TRUE;
char *rsc = NULL;
char *task = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *id = NULL;
const char *node = NULL;
xmlNode *n = xml;
xmlNode * rsc_op = xml;
if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) {
xmlNode *cIter;
for(cIter = xml->children; cIter; cIter = cIter->next) {
handle_rsc_op(cIter, node_id);
}
return;
}
id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY);
if (id == NULL) {
/* Compatibility with <= 1.1.5 */
id = ID(rsc_op);
}
magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC);
if (magic == NULL) {
/* non-change */
return;
}
if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc,
&target_rc)) {
crm_err("Invalid event %s detected for %s", magic, id);
return;
}
if (parse_op_key(id, &rsc, &task, NULL) == FALSE) {
crm_err("Invalid event detected for %s", id);
goto bail;
}
node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);
while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) {
n = n->parent;
}
if(node == NULL && n) {
node = crm_element_value(n, XML_ATTR_UNAME);
}
if (node == NULL && n) {
node = ID(n);
}
if (node == NULL) {
node = node_id;
}
if (node == NULL) {
crm_err("No node detected for event %s (%s)", magic, id);
goto bail;
}
/* look up where we expected it to be? */
desc = pcmk_strerror(pcmk_ok);
if (status == PCMK_LRM_OP_DONE && target_rc == rc) {
crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);
if (rc == PCMK_OCF_NOT_RUNNING) {
notify = FALSE;
}
} else if (status == PCMK_LRM_OP_DONE) {
desc = services_ocf_exitcode_str(rc);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
} else {
desc = services_lrm_status_str(status);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
}
if (notify && external_agent) {
send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
}
bail:
free(rsc);
free(task);
}
static gboolean
mon_trigger_refresh(gpointer user_data)
{
mainloop_set_trigger(refresh_trigger);
return FALSE;
}
#define NODE_PATT "/lrm[@id="
static char *
get_node_from_xpath(const char *xpath)
{
char *nodeid = NULL;
char *tmp = strstr(xpath, NODE_PATT);
if(tmp) {
tmp += strlen(NODE_PATT);
tmp += 1;
nodeid = strdup(tmp);
tmp = strstr(nodeid, "\'");
CRM_ASSERT(tmp);
tmp[0] = 0;
}
return nodeid;
}
static void
crm_diff_update_v2(const char *event, xmlNode * msg)
{
xmlNode *change = NULL;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) {
const char *name = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
xmlNode *match = NULL;
const char *node = NULL;
if(op == NULL) {
continue;
} else if(strcmp(op, "create") == 0) {
match = change->children;
} else if(strcmp(op, "move") == 0) {
continue;
} else if(strcmp(op, "delete") == 0) {
continue;
} else if(strcmp(op, "modify") == 0) {
match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
match = match->children;
}
}
if(match) {
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);
if(xpath == NULL) {
/* Version field, ignore */
} else if(name == NULL) {
crm_debug("No result for %s operation to %s", op, xpath);
CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);
} else if(strcmp(name, XML_TAG_CIB) == 0) {
xmlNode *state = NULL;
xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS);
for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
node = crm_element_value(state, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(state);
}
handle_rsc_op(state, node);
}
} else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) {
xmlNode *state = NULL;
for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) {
node = crm_element_value(state, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(state);
}
handle_rsc_op(state, node);
}
} else if(strcmp(name, XML_CIB_TAG_STATE) == 0) {
node = crm_element_value(match, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(match);
}
handle_rsc_op(match, node);
} else if(strcmp(name, XML_CIB_TAG_LRM) == 0) {
node = ID(match);
handle_rsc_op(match, node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else {
crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
}
}
}
static void
crm_diff_update_v1(const char *event, xmlNode * msg)
{
/* Process operation updates */
xmlXPathObject *xpathObj = xpath_search(msg,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED
"//" XML_LRM_TAG_RSC_OP);
int lpc = 0, max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
handle_rsc_op(rsc_op, NULL);
}
freeXpathObject(xpathObj);
}
static void
crm_diff_update(const char *event, xmlNode * msg)
{
int rc = -1;
static bool stale = FALSE;
gboolean cib_updated = FALSE;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
print_dot();
if (current_cib != NULL) {
rc = xml_apply_patchset(current_cib, diff, TRUE);
switch (rc) {
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
break;
case pcmk_ok:
cib_updated = TRUE;
break;
default:
crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
}
}
if (current_cib == NULL) {
crm_trace("Re-requesting the full cib");
cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call);
}
if (external_agent) {
int format = 0;
crm_element_value_int(diff, "format", &format);
switch(format) {
case 1:
crm_diff_update_v1(event, msg);
break;
case 2:
crm_diff_update_v2(event, msg);
break;
default:
crm_err("Unknown patch format: %d", format);
}
}
if (current_cib == NULL) {
if(!stale) {
print_as("--- Stale data ---");
}
stale = TRUE;
return;
}
stale = FALSE;
kick_refresh(cib_updated);
}
static gboolean
mon_refresh_display(gpointer user_data)
{
xmlNode *cib_copy = copy_xml(current_cib);
stonith_history_t *stonith_history = NULL;
last_refresh = time(NULL);
if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
if (cib) {
cib->cmds->signoff(cib);
}
print_as("Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation));
if (output_format == mon_output_console) {
sleep(2);
}
clean_up(CRM_EX_CONFIG);
return FALSE;
}
/* get the stonith-history if there is evidence we need it
*/
while (fence_history) {
if (st != NULL) {
if (st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120)) {
fprintf(stderr, "Critical: Unable to get stonith-history\n");
mon_cib_connection_destroy(NULL);
} else {
if ((!fence_full_history) && (output_format != mon_output_xml)) {
stonith_history = reduce_stonith_history(stonith_history);
}
stonith_history = sort_stonith_history(stonith_history);
break; /* all other cases are errors */
}
} else {
fprintf(stderr, "Critical: No stonith-API\n");
}
free_xml(cib_copy);
print_as("Reading stonith-history failed");
if (output_format == mon_output_console) {
sleep(2);
}
return FALSE;
}
if (mon_data_set == NULL) {
mon_data_set = pe_new_working_set();
CRM_ASSERT(mon_data_set != NULL);
}
mon_data_set->input = cib_copy;
cluster_status(mon_data_set);
/* Unpack constraints if any section will need them
* (tickets may be referenced in constraints but not granted yet,
* and bans need negative location constraints) */
if (show & (mon_show_bans | mon_show_tickets)) {
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS,
mon_data_set->input);
unpack_constraints(cib_constraints, mon_data_set);
}
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
if (print_html_status(mon_data_set, output_filename, stonith_history) != 0) {
fprintf(stderr, "Critical: Unable to output html file\n");
clean_up(CRM_EX_CANTCREAT);
return FALSE;
}
break;
case mon_output_xml:
print_xml_status(mon_data_set, stonith_history);
break;
case mon_output_monitor:
print_simple_status(mon_data_set, stonith_history);
if (has_warnings) {
clean_up(MON_STATUS_WARN);
return FALSE;
}
break;
case mon_output_plain:
case mon_output_console:
print_status(mon_data_set, stonith_history);
break;
case mon_output_none:
break;
}
stonith_history_free(stonith_history);
stonith_history = NULL;
pe_reset_working_set(mon_data_set);
return TRUE;
}
static void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else if (external_agent) {
char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
e->operation, e->origin, e->target, pcmk_strerror(e->result),
e->id);
send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
free(desc);
}
}
static void
kick_refresh(gboolean data_updated)
{
static int updates = 0;
long now = time(NULL);
if (data_updated) {
updates++;
}
if(refresh_timer == NULL) {
refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
}
/* Refresh
* - immediately if the last update was more than 5s ago
* - every 10 cib-updates
* - at most 2s after the last update
*/
if ((now - last_refresh) > (reconnect_msec / 1000)) {
mainloop_set_trigger(refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else if(updates >= 10) {
mainloop_set_trigger(refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else {
mainloop_timer_start(refresh_timer);
}
}
static void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else {
print_dot();
kick_refresh(TRUE);
}
}
static void
clean_up_connections(void)
{
if (cib != NULL) {
cib->cmds->signoff(cib);
cib_delete(cib);
cib = NULL;
}
if (st != NULL) {
if (st->state != stonith_disconnected) {
st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
st->cmds->disconnect(st);
}
stonith_api_delete(st);
st = NULL;
}
}
/*
* De-init ncurses, disconnect from the CIB manager, disconnect fencing,
* deallocate memory and show usage-message if requested.
*
* We don't actually return, but nominally returning crm_exit_t allows a usage
* like "return clean_up(exit_code);" which helps static analysis understand the
* code flow.
*/
static crm_exit_t
clean_up(crm_exit_t exit_code)
{
#if CURSES_ENABLED
if (curses_console_initialized) {
output_format = mon_output_plain;
echo();
nocbreak();
endwin();
curses_console_initialized = FALSE;
}
#endif
clean_up_connections();
free(output_filename);
free(pid_file);
pe_free_working_set(mon_data_set);
mon_data_set = NULL;
if (exit_code == CRM_EX_USAGE) {
if (output_format == mon_output_cgi) {
fprintf(stdout, "Content-Type: text/plain\n"
"Status: 500\n\n");
} else {
crm_help('?', CRM_EX_USAGE);
}
}
crm_exit(exit_code);
}
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index d960fb13e4..6be66c6cbb 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -1,786 +1,763 @@
/*
* Copyright 2009-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{ "help", no_argument, NULL, '?',
"\tDisplay this text and exit."
},
{ "version", no_argument, NULL, '$',
"\tDisplay version information and exit."
},
{ "verbose", no_argument, NULL, 'V',
"\tIncrease debug output (may be specified multiple times)."
},
{ "quiet", no_argument, NULL, 'q',
"\tBe less descriptive in output."
},
{ "cleanup", no_argument, NULL, 'c',
"\tCleanup wherever appropriate. Requires: --history."
},
{ "broadcast", no_argument, NULL, 'b',
"Broadcast wherever appropriate."
},
PCMK__OUTPUT_OPTIONS("text, xml"),
{ "-spacer-", no_argument, NULL, '-', "\nDevice definition commands:" },
{ "register", required_argument, NULL, 'R',
"Register the named stonith device. Requires: --agent.\n"
"\t\t\tOptional: --option, --env-option."
},
{ "deregister", required_argument, NULL, 'D',
"De-register the named stonith device."
},
{ "register-level", required_argument, NULL, 'r',
"Register a stonith level for the named target,\n"
"\t\t\tspecified as one of NAME, @PATTERN, or ATTR=VALUE.\n"
"\t\t\tRequires: --index and one or more --device entries."
},
{ "deregister-level", required_argument, NULL, 'd',
"Unregister a stonith level for the named target,\n"
"\t\t\tspecified as for --register-level. Requires: --index."
},
{ "-spacer-", no_argument, NULL, '-', "\nQueries:" },
{ "list", required_argument, NULL, 'l',
"List devices that can terminate the specified host.\n"
"\t\t\tOptional: --timeout."
},
{ "list-registered", no_argument, NULL, 'L',
"List all registered devices. Optional: --timeout."
},
{ "list-installed", no_argument, NULL, 'I',
"List all installed devices. Optional: --timeout."
},
{ "list-targets", required_argument, NULL, 's',
"List the targets that can be fenced by the\n"
"\t\t\tnamed device. Optional: --timeout."
},
{ "metadata", no_argument, NULL, 'M',
"\tShow agent metadata. Requires: --agent.\n"
"\t\t\tOptional: --timeout."
},
{ "query", required_argument, NULL, 'Q',
"Check the named device's status. Optional: --timeout."
},
{ "history", required_argument, NULL, 'H',
"Show last successful fencing operation for named node\n"
"\t\t\t(or '*' for all nodes). Optional: --timeout, --cleanup,\n"
"\t\t\t--quiet (show only the operation's epoch timestamp),\n"
"\t\t\t--verbose (show all recorded and pending operations),\n"
"\t\t\t--broadcast (update history from all nodes available)."
},
{ "last", required_argument, NULL, 'h',
"Indicate when the named node was last fenced.\n"
"\t\t\tOptional: --as-node-id."
},
{ "validate", no_argument, NULL, 'K',
"\tValidate a fence device configuration.\n"
"\t\t\tRequires: --agent. Optional: --option, --env-option,\n"
"\t\t\t--quiet (print no output, only return status).\n"
},
{ "-spacer-", no_argument, NULL, '-', "\nFencing Commands:" },
{ "fence", required_argument, NULL, 'F',
"Fence named host. Optional: --timeout, --tolerance."
},
{ "unfence", required_argument, NULL, 'U',
"Unfence named host. Optional: --timeout, --tolerance."
},
{ "reboot", required_argument, NULL, 'B',
"Reboot named host. Optional: --timeout, --tolerance."
},
{ "confirm", required_argument, NULL, 'C',
"Tell cluster that named host is now safely down."
},
{ "-spacer-", no_argument, NULL, '-', "\nAdditional Options:" },
{ "agent", required_argument, NULL, 'a',
"The agent to use (for example, fence_xvm;\n"
"\t\t\twith --register, --metadata, --validate)."
},
{ "option", required_argument, NULL, 'o',
"Specify a device configuration parameter as NAME=VALUE\n"
"\t\t\t(may be specified multiple times; with --register,\n"
"\t\t\t--validate)."
},
{ "env-option", required_argument, NULL, 'e',
"Specify a device configuration parameter with the\n"
"\t\t\tspecified name, using the value of the\n"
"\t\t\tenvironment variable of the same name prefixed with\n"
"\t\t\tOCF_RESKEY_ (may be specified multiple times;\n"
"\t\t\twith --register, --validate)."
},
{ "tag", required_argument, NULL, 'T',
"Identify fencing operations in logs with the specified\n"
"\t\t\ttag; useful when multiple entities might invoke\n"
"\t\t\tstonith_admin (used with most commands)."
},
{ "device", required_argument, NULL, 'v',
"Device ID (with --register-level, device to associate with\n"
"\t\t\ta given host and level; may be specified multiple times)"
#if SUPPORT_CIBSECRETS
"\n\t\t\t(with --validate, name to use to load CIB secrets)"
#endif
"."
},
{ "index", required_argument, NULL, 'i',
"The stonith level (1-9) (with --register-level,\n"
"\t\t\t--deregister-level)."
},
{ "timeout", required_argument, NULL, 't',
"Operation timeout in seconds (default 120;\n"
"\t\t\tused with most commands)."
},
{ "as-node-id", no_argument, NULL, 'n',
"(Advanced) The supplied node is the corosync node ID\n"
"\t\t\t(with --last)."
},
{ "tolerance", required_argument, NULL, 0,
"(Advanced) Do nothing if an equivalent --fence request\n"
"\t\t\tsucceeded less than this many seconds earlier\n"
"\t\t\t(with --fence, --unfence, --reboot)."
},
{ 0, 0, 0, 0 }
};
/* *INDENT-ON* */
static int st_opts = st_opt_sync_call | st_opt_allow_suicide;
static GMainLoop *mainloop = NULL;
struct {
stonith_t *st;
const char *target;
const char *action;
char *name;
int timeout;
int tolerance;
int rc;
} async_fence_data;
-static int
-try_mainloop_connect(void)
-{
- stonith_t *st = async_fence_data.st;
- int tries = 10;
- int i = 0;
- int rc = 0;
-
- for (i = 0; i < tries; i++) {
- crm_debug("Connecting as %s", async_fence_data.name);
- rc = st->cmds->connect(st, async_fence_data.name, NULL);
-
- if (!rc) {
- crm_debug("stonith client connection established");
- return 0;
- } else {
- crm_debug("stonith client connection failed");
- }
- sleep(1);
- }
-
- crm_err("Could not connect to the fencer");
- return -1;
-}
-
static void
notify_callback(stonith_t * st, stonith_event_t * e)
{
if (e->result != pcmk_ok) {
return;
}
if (safe_str_eq(async_fence_data.target, e->target) &&
safe_str_eq(async_fence_data.action, e->action)) {
async_fence_data.rc = e->result;
g_main_loop_quit(mainloop);
}
}
static void
fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
async_fence_data.rc = data->rc;
g_main_loop_quit(mainloop);
}
static gboolean
async_fence_helper(gpointer user_data)
{
stonith_t *st = async_fence_data.st;
int call_id = 0;
+ int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
- if (try_mainloop_connect()) {
+ if (rc != pcmk_ok) {
+ fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
g_main_loop_quit(mainloop);
return TRUE;
}
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback);
call_id = st->cmds->fence(st,
st_opt_allow_suicide,
async_fence_data.target,
async_fence_data.action,
async_fence_data.timeout, async_fence_data.tolerance);
if (call_id < 0) {
g_main_loop_quit(mainloop);
return TRUE;
}
st->cmds->register_callback(st,
call_id,
async_fence_data.timeout,
st_opt_timeout_updates, NULL, "callback", fence_callback);
return TRUE;
}
static int
mainloop_fencing(stonith_t * st, const char *target, const char *action, int timeout, int tolerance)
{
crm_trigger_t *trig;
async_fence_data.st = st;
async_fence_data.target = target;
async_fence_data.action = action;
async_fence_data.timeout = timeout;
async_fence_data.tolerance = tolerance;
async_fence_data.rc = -1;
trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
mainloop_set_trigger(trig);
mainloop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(mainloop);
return async_fence_data.rc;
}
static int
handle_level(stonith_t *st, char *target, int fence_level,
stonith_key_value_t *devices, bool added)
{
char *node = NULL;
char *pattern = NULL;
char *name = NULL;
char *value = strchr(target, '=');
/* Determine if targeting by attribute, node name pattern or node name */
if (value != NULL) {
name = target;
*value++ = '\0';
} else if (*target == '@') {
pattern = target + 1;
} else {
node = target;
}
/* Register or unregister level as appropriate */
if (added) {
return st->cmds->register_level_full(st, st_opts, node, pattern,
name, value, fence_level,
devices);
}
return st->cmds->remove_level_full(st, st_opts, node, pattern,
name, value, fence_level);
}
static int
handle_history(stonith_t *st, const char *target, int timeout, int quiet,
int verbose, int cleanup, int broadcast, pcmk__output_t *out)
{
stonith_history_t *history = NULL, *hp, *latest = NULL;
int rc = 0;
if (!quiet) {
if (cleanup) {
out->info(out, "cleaning up fencing-history%s%s",
target ? " for node " : "", target ? target : "");
}
if (broadcast) {
out->info(out, "gather fencing-history from all nodes");
}
}
rc = st->cmds->history(st, st_opts | (cleanup?st_opt_cleanup:0) |
(broadcast?st_opt_broadcast:0),
(safe_str_eq(target, "*")? NULL : target),
&history, timeout);
out->begin_list(out, "Fencing history", "event", "events");
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_done) {
latest = hp;
}
if (quiet || !verbose) {
continue;
}
out->message(out, "stonith-event", hp);
}
if (latest) {
if (quiet && out->supports_quiet) {
out->info(out, "%lld", (long long) latest->completed);
} else if (!verbose) { // already printed if verbose
out->message(out, "stonith-event", latest);
}
}
out->end_list(out);
stonith_history_free(history);
return rc;
}
static int
validate(stonith_t *st, const char *agent, const char *id,
stonith_key_value_t *params, int timeout, int quiet,
pcmk__output_t *out)
{
int rc = 1;
char *output = NULL;
char *error_output = NULL;
rc = st->cmds->validate(st, st_opt_sync_call, id, NULL, agent, params,
timeout, &output, &error_output);
if (quiet) {
return rc;
}
out->message(out, "validate", agent, id, output, error_output, rc);
return rc;
}
int
main(int argc, char **argv)
{
int flag;
int rc = 0;
int quiet = 0;
int cleanup = 0;
int broadcast = 0;
int verbose = 0;
int argerr = 0;
int timeout = 120;
int option_index = 0;
int fence_level = 0;
int no_connect = 0;
int tolerance = 0;
int as_nodeid = FALSE;
bool required_agent = false;
char *name = NULL;
char *value = NULL;
char *target = NULL;
char *lists = NULL;
const char *agent = NULL;
const char *device = NULL;
const char *longname = NULL;
char action = 0;
crm_exit_t exit_code = CRM_EX_OK;
stonith_t *st = NULL;
stonith_key_value_t *params = NULL;
stonith_key_value_t *devices = NULL;
stonith_key_value_t *dIter = NULL;
char *output_ty = NULL;
char *output_dest = NULL;
pcmk__output_t *out = NULL;
crm_log_cli_init("stonith_admin");
crm_set_options(NULL, " []", long_options,
"access the Pacemaker fencing API");
async_fence_data.name = strdup(crm_system_name);
while (1) {
flag = crm_get_option_long(argc, argv, &option_index, &longname);
if (flag == -1)
break;
switch (flag) {
case 'V':
verbose = 1;
crm_bump_log_level(argc, argv);
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
case 'K':
required_agent = true;
/* fall through */
case 'I':
no_connect = 1;
/* fall through */
case 'L':
action = flag;
break;
case 'q':
quiet = 1;
break;
case 'c':
cleanup = 1;
break;
case 'b':
broadcast = 1;
break;
case 'R':
required_agent = true;
/* fall through */
case 'Q':
case 'D':
case 's':
action = flag;
device = optarg;
break;
case 'T':
free(async_fence_data.name);
async_fence_data.name = crm_strdup_printf("%s.%s", crm_system_name, optarg);
break;
case 'a':
agent = optarg;
break;
case 'l':
target = optarg;
action = 'L';
break;
case 'M':
no_connect = 1;
action = flag;
required_agent = true;
break;
case 't':
timeout = crm_atoi(optarg, NULL);
break;
case 'B':
case 'F':
case 'U':
/* using mainloop here */
no_connect = 1;
/* fall through */
case 'C':
/* Always log the input arguments */
crm_log_args(argc, argv);
target = optarg;
action = flag;
break;
case 'n':
as_nodeid = TRUE;
break;
case 'h':
case 'H':
case 'r':
case 'd':
target = optarg;
action = flag;
break;
case 'i':
fence_level = crm_atoi(optarg, NULL);
break;
case 'v':
devices = stonith_key_value_add(devices, NULL, optarg);
break;
case 'o':
crm_info("Scanning: -o %s", optarg);
rc = pcmk_scan_nvpair(optarg, &name, &value);
if (rc != 2) {
crm_err("Invalid option: -o %s: %s", optarg, pcmk_strerror(rc));
++argerr;
} else {
crm_info("Got: '%s'='%s'", name, value);
params = stonith_key_value_add(params, name, value);
}
free(value); value = NULL;
free(name); name = NULL;
break;
case 'e':
{
char *key = crm_concat("OCF_RESKEY", optarg, '_');
const char *env = getenv(key);
if (env == NULL) {
crm_err("Invalid option: -e %s", optarg);
++argerr;
} else {
crm_info("Got: '%s'='%s'", optarg, env);
params = stonith_key_value_add(params, optarg, env);
}
free(key);
}
break;
case 0:
if (safe_str_eq("tolerance", longname)) {
tolerance = crm_get_msec(optarg) / 1000; /* Send in seconds */
} else if (pcmk__parse_output_args(longname, optarg, &output_ty,
&output_dest) == false) {
fprintf(stderr, "Unknown long option used: %s\n", longname);
++argerr;
}
break;
default:
++argerr;
break;
}
}
if (optind > argc || action == 0) {
++argerr;
}
if (required_agent && agent == NULL) {
fprintf(stderr, "Please specify an agent to query using -a,--agent [value]\n");
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
CRM_ASSERT(pcmk__register_format("text", pcmk__mk_text_output) == 0);
CRM_ASSERT(pcmk__register_format("xml", pcmk__mk_xml_output) == 0);
rc = pcmk__output_new(&out, output_ty, output_dest, argv);
if (rc != 0) {
fprintf(stderr, "Error creating output format %s: %s\n", output_ty, pcmk_strerror(rc));
exit_code = CRM_EX_ERROR;
goto done;
}
stonith__register_messages(out);
st = stonith_api_new();
if (!no_connect) {
rc = st->cmds->connect(st, async_fence_data.name, NULL);
if (rc < 0) {
fprintf(stderr, "Could not connect to fencer: %s\n",
pcmk_strerror(rc));
exit_code = CRM_EX_DISCONNECT;
goto done;
}
}
switch (action) {
case 'I':
rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout);
if (rc < 0) {
fprintf(stderr, "Failed to list installed devices: %s\n", pcmk_strerror(rc));
break;
}
out->begin_list(out, "Installed fence devices", "fence device", "fence devices");
for (dIter = devices; dIter; dIter = dIter->next) {
out->list_item(out, "device", dIter->value);
}
out->end_list(out);
rc = 0;
stonith_key_value_freeall(devices, 1, 1);
break;
case 'L':
rc = st->cmds->query(st, st_opts, target, &devices, timeout);
if (rc < 0) {
fprintf(stderr, "Failed to list registered devices: %s\n", pcmk_strerror(rc));
break;
}
out->begin_list(out, "Registered fence devices", "fence device", "fence devices");
for (dIter = devices; dIter; dIter = dIter->next) {
out->list_item(out, "device", dIter->value);
}
out->end_list(out);
rc = 0;
stonith_key_value_freeall(devices, 1, 1);
break;
case 'Q':
rc = st->cmds->monitor(st, st_opts, device, timeout);
if (rc < 0) {
rc = st->cmds->list(st, st_opts, device, NULL, timeout);
}
break;
case 's':
rc = st->cmds->list(st, st_opts, device, &lists, timeout);
if (rc == 0 && lists) {
char *head = lists;
char *eol = NULL;
out->begin_list(out, "Fence targets", "fence target", "fence targets");
do {
char *line = NULL;
char *elem = NULL;
char *hostname = NULL;
char *uuid = NULL;
char *status = NULL;
eol = strstr(head, "\\n");
line = strndup(head, eol-head);
while ((elem = strsep(&line, " ")) != NULL) {
if (strcmp(elem, "") == 0) {
continue;
}
if (hostname == NULL) {
hostname = elem;
} else if (uuid == NULL) {
uuid = elem;
} else if (status == NULL) {
char *end = NULL;
status = elem;
end = strchr(status, '\n');
if (end != NULL) {
*end = '\0';
}
}
}
if (hostname != NULL && uuid != NULL && status != NULL) {
out->message(out, "fence-target", hostname, uuid, status);
}
free(line);
head = eol+2;
} while (eol != NULL);
out->end_list(out);
} else if (rc != 0) {
fprintf(stderr, "List command returned error. rc : %d\n", rc);
}
break;
case 'R':
rc = st->cmds->register_device(st, st_opts, device, NULL, agent,
params);
break;
case 'D':
rc = st->cmds->remove_device(st, st_opts, device);
break;
case 'd':
case 'r':
rc = handle_level(st, target, fence_level, devices, action == 'r');
break;
case 'M':
{
char *buffer = NULL;
rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, timeout);
if (rc == pcmk_ok) {
out->output_xml(out, "metadata", buffer);
}
free(buffer);
}
break;
case 'C':
rc = st->cmds->confirm(st, st_opts, target);
break;
case 'B':
rc = mainloop_fencing(st, target, "reboot", timeout, tolerance);
break;
case 'F':
rc = mainloop_fencing(st, target, "off", timeout, tolerance);
break;
case 'U':
rc = mainloop_fencing(st, target, "on", timeout, tolerance);
break;
case 'h':
{
time_t when = 0;
if(as_nodeid) {
uint32_t nodeid = atol(target);
when = stonith_api_time(nodeid, NULL, FALSE);
} else {
when = stonith_api_time(0, target, FALSE);
}
out->message(out, "last-fenced", target, when);
}
break;
case 'H':
rc = handle_history(st, target, timeout, quiet,
verbose, cleanup, broadcast, out);
break;
case 'K':
device = (devices? devices->key : NULL);
rc = validate(st, agent, device, params, timeout, quiet, out);
break;
}
crm_info("Command returned: %s (%d)", pcmk_strerror(rc), rc);
exit_code = crm_errno2exit(rc);
pcmk__output_free(out, exit_code);
done:
free(async_fence_data.name);
stonith_key_value_freeall(params, 1, 1);
if (st != NULL) {
st->cmds->disconnect(st);
stonith_api_delete(st);
}
return exit_code;
}