diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 8ec97bb30c..083b7ae9ad 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2143 +1,2221 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." /* * Definitions indicating which items to print */ static unsigned int show; /* * Definitions indicating how to output */ static mon_output_format_t output_format = mon_output_unset; /* other globals */ static GIOChannel *io_channel = NULL; static GMainLoop *mainloop = NULL; -static guint timer_id = 0; +static guint reconnect_timer = 0; static mainloop_timer_t *refresh_timer = NULL; static pe_working_set_t *mon_data_set = NULL; static cib_t *cib = NULL; static stonith_t *st = NULL; static xmlNode *current_cib = NULL; static GError *error = NULL; static pcmk__common_args_t *args = NULL; static pcmk__output_t *out = NULL; static GOptionContext *context = NULL; static gchar **processed_args = NULL; static time_t last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; static pcmk__supported_format_t formats[] = { #if CURSES_ENABLED CRM_MON_SUPPORTED_FORMAT_CURSES, #endif PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; /* Define exit codes for monitoring-compatible output * For nagios plugins, the possibilities are * OK=0, WARN=1, CRIT=2, and UNKNOWN=3 */ #define MON_STATUS_WARN CRM_EX_ERROR #define MON_STATUS_CRIT CRM_EX_INVALID_PARAM #define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE #define RECONNECT_MSECS 5000 struct { int reconnect_msec; gboolean daemonize; gboolean show_bans; char *pid_file; char *external_agent; char *external_recipient; char *neg_location_prefix; char *only_node; char *only_rsc; unsigned int mon_ops; GSList *user_includes_excludes; GSList *includes_excludes; } options = { .reconnect_msec = RECONNECT_MSECS, .mon_ops = mon_op_default }; -static void clean_up_connections(void); +static void clean_up_cib_connection(void); +static void clean_up_fencing_connection(void); static crm_exit_t clean_up(crm_exit_t exit_code); static void crm_diff_update(const char *event, xmlNode * msg); +static void handle_connection_failures(int rc); static int mon_refresh_display(gpointer user_data); static int cib_connect(gboolean full); +static int fencing_connect(void); static void mon_st_callback_event(stonith_t * st, stonith_event_t * e); static void mon_st_callback_display(stonith_t * st, stonith_event_t * e); -static void kick_refresh(gboolean data_updated); +static void refresh_after_event(gboolean data_updated); static unsigned int all_includes(mon_output_format_t fmt) { if (fmt == mon_output_monitor || fmt == mon_output_plain || fmt == mon_output_console) { return ~mon_show_options; } else { return mon_show_all; } } static unsigned int default_includes(mon_output_format_t fmt) { switch (fmt) { case mon_output_monitor: case mon_output_plain: case mon_output_console: return mon_show_stack | mon_show_dc | mon_show_times | mon_show_counts | mon_show_nodes | mon_show_resources | mon_show_failures; case mon_output_xml: case mon_output_legacy_xml: return all_includes(fmt); case mon_output_html: case mon_output_cgi: return mon_show_summary | mon_show_nodes | mon_show_resources | mon_show_failures; default: return 0; } } struct { const char *name; unsigned int bit; } sections[] = { { "attributes", mon_show_attributes }, { "bans", mon_show_bans }, { "counts", mon_show_counts }, { "dc", mon_show_dc }, { "failcounts", mon_show_failcounts }, { "failures", mon_show_failures }, { "fencing", mon_show_fencing_all }, { "fencing-failed", mon_show_fence_failed }, { "fencing-pending", mon_show_fence_pending }, { "fencing-succeeded", mon_show_fence_worked }, { "nodes", mon_show_nodes }, { "operations", mon_show_operations }, { "options", mon_show_options }, { "resources", mon_show_resources }, { "stack", mon_show_stack }, { "summary", mon_show_summary }, { "tickets", mon_show_tickets }, { "times", mon_show_times }, { NULL } }; static unsigned int find_section_bit(const char *name) { for (int i = 0; sections[i].name != NULL; i++) { if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) { return sections[i].bit; } } return 0; } static gboolean apply_exclude(const gchar *excludes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(excludes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = 0; } else if (pcmk__str_eq(*s, "none", pcmk__str_none)) { show = all_includes(output_format); } else if (bit != 0) { show &= ~bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--exclude options: all, attributes, bans, counts, dc, " "failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, nodes, none, " "operations, options, resources, stack, summary, " "tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include(const gchar *includes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(includes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = all_includes(output_format); } else if (pcmk__starts_with(*s, "bans")) { show |= mon_show_bans; if (options.neg_location_prefix != NULL) { free(options.neg_location_prefix); options.neg_location_prefix = NULL; } if (strlen(*s) > 4 && (*s)[4] == ':') { options.neg_location_prefix = strdup(*s+5); } } else if (pcmk__str_any_of(*s, "default", "defaults", NULL)) { show |= default_includes(output_format); } else if (pcmk__str_eq(*s, "none", pcmk__str_none)) { show = 0; } else if (bit != 0) { show |= bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--include options: all, attributes, bans[:PREFIX], counts, dc, " "default, failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, nodes, none, operations, " "options, resources, stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include_exclude(GSList *lst, mon_output_format_t fmt, GError **error) { gboolean rc = TRUE; GSList *node = lst; /* Set the default of what to display here. Note that we OR everything to * show instead of set show directly because it could have already had some * settings applied to it in main. */ show |= default_includes(fmt); while (node != NULL) { char *s = node->data; if (pcmk__starts_with(s, "--include=")) { rc = apply_include(s+10, error); } else if (pcmk__starts_with(s, "-I=")) { rc = apply_include(s+3, error); } else if (pcmk__starts_with(s, "--exclude=")) { rc = apply_exclude(s+10, error); } else if (pcmk__starts_with(s, "-U=")) { rc = apply_exclude(s+3, error); } if (rc != TRUE) { break; } node = node->next; } return rc; } static gboolean user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s); return TRUE; } static gboolean include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.includes_excludes = g_slist_append(options.includes_excludes, s); return TRUE; } static gboolean as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("html"); output_format = mon_output_cgi; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } if (args->output_dest != NULL) { free(args->output_dest); args->output_dest = NULL; } if (optarg != NULL) { args->output_dest = strdup(optarg); } args->output_ty = strdup("html"); output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); return TRUE; } static gboolean as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_monitor; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_legacy_xml; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_atoi(optarg, "2"); switch (rc) { case 3: options.mon_ops |= mon_op_fence_full_history | mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing", data, err); case 2: options.mon_ops |= mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing", data, err); case 1: options.mon_ops |= mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err); case 0: options.mon_ops &= ~(mon_op_fence_history | mon_op_fence_connect); return include_exclude_cb("--exclude", "fencing", data, err); default: g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3"); return FALSE; } } static gboolean group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_group_by_node; return TRUE; } static gboolean hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--exclude", "summary", data, err); } static gboolean inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_inactive_resources; return TRUE; } static gboolean no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { output_format = mon_output_plain; return TRUE; } static gboolean one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_brief; return TRUE; } static gboolean print_clone_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_clone_detail; return TRUE; } static gboolean print_pending_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_pending; return TRUE; } static gboolean print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_timing; return include_exclude_cb("--include", "operations", data, err); } static gboolean reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_get_msec(optarg); if (rc == -1) { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg); return FALSE; } else { options.reconnect_msec = crm_parse_interval_spec(optarg); } return TRUE; } static gboolean show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "attributes", data, err); } static gboolean show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg != NULL) { char *s = crm_strdup_printf("bans:%s", optarg); gboolean rc = include_exclude_cb("--include", s, data, err); free(s); return rc; } else { return include_exclude_cb("--include", "bans", data, err); } } static gboolean show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "failcounts", data, err); } static gboolean show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "failcounts,operations", data, err); } static gboolean show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "tickets", data, err); } static gboolean use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { setenv("CIB_file", optarg, 1); options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean watch_fencing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_watch_fencing; return TRUE; } #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, "Update frequency (default is 5 seconds)", "TIMESPEC" }, { "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, one_shot_cb, "Display the cluster status once on the console and exit", NULL }, { "daemonize", 'd', 0, G_OPTION_ARG_NONE, &options.daemonize, "Run in the background as a daemon.\n" INDENT "Requires at least one of --output-to and --external-agent.", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file, "(Advanced) Daemon pid file location", "FILE" }, { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent, "A program to run when resource operations take place", "FILE" }, { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient, "A recipient for your program (assuming you want the program to send something to someone).", "RCPT" }, { "watch-fencing", 'W', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, watch_fencing_cb, "Listen for fencing events. For use with --external-agent.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb, NULL, NULL }, { NULL } }; static GOptionEntry display_entries[] = { { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to include in the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to exclude from the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node, "When displaying information about nodes, show only what's related to the given\n" INDENT "node, or to all nodes tagged with the given tag", "NODE" }, { "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc, "When displaying information about resources, show only what's related to the given\n" INDENT "resource, or to all resources tagged with the given tag", "RSC" }, { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb, "Group resources by node", NULL }, { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb, "Display inactive resources", NULL }, { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb, "Display resource fail counts", NULL }, { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb, "Display resource operation history", NULL }, { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb, "Display resource operation history with timing details", NULL }, { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb, "Display cluster tickets", NULL }, { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb, "Show fence history:\n" INDENT "0=off, 1=failures and pending (default without option),\n" INDENT "2=add successes (default without value for option),\n" INDENT "3=show full history without reduction to most recent of each flavor", "LEVEL" }, { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb, "Display negative location constraints [optionally filtered by id prefix]", NULL }, { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb, "Display node attributes", NULL }, { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb, "Hide all headers", NULL }, { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_clone_detail_cb, "Show more details (node IDs, individual clone instances)", NULL }, { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb, "Brief output", NULL }, { "pending", 'j', G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_pending_cb, "Display pending state if 'record-pending' is enabled", NULL }, { "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_simple_cb, "Display the cluster status once as a simple one line output (suitable for nagios)", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb, "Write cluster status to the named HTML file.\n" INDENT "Use --output-as=html --output-to=FILE instead.", "FILE" }, { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb, "Write cluster status as XML to stdout. This will enable one-shot mode.\n" INDENT "Use --output-as=xml instead.", NULL }, { "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb, "Disable the use of ncurses.\n" INDENT "Use --output-as=text instead.", NULL }, { "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb, "Web mode with output suitable for CGI (preselected when run as *.cgi).\n" INDENT "Use --output-as=html --html-cgi instead.", NULL }, { NULL } }; /* *INDENT-ON* */ +/* Reconnect to the CIB and fencing agent after reconnect_msec has passed. This sounds + * like it would be more broadly useful, but only ever happens after a disconnect via + * mon_cib_connection_destroy. + */ static gboolean -mon_timer_popped(gpointer data) +reconnect_after_timeout(gpointer data) { - int rc = pcmk_ok; - #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif - if (timer_id > 0) { - g_source_remove(timer_id); - timer_id = 0; + if (reconnect_timer > 0) { + g_source_remove(reconnect_timer); + reconnect_timer = 0; } print_as(output_format, "Reconnecting...\n"); - rc = cib_connect(TRUE); - - if (rc != pcmk_ok) { - timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL); + fencing_connect(); + if (cib_connect(TRUE) == pcmk_ok) { + /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */ + mon_refresh_display(NULL); + return FALSE; } - return FALSE; + + reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL); + return TRUE; } +/* Called from various places when we are disconnected from the CIB or from the + * fencing agent. If the CIB connection is still valid, this function will also + * attempt to sign off and reconnect. + */ static void -do_mon_cib_connection_destroy(gpointer user_data, bool is_error) +mon_cib_connection_destroy(gpointer user_data) { - if (is_error) { - out->err(out, "Connection to the cluster-daemons terminated"); - } else { - out->info(out, "Connection to the cluster-daemons terminated"); - } + out->info(out, "Connection to the cluster-daemons terminated"); if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } - if (timer_id) { + if (reconnect_timer) { /* we'll trigger a new reconnect-timeout at the end */ - g_source_remove(timer_id); - timer_id = 0; + g_source_remove(reconnect_timer); + reconnect_timer = 0; } if (st) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ - st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); - st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); - st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); - if (st->state != stonith_disconnected) { - st->cmds->disconnect(st); - } + clean_up_fencing_connection(); } if (cib) { cib->cmds->signoff(cib); - timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL); + reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL); } return; } -static void -mon_cib_connection_destroy_regular(gpointer user_data) -{ - do_mon_cib_connection_destroy(user_data, false); -} - -static void -mon_cib_connection_destroy_error(gpointer user_data) -{ - do_mon_cib_connection_destroy(user_data, true); -} - -/* - * Mainloop signal handler. - */ +/* Signal handler installed into the mainloop for normal program shutdown */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if CURSES_ENABLED static sighandler_t ncurses_winch_handler; +/* Signal handler installed the regular way (not into the main loop) for when + * the screen is resized. Commonly, this happens when running in an xterm and + * the user changes its size. + */ static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); + /* Alert the mainloop code we'd like the refresh_trigger to run next + * time the mainloop gets around to checking. + */ mainloop_set_trigger(refresh_trigger); } not_done--; } #endif static int -cib_connect(gboolean full) +fencing_connect(void) { int rc = pcmk_ok; - static gboolean need_pass = TRUE; - - CRM_CHECK(cib != NULL, return -EINVAL); - - if (getenv("CIB_passwd") != NULL) { - need_pass = FALSE; - } if (pcmk_is_set(options.mon_ops, mon_op_fence_connect) && (st == NULL)) { st = stonith_api_new(); } - if (pcmk_is_set(options.mon_ops, mon_op_fence_connect) - && (st != NULL) && (st->state == stonith_disconnected)) { + if (!pcmk_is_set(options.mon_ops, mon_op_fence_connect) || + st == NULL || st->state != stonith_disconnected) { + return rc; + } - rc = st->cmds->connect(st, crm_system_name, NULL); - if (rc == pcmk_ok) { - crm_trace("Setting up stonith callbacks"); - if (pcmk_is_set(options.mon_ops, mon_op_watch_fencing)) { - st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, - mon_st_callback_event); - st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); - } else { - st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, - mon_st_callback_display); - st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display); - } + rc = st->cmds->connect(st, crm_system_name, NULL); + if (rc == pcmk_ok) { + crm_trace("Setting up stonith callbacks"); + if (pcmk_is_set(options.mon_ops, mon_op_watch_fencing)) { + st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, + mon_st_callback_event); + st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); + } else { + st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, + mon_st_callback_display); + st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display); } + } else { + st = NULL; + } + + return rc; +} + +static int +cib_connect(gboolean full) +{ + int rc = pcmk_ok; + static gboolean need_pass = TRUE; + + CRM_CHECK(cib != NULL, return -EINVAL); + + if (getenv("CIB_passwd") != NULL) { + need_pass = FALSE; } if (cib->state == cib_connected_query || cib->state == cib_connected_command) { return rc; } crm_trace("Connecting to the CIB"); /* Hack: the CIB signon will print the prompt for a password if needed, * but to stderr. If we're in curses, show it on the screen instead. * * @TODO Add a password prompt (maybe including input) function to * pcmk__output_t and use it in libcib. */ if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) { need_pass = FALSE; print_as(output_format, "Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != pcmk_ok) { out->err(out, "Could not connect to the CIB: %s", pcmk_strerror(rc)); return rc; } rc = cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); - if (rc == pcmk_ok) { - mon_refresh_display(NULL); - } if (rc == pcmk_ok && full) { - rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy_regular); + rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); if (rc == -EPROTONOSUPPORT) { print_as (output_format, "Notification setup not supported, won't be able to reconnect after failure"); if (output_format == mon_output_console) { sleep(2); } rc = pcmk_ok; } if (rc == pcmk_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != pcmk_ok) { out->err(out, "Notification setup failed, could not monitor CIB actions"); - clean_up_connections(); + clean_up_cib_connection(); + clean_up_fencing_connection(); } } return rc; } #if CURSES_ENABLED static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(output_format, option, condition) \ out->info(out, "%c %c: \t%s", ((condition)? '*': ' '), option, get_option_desc(option)); +/* This function is called from the main loop when there is something to be read + * on stdin, like an interactive user's keystroke. All it does is read the keystroke, + * set flags (or show the page showing which keystrokes are valid), and redraw the + * screen. It does not do anything with connections to the CIB or fencing agent + * agent what would happen in mon_refresh_display. + */ static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; + int rc; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': if (!pcmk_is_set(show, mon_show_fencing_all)) { options.mon_ops |= mon_op_fence_history; options.mon_ops |= mon_op_fence_connect; if (st == NULL) { - mon_cib_connection_destroy_regular(NULL); + mon_cib_connection_destroy(NULL); } } if (pcmk_any_flags_set(show, mon_show_fence_failed |mon_show_fence_pending |mon_show_fence_worked)) { show &= ~mon_show_fencing_all; } else { show |= mon_show_fencing_all; } break; case 'c': show ^= mon_show_tickets; break; case 'f': show ^= mon_show_failcounts; break; case 'n': options.mon_ops ^= mon_op_group_by_node; break; case 'o': show ^= mon_show_operations; if (!pcmk_is_set(show, mon_show_operations)) { options.mon_ops &= ~mon_op_print_timing; } break; case 'r': options.mon_ops ^= mon_op_inactive_resources; break; case 'R': options.mon_ops ^= mon_op_print_clone_detail; break; case 't': options.mon_ops ^= mon_op_print_timing; if (pcmk_is_set(options.mon_ops, mon_op_print_timing)) { show |= mon_show_operations; } break; case 'A': show ^= mon_show_attributes; break; case 'L': show ^= mon_show_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (pcmk_any_flags_set(show, mon_show_stack |mon_show_dc |mon_show_times |mon_show_counts)) { show &= ~mon_show_summary; } else { show |= mon_show_summary; } /* Regardless, we don't show options in console mode. */ show &= ~mon_show_options; break; case 'b': options.mon_ops ^= mon_op_print_brief; break; case 'j': options.mon_ops ^= mon_op_print_pending; break; case '?': config_mode = TRUE; break; default: + /* All other keys just redraw the screen. */ goto refresh; } if (!config_mode) goto refresh; blank_screen(); out->info(out, "%s", "Display option change mode\n"); print_option_help(out, 'c', pcmk_is_set(show, mon_show_tickets)); print_option_help(out, 'f', pcmk_is_set(show, mon_show_failcounts)); print_option_help(out, 'n', pcmk_is_set(options.mon_ops, mon_op_group_by_node)); print_option_help(out, 'o', pcmk_is_set(show, mon_show_operations)); print_option_help(out, 'r', pcmk_is_set(options.mon_ops, mon_op_inactive_resources)); print_option_help(out, 't', pcmk_is_set(options.mon_ops, mon_op_print_timing)); print_option_help(out, 'A', pcmk_is_set(show, mon_show_attributes)); print_option_help(out, 'L', pcmk_is_set(show,mon_show_bans)); print_option_help(out, 'D', !pcmk_is_set(show, mon_show_summary)); print_option_help(out, 'R', pcmk_is_set(options.mon_ops, mon_op_print_clone_detail)); print_option_help(out, 'b', pcmk_is_set(options.mon_ops, mon_op_print_brief)); print_option_help(out, 'j', pcmk_is_set(options.mon_ops, mon_op_print_pending)); print_option_help(out, 'm', pcmk_any_flags_set(show, mon_show_fence_failed |mon_show_fence_pending |mon_show_fence_worked)); out->info(out, "%s", "\nToggle fields via field letter, type any other key to return"); } refresh: - mon_refresh_display(NULL); + fencing_connect(); + rc = cib_connect(FALSE); + if (rc == pcmk_rc_ok) { + mon_refresh_display(NULL); + } else { + handle_connection_failures(rc); + } + return TRUE; } #endif // Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag static void avoid_zombies(void) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); if (sigemptyset(&sa.sa_mask) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); return; } sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART|SA_NOCLDWAIT; if (sigaction(SIGCHLD, &sa, NULL) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; const char *description = "Notes:\n\n" "If this program is called as crm_mon.cgi, --output-as=html --html-cgi will\n" "automatically be added to the command line arguments.\n\n" "Time Specification:\n\n" "The TIMESPEC in any command line option can be specified in many different\n" "formats. It can be just an integer number of seconds, a number plus units\n" "(ms/msec/us/usec/s/sec/m/min/h/hr), or an ISO 8601 period specification.\n\n" "Output Control:\n\n" "By default, a certain list of sections are written to the output destination.\n" "The default varies based on the output format - XML includes everything, while\n" "other output formats will display less. This list can be modified with the\n" "--include and --exclude command line options. Each option may be given multiple\n" "times on the command line, and each can give a comma-separated list of sections.\n" "The options are applied to the default set, from left to right as seen on the\n" "command line. For a list of valid sections, pass --include=list or --exclude=list.\n\n" "Examples:\n\n" "Display the cluster status on the console with updates as they occur:\n\n" "\tcrm_mon\n\n" "Display the cluster status on the console just once then exit:\n\n" "\tcrm_mon -1\n\n" "Display your cluster status, group resources by node, and include inactive resources in the list:\n\n" "\tcrm_mon --group-by-node --inactive\n\n" "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:\n\n" "\tcrm_mon --daemonize --output-as html --output-to /path/to/docroot/filename.html\n\n" "Start crm_mon and export the current cluster status as XML to stdout, then exit:\n\n" "\tcrm_mon --output-as xml\n\n"; context = pcmk__build_arg_context(args, "console (default), html, text, xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "display", "Display Options:", "Show display options", display_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } /* If certain format options were specified, we want to set some extra * options. We can just process these like they were given on the * command line. */ static void add_output_args(void) { GError *err = NULL; if (output_format == mon_output_plain) { if (!pcmk__force_args(context, &err, "%s --text-fancy", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_cgi) { if (!pcmk__force_args(context, &err, "%s --html-cgi", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_xml) { if (!pcmk__force_args(context, &err, "%s --xml-simple-list --xml-substitute", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_legacy_xml) { output_format = mon_output_xml; if (!pcmk__force_args(context, &err, "%s --xml-legacy --xml-substitute", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } } /* Which output format to use could come from two places: The --as-xml * style arguments we gave in deprecated_entries above, or the formatted output * arguments added by pcmk__register_formats. If the latter were used, * output_format will be mon_output_unset. * * Call the callbacks as if those older style arguments were provided so * the various things they do get done. */ static void reconcile_output_format(pcmk__common_args_t *args) { gboolean retval = TRUE; GError *err = NULL; if (output_format != mon_output_unset) { return; } if (pcmk__str_eq(args->output_ty, "html", pcmk__str_casei)) { char *dest = NULL; if (args->output_dest != NULL) { dest = strdup(args->output_dest); } retval = as_html_cb("h", dest, NULL, &err); free(dest); } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_casei)) { retval = no_curses_cb("N", NULL, NULL, &err); } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_casei)) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_xml; options.mon_ops |= mon_op_one_shot; } else if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_plain; } else { /* Neither old nor new arguments were given, so set the default. */ if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("console"); output_format = mon_output_console; } if (!retval) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } +static void +handle_connection_failures(int rc) +{ + if (rc == pcmk_ok) { + return; + } + + if (output_format == mon_output_monitor) { + g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s", + pcmk_strerror(rc)); + rc = MON_STATUS_CRIT; + } else if (rc == -ENOTCONN) { + g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); + rc = crm_errno2exit(rc); + } else { + g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc)); + rc = crm_errno2exit(rc); + } + + clean_up(rc); +} + +static void +one_shot() +{ + int rc; + + fencing_connect(); + + rc = cib_connect(FALSE); + if (rc == pcmk_rc_ok) { + mon_refresh_display(NULL); + } else { + handle_connection_failures(rc); + } + + clean_up(CRM_EX_OK); +} + int main(int argc, char **argv) { int rc = pcmk_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); crm_log_cli_init("crm_mon"); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); if (pcmk__ends_with_ext(argv[0], ".cgi")) { output_format = mon_output_cgi; options.mon_ops |= mon_op_one_shot; } processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. This can't * be done in add_output_args. That function is called after command line * arguments are processed in the next block, which means it'll override whatever * title the user provides. Doing this here means the user can give their own * title on the command line. */ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (pcmk_is_set(options.mon_ops, mon_op_watch_fencing)) { fence_history_cb("--fence-history", "0", NULL, NULL); options.mon_ops |= mon_op_fence_connect; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } else { switch (cib->variant) { case cib_native: /* cib & fencing - everything available */ break; case cib_file: /* Don't try to connect to fencing as we * either don't have a running cluster or * the fencing-information would possibly * not match the cib data from a file. * As we don't expect cib-updates coming * in enforce one-shot. */ fence_history_cb("--fence-history", "0", NULL, NULL); options.mon_ops |= mon_op_one_shot; break; case cib_remote: /* updates coming in but no fencing */ fence_history_cb("--fence-history", "0", NULL, NULL); break; case cib_undefined: case cib_database: default: /* something is odd */ rc = -EINVAL; break; } } if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) { if (output_format == mon_output_console) { output_format = mon_output_plain; } } else if (options.daemonize) { if ((output_format == mon_output_console) || (output_format == mon_output_plain)) { output_format = mon_output_none; } crm_enable_stderr(FALSE); if (pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches | pcmk__str_casei) && !options.external_agent) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to and --external-agent"); return clean_up(CRM_EX_USAGE); } if (cib) { /* to be on the safe side don't have cib-object around * when we are forking */ cib_delete(cib); cib = NULL; pcmk__daemonize(crm_system_name, options.pid_file); cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } /* otherwise assume we've got the same cib-object we've just destroyed * in our parent */ } } else if (output_format == mon_output_console) { #if CURSES_ENABLED crm_enable_stderr(FALSE); #else options.mon_ops |= mon_op_one_shot; output_format = mon_output_plain; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } } if (rc != pcmk_ok) { // Shouldn't really be possible g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source"); return clean_up(CRM_EX_ERROR); } reconcile_output_format(args); add_output_args(); if (args->version && output_format == mon_output_console) { /* Use the text output format here if we are in curses mode but were given * --version. Displaying version information uses printf, and then we * immediately exit. We don't want to initialize curses for that. */ rc = pcmk__output_new(&out, "text", args->output_dest, argv); } else { rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); } if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, output_format, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, output_format, &error)) { return clean_up(CRM_EX_USAGE); } crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } /* Extra sanity checks when in CGI mode */ if (output_format == mon_output_cgi) { if (cib->variant == cib_file) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file"); return clean_up(CRM_EX_USAGE); } else if (options.external_agent != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent"); return clean_up(CRM_EX_USAGE); } else if (options.daemonize == TRUE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d"); return clean_up(CRM_EX_USAGE); } } if (output_format == mon_output_xml || output_format == mon_output_legacy_xml) { options.mon_ops |= mon_op_print_timing | mon_op_inactive_resources; } if ((output_format == mon_output_html || output_format == mon_output_cgi) && out->dest != stdout) { pcmk__html_add_header("meta", "http-equiv", "refresh", "content", crm_itoa(options.reconnect_msec/1000), NULL); } crm_info("Starting %s", crm_system_name); + if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) { + one_shot(); + } + do { - if (!pcmk_is_set(options.mon_ops, mon_op_one_shot)) { - print_as(output_format ,"Waiting until cluster is available on this node ...\n"); - } - rc = cib_connect(!pcmk_is_set(options.mon_ops, mon_op_one_shot)); + print_as(output_format ,"Waiting until cluster is available on this node ...\n"); - if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) { - break; + fencing_connect(); + rc = cib_connect(TRUE); - } else if (rc != pcmk_ok) { + if (rc != pcmk_ok) { sleep(options.reconnect_msec / 1000); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } else if (output_format == mon_output_html && out->dest != stdout) { printf("Writing html to %s ...\n", args->output_dest); } } while (rc == -ENOTCONN); - if (rc != pcmk_ok) { - if (output_format == mon_output_monitor) { - g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s", - pcmk_strerror(rc)); - return clean_up(MON_STATUS_CRIT); - } else { - if (rc == -ENOTCONN) { - g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); - } else { - g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc)); - } - } - return clean_up(crm_errno2exit(rc)); - } - - if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) { - return clean_up(CRM_EX_OK); - } + handle_connection_failures(rc); + mon_refresh_display(NULL); mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL); } #endif + + /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In + * this file, that is anywhere mainloop_set_trigger is called. + */ refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } /*! * \internal * \brief Print one-line status suitable for use with monitoring software * * \param[in] data_set Working set of CIB state * * \note This function's output (and the return code when the program exits) * should conform to https://www.monitoring-plugins.org/doc/guidelines.html */ static void print_simple_status(pcmk__output_t *out, pe_working_set_t * data_set, unsigned int mon_ops) { GListPtr gIter = NULL; int nodes_online = 0; int nodes_standby = 0; int nodes_maintenance = 0; char *offline_nodes = NULL; size_t offline_nodes_len = 0; gboolean no_dc = FALSE; gboolean offline = FALSE; if (data_set->dc_node == NULL) { mon_ops |= mon_op_has_warnings; no_dc = TRUE; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->maintenance && node->details->online) { nodes_maintenance++; } else if (node->details->online) { nodes_online++; } else { char *s = crm_strdup_printf("offline node: %s", node->details->uname); /* coverity[leaked_storage] False positive */ pcmk__add_word(&offline_nodes, &offline_nodes_len, s); free(s); mon_ops |= mon_op_has_warnings; offline = TRUE; } } if (pcmk_is_set(mon_ops, mon_op_has_warnings)) { out->info(out, "CLUSTER WARN: %s%s%s", no_dc ? "No DC" : "", no_dc && offline ? ", " : "", (offline? offline_nodes : "")); free(offline_nodes); } else { char *nodes_standby_s = NULL; char *nodes_maint_s = NULL; if (nodes_standby > 0) { nodes_standby_s = crm_strdup_printf(", %d standby node%s", nodes_standby, pcmk__plural_s(nodes_standby)); } if (nodes_maintenance > 0) { nodes_maint_s = crm_strdup_printf(", %d maintenance node%s", nodes_maintenance, pcmk__plural_s(nodes_maintenance)); } out->info(out, "CLUSTER OK: %d node%s online%s%s, " "%d resource instance%s configured", nodes_online, pcmk__plural_s(nodes_online), nodes_standby_s != NULL ? nodes_standby_s : "", nodes_maint_s != NULL ? nodes_maint_s : "", data_set->ninstances, pcmk__plural_s(data_set->ninstances)); free(nodes_standby_s); free(nodes_maint_s); } /* coverity[leaked_storage] False positive */ } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = crm_itoa(rc); char *status_s = crm_itoa(status); char *target_rc_s = crm_itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static void handle_rsc_op(xmlNode * xml, const char *node_id) { int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) { xmlNode *cIter; for(cIter = xml->children; cIter; cIter = cIter->next) { handle_rsc_op(cIter, node_id); } return; } id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); if (id == NULL) { /* Compatibility with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); while (n != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(n), pcmk__str_casei)) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, XML_ATTR_UNAME); } if (node == NULL && n) { node = ID(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_strerror(pcmk_ok); if (status == PCMK_LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_LRM_OP_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = services_lrm_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); } +/* This function is just a wrapper around mainloop_set_trigger so that it can be + * called from a mainloop directly. It's simply another way of ensuring the screen + * gets redrawn. + */ static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger(refresh_trigger); return FALSE; } static void crm_diff_update_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "delete") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if(strcmp(name, XML_TAG_CIB) == 0) { xmlNode *state = NULL; xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS); for (state = pcmk__xe_first_child(status); state != NULL; state = pcmk__xe_next(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { xmlNode *state = NULL; for (state = pcmk__xe_first_child(match); state != NULL; state = pcmk__xe_next(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { node = crm_element_value(match, XML_ATTR_UNAME); if (node == NULL) { node = ID(match); } handle_rsc_op(match, node); } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { node = ID(match); handle_rsc_op(match, node); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } } static void crm_diff_update_v1(const char *event, xmlNode * msg) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op, NULL); } freeXpathObject(xpathObj); } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); print_dot(output_format); if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, "format", &format); switch(format) { case 1: crm_diff_update_v1(event, msg); break; case 2: crm_diff_update_v2(event, msg); break; default: crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { print_as(output_format, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; - kick_refresh(cib_updated); + refresh_after_event(cib_updated); } static int -mon_refresh_display(gpointer user_data) +get_fencing_history(stonith_history_t **stonith_history) { - xmlNode *cib_copy = copy_xml(current_cib); - stonith_history_t *stonith_history = NULL; - int history_rc = 0; - - last_refresh = time(NULL); - - if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { - if (cib) { - cib->cmds->signoff(cib); - } - out->err(out, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation)); - clean_up(CRM_EX_CONFIG); - return 0; - } + int rc = 0; - /* get the stonith-history if there is evidence we need it - */ while (pcmk_is_set(options.mon_ops, mon_op_fence_history)) { if (st != NULL) { - history_rc = st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120); + rc = st->cmds->history(st, st_opt_sync_call, NULL, stonith_history, 120); - if (history_rc != 0) { - out->err(out, "Critical: Unable to get stonith-history"); - mon_cib_connection_destroy_error(NULL); - } else { - stonith_history = stonith__sort_history(stonith_history); + if (rc == 0) { + *stonith_history = stonith__sort_history(*stonith_history); if (!pcmk_is_set(options.mon_ops, mon_op_fence_full_history) && (output_format != mon_output_xml)) { - stonith_history = pcmk__reduce_fence_history(stonith_history); + *stonith_history = pcmk__reduce_fence_history(*stonith_history); } break; /* all other cases are errors */ } } else { - out->err(out, "Critical: No stonith-API"); + rc = ENOTCONN; + break; } - free_xml(cib_copy); - out->err(out, "Reading stonith-history failed"); + } + + return rc; +} + +static int +mon_refresh_display(gpointer user_data) +{ + xmlNode *cib_copy = copy_xml(current_cib); + stonith_history_t *stonith_history = NULL; + int history_rc = 0; + + last_refresh = time(NULL); + + if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { + clean_up_cib_connection(); + out->err(out, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation)); + clean_up(CRM_EX_CONFIG); return 0; } + /* get the stonith-history if there is evidence we need it */ + history_rc = get_fencing_history(&stonith_history); + if (mon_data_set == NULL) { mon_data_set = pe_new_working_set(); CRM_ASSERT(mon_data_set != NULL); } pe__set_working_set_flags(mon_data_set, pe_flag_no_compat); mon_data_set->input = cib_copy; cluster_status(mon_data_set); /* Unpack constraints if any section will need them * (tickets may be referenced in constraints but not granted yet, * and bans need negative location constraints) */ if (pcmk_is_set(show, mon_show_bans) || pcmk_is_set(show, mon_show_tickets)) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, mon_data_set->input); unpack_constraints(cib_constraints, mon_data_set); } switch (output_format) { case mon_output_html: case mon_output_cgi: - if (print_html_status(out, mon_data_set, stonith_history, options.mon_ops, + if (print_html_status(out, mon_data_set, crm_errno2exit(history_rc), + stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node, options.only_rsc) != 0) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Critical: Unable to output html file"); clean_up(CRM_EX_CANTCREAT); return 0; } break; case mon_output_legacy_xml: case mon_output_xml: print_xml_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node, options.only_rsc); break; case mon_output_monitor: print_simple_status(out, mon_data_set, options.mon_ops); if (pcmk_is_set(options.mon_ops, mon_op_has_warnings)) { clean_up(MON_STATUS_WARN); return FALSE; } break; case mon_output_console: /* If curses is not enabled, this will just fall through to the plain * text case. */ #if CURSES_ENABLED blank_screen(); - print_status(out, mon_data_set, stonith_history, options.mon_ops, show, - options.neg_location_prefix, options.only_node, options.only_rsc); + print_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history, + options.mon_ops, show, options.neg_location_prefix, + options.only_node, options.only_rsc); refresh(); break; #endif case mon_output_plain: - print_status(out, mon_data_set, stonith_history, options.mon_ops, show, - options.neg_location_prefix, options.only_node, options.only_rsc); + print_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history, + options.mon_ops, show, options.neg_location_prefix, + options.only_node, options.only_rsc); break; case mon_output_unset: case mon_output_none: break; } if (options.daemonize) { out->reset(out); } stonith_history_free(stonith_history); stonith_history = NULL; pe_reset_working_set(mon_data_set); return 1; } +/* This function is called for fencing events (see fencing_connect for which ones) when + * --watch-fencing is used on the command line. + */ static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ - mon_cib_connection_destroy_regular(NULL); + mon_cib_connection_destroy(NULL); } else if (options.external_agent) { char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)", e->operation, e->origin, e->target, pcmk_strerror(e->result), e->id); send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } +/* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met: + * + * - If the last update occurred more than reconnect_msec ago (defaults to 5s, but can be + * changed via the -i command line option), or + * - After every 10 CIB updates, or + * - If it's been 2s since the last update + * + * This function sounds like it would be more broadly useful, but it is only called when a + * fencing event is received or a CIB diff occurrs. + */ static void -kick_refresh(gboolean data_updated) +refresh_after_event(gboolean data_updated) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } - /* Refresh - * - immediately if the last update was more than 5s ago - * - every 10 cib-updates - * - at most 2s after the last update - */ if ((now - last_refresh) > (options.reconnect_msec / 1000)) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else if(updates >= 10) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } +/* This function is called for fencing events (see fencing_connect for which ones) when + * --watch-fencing is NOT used on the command line. + */ static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ - mon_cib_connection_destroy_regular(NULL); + mon_cib_connection_destroy(NULL); } else { print_dot(output_format); - kick_refresh(TRUE); + refresh_after_event(TRUE); } } static void -clean_up_connections(void) +clean_up_cib_connection(void) { - if (cib != NULL) { - cib->cmds->signoff(cib); - cib_delete(cib); - cib = NULL; + if (cib == NULL) { + return; } - if (st != NULL) { - if (st->state != stonith_disconnected) { - st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); - st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); - st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); - st->cmds->disconnect(st); - } - stonith_api_delete(st); - st = NULL; + cib->cmds->signoff(cib); + cib_delete(cib); + cib = NULL; +} + +static void +clean_up_fencing_connection(void) +{ + if (st == NULL) { + return; + } + + if (st->state != stonith_disconnected) { + st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); + st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); + st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); + st->cmds->disconnect(st); } + + stonith_api_delete(st); + st = NULL; } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. */ - clean_up_connections(); + clean_up_cib_connection(); + clean_up_fencing_connection(); free(options.pid_file); free(options.neg_location_prefix); g_slist_free_full(options.includes_excludes, free); pe_free_working_set(mon_data_set); mon_data_set = NULL; g_strfreev(processed_args); /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if ((error != NULL || exit_code == CRM_EX_USAGE) && output_format == mon_output_console) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { if (options.daemonize) { out->dest = freopen(NULL, "w", out->dest); CRM_ASSERT(out->dest != NULL); } out->finish(out, exit_code, true, NULL); pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); } diff --git a/tools/crm_mon.h b/tools/crm_mon.h index f746507f3d..73c926d2c1 100644 --- a/tools/crm_mon.h +++ b/tools/crm_mon.h @@ -1,126 +1,126 @@ /* * Copyright 2019-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include /* Never display node attributes whose name starts with one of these prefixes */ #define FILTER_STR { PCMK__FAIL_COUNT_PREFIX, PCMK__LAST_FAILURE_PREFIX, \ "shutdown", "terminate", "standby", "probe_complete", \ "#", NULL } #if CURSES_ENABLED # define print_dot(output_format) if (output_format == mon_output_console) { \ printw("."); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, "."); \ } #else # define print_dot(output_format) fprintf(stdout, "."); #endif #if CURSES_ENABLED # define print_as(output_format, fmt, args...) if (output_format == mon_output_console) { \ printw(fmt, ##args); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, fmt, ##args); \ } #else # define print_as(output_format, fmt, args...) fprintf(stdout, fmt, ##args); #endif typedef enum mon_output_format_e { mon_output_unset, mon_output_none, mon_output_monitor, mon_output_plain, mon_output_console, mon_output_xml, mon_output_legacy_xml, mon_output_html, mon_output_cgi } mon_output_format_t; #define mon_show_stack (1 << 0) #define mon_show_dc (1 << 1) #define mon_show_times (1 << 2) #define mon_show_counts (1 << 3) #define mon_show_options (1 << 4) #define mon_show_nodes (1 << 5) #define mon_show_resources (1 << 6) #define mon_show_attributes (1 << 7) #define mon_show_failcounts (1 << 8) #define mon_show_operations (1 << 9) #define mon_show_fence_failed (1 << 10) #define mon_show_fence_pending (1 << 11) #define mon_show_fence_worked (1 << 12) #define mon_show_tickets (1 << 13) #define mon_show_bans (1 << 14) #define mon_show_failures (1 << 15) #define mon_show_fencing_all (mon_show_fence_failed | mon_show_fence_pending | mon_show_fence_worked) #define mon_show_summary (mon_show_stack | mon_show_dc | mon_show_times | mon_show_counts) #define mon_show_all (mon_show_summary | mon_show_nodes | mon_show_resources | \ mon_show_attributes | mon_show_failcounts | mon_show_operations | \ mon_show_fencing_all | mon_show_tickets | mon_show_bans | \ mon_show_failures | mon_show_options) #define mon_op_group_by_node (0x0001U) #define mon_op_inactive_resources (0x0002U) #define mon_op_one_shot (0x0004U) #define mon_op_has_warnings (0x0008U) #define mon_op_print_timing (0x0010U) #define mon_op_watch_fencing (0x0020U) #define mon_op_fence_history (0x0040U) #define mon_op_fence_full_history (0x0080U) #define mon_op_fence_connect (0x0100U) #define mon_op_print_brief (0x0200U) #define mon_op_print_pending (0x0400U) #define mon_op_print_clone_detail (0x0800U) #define mon_op_default (mon_op_print_pending | mon_op_fence_history | mon_op_fence_connect) void print_status(pcmk__output_t *out, pe_working_set_t *data_set, - stonith_history_t *stonith_history, unsigned int mon_ops, - unsigned int show, char *prefix, char *only_node, - char *only_rsc); + crm_exit_t history_rc, stonith_history_t *stonith_history, + unsigned int mon_ops, unsigned int show, char *prefix, + char *only_node, char *only_rsc); void print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set, crm_exit_t history_rc, stonith_history_t *stonith_history, unsigned int mon_ops, unsigned int show, char *prefix, char *only_node, char *only_rsc); int print_html_status(pcmk__output_t *out, pe_working_set_t *data_set, - stonith_history_t *stonith_history, unsigned int mon_ops, - unsigned int show, char *prefix, char *only_node, - char *only_rsc); + crm_exit_t history_rc, stonith_history_t *stonith_history, + unsigned int mon_ops, unsigned int show, char *prefix, + char *only_node, char *only_rsc); GList *append_attr_list(GList *attr_list, char *name); void blank_screen(void); unsigned int get_resource_display_options(unsigned int mon_ops); void crm_mon_register_messages(pcmk__output_t *out); pcmk__output_t *crm_mon_mk_curses_output(char **argv); void curses_indented_printf(pcmk__output_t *out, const char *format, ...) G_GNUC_PRINTF(2, 3); void curses_indented_vprintf(pcmk__output_t *out, const char *format, va_list args) G_GNUC_PRINTF(2, 0); #if CURSES_ENABLED extern GOptionEntry crm_mon_curses_output_entries[]; #define CRM_MON_SUPPORTED_FORMAT_CURSES { "console", crm_mon_mk_curses_output, crm_mon_curses_output_entries } #endif pcmk__output_t *crm_mon_mk_xml_output(char **argv); #define CRM_MON_SUPPORTED_FORMAT_XML { "xml", crm_mon_mk_xml_output, pcmk__xml_output_entries } diff --git a/tools/crm_mon_print.c b/tools/crm_mon_print.c index 8ae11bfdf9..73406bd50a 100644 --- a/tools/crm_mon_print.c +++ b/tools/crm_mon_print.c @@ -1,997 +1,1035 @@ /* * Copyright 2019-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #ifndef PCMK__CONFIG_H # define PCMK__CONFIG_H # include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" static int print_rsc_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *rsc_entry, unsigned int mon_ops, GListPtr op_list); static int print_node_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *node_state, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc); static gboolean add_extra_info(pcmk__output_t *out, pe_node_t *node, GListPtr rsc_list, pe_working_set_t *data_set, const char *attrname, int *expected_score); static void print_node_attribute(gpointer name, gpointer user_data); static int print_node_summary(pcmk__output_t *out, pe_working_set_t * data_set, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer); static int print_cluster_tickets(pcmk__output_t *out, pe_working_set_t * data_set, gboolean print_spacer); static int print_neg_locations(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, const char *prefix, GListPtr only_rsc, gboolean print_spacer); static int print_node_attributes(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer); static int print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer); static GListPtr build_uname_list(pe_working_set_t *data_set, const char *s) { GListPtr unames = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { /* Nothing was given so return a list of all node names. Or, '*' was * given. This would normally fall into the pe__unames_with_tag branch * where it will return an empty list. Catch it here instead. */ unames = g_list_prepend(unames, strdup("*")); } else { pe_node_t *node = pe_find_node(data_set->nodes, s); if (node) { /* The given string was a valid uname for a node. Return a * singleton list containing just that uname. */ unames = g_list_prepend(unames, strdup(s)); } else { /* The given string was not a valid uname. It's either a tag or * it's a typo or something. In the first case, we'll return a * list of all the unames of the nodes with the given tag. In the * second case, we'll return a NULL pointer and nothing will * get displayed. */ unames = pe__unames_with_tag(data_set, s); } } return unames; } static GListPtr build_rsc_list(pe_working_set_t *data_set, const char *s) { GListPtr resources = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { resources = g_list_prepend(resources, strdup("*")); } else { pe_resource_t *rsc = pe_find_resource_with_flags(data_set->resources, s, pe_find_renamed|pe_find_any); if (rsc) { /* A colon in the name we were given means we're being asked to filter * on a specific instance of a cloned resource. Put that exact string * into the filter list. Otherwise, use the printable ID of whatever * resource was found that matches what was asked for. */ if (strstr(s, ":") != NULL) { resources = g_list_prepend(resources, strdup(rsc->id)); } else { resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc))); } } else { /* The given string was not a valid resource name. It's either * a tag or it's a typo or something. See build_uname_list for * more detail. */ resources = pe__rscs_with_tag(data_set, s); } } return resources; } static int failure_count(pe_working_set_t *data_set, pe_node_t *node, pe_resource_t *rsc, time_t *last_failure) { return rsc ? pe_get_failcount(node, rsc, last_failure, pe_fc_default, NULL, data_set) : 0; } static GListPtr get_operation_list(xmlNode *rsc_entry) { GListPtr op_list = NULL; xmlNode *rsc_op = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { const char *task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); int op_rc_i = crm_parse_int(op_rc, "0"); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* Ignore notifies and some probes */ if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei) || (pcmk__str_eq(task, "probe", pcmk__str_casei) && (op_rc_i == 7))) { continue; } if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); return op_list; } /*! * \internal * \brief Print resource operation/failure history * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] node Node that ran this resource. * \param[in] rsc_entry Root of XML tree describing resource status. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] op_list A list of operations to print. */ static int print_rsc_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *rsc_entry, unsigned int mon_ops, GListPtr op_list) { GListPtr gIter = NULL; int rc = pcmk_rc_no_output; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); /* Print each operation */ for (gIter = op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); int op_rc_i = crm_parse_int(op_rc, "0"); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* If this is the first printed operation, print heading for resource */ if (rc == pcmk_rc_no_output) { time_t last_failure = 0; int failcount = failure_count(data_set, node, rsc, &last_failure); out->message(out, "resource-history", rsc, rsc_id, TRUE, failcount, last_failure, TRUE); rc = pcmk_rc_ok; } /* Print the operation */ out->message(out, "op-history", xml_op, task, interval_ms_s, op_rc_i, pcmk_is_set(mon_ops, mon_op_print_timing)); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print node operation/failure history * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] node_state Root of XML tree describing node status. * \param[in] operations Whether to print operations or just failcounts. * \param[in] mon_ops Bitmask of mon_op_*. */ static int print_node_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *node_state, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc) { xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; int rc = pcmk_rc_no_output; lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); /* Print history of each of the node's resources */ for (rsc_entry = pcmk__xe_first_child(lrm_rsc); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { continue; } /* We can't use is_filtered here to filter group resources. For is_filtered, * we have to decide whether to check the parent or not. If we check the * parent, all elements of a group will always be printed because that's how * is_filtered works for groups. If we do not check the parent, sometimes * this will filter everything out. * * For other resource types, is_filtered is okay. */ if (uber_parent(rsc)->variant == pe_group) { if (!pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) && !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)))) { continue; } } else { if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { continue; } } if (operations == FALSE) { time_t last_failure = 0; int failcount = failure_count(data_set, node, rsc, &last_failure); if (failcount <= 0) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, get_resource_display_options(mon_ops), FALSE, NULL, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(mon_ops, mon_op_print_brief), pcmk_is_set(mon_ops, mon_op_group_by_node), only_node, only_rsc); } out->message(out, "resource-history", rsc, rsc_id, FALSE, failcount, last_failure, FALSE); } else { GListPtr op_list = get_operation_list(rsc_entry); if (op_list == NULL) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, get_resource_display_options(mon_ops), FALSE, NULL, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(mon_ops, mon_op_print_brief), pcmk_is_set(mon_ops, mon_op_group_by_node), only_node, only_rsc); } print_rsc_history(out, data_set, node, rsc_entry, mon_ops, op_list); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Determine whether extended information about an attribute should be added. * * \param[in] out The output functions structure. * \param[in] node Node that ran this resource. * \param[in] rsc_list The list of resources for this node. * \param[in] attrname The attribute to find. * \param[out] expected_score The expected value for this attribute. * * \return TRUE if extended information should be printed, FALSE otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. */ static gboolean add_extra_info(pcmk__output_t *out, pe_node_t *node, GListPtr rsc_list, pe_working_set_t *data_set, const char *attrname, int *expected_score) { GListPtr gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); const char *name = NULL; GHashTable *params = NULL; if (rsc->children != NULL) { if (add_extra_info(out, node, rsc->children, data_set, attrname, expected_score)) { return TRUE; } } if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) { continue; } params = pe_rsc_params(rsc, node, data_set); name = g_hash_table_lookup(params, "name"); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (pcmk__str_eq(name, attrname, pcmk__str_casei)) { int host_list_num = 0; /* int value = crm_parse_int(attrvalue, "0"); */ const char *hosts = g_hash_table_lookup(params, "host_list"); const char *multiplier = g_hash_table_lookup(params, "multiplier"); if (hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } /* pingd multiplier is the same as the default value. */ *expected_score = host_list_num * crm_parse_int(multiplier, "1"); return TRUE; } } return FALSE; } /* structure for passing multiple user data to g_list_foreach() */ struct mon_attr_data { pcmk__output_t *out; pe_node_t *node; pe_working_set_t *data_set; }; static void print_node_attribute(gpointer name, gpointer user_data) { const char *value = NULL; int expected_score = 0; gboolean add_extra = FALSE; struct mon_attr_data *data = (struct mon_attr_data *) user_data; value = pe_node_attribute_raw(data->node, name); add_extra = add_extra_info(data->out, data->node, data->node->details->running_rsc, data->data_set, name, &expected_score); /* Print attribute name and value */ data->out->message(data->out, "node-attribute", name, value, add_extra, expected_score); } /*! * \internal * \brief Print history for all nodes. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] operations Whether to print operations or just failcounts. * \param[in] mon_ops Bitmask of mon_op_*. */ static int print_node_summary(pcmk__output_t *out, pe_working_set_t * data_set, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer) { xmlNode *node_state = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); int rc = pcmk_rc_no_output; if (xmlChildElementCount(cib_status) == 0) { return rc; } /* Print each node in the CIB status */ for (node_state = pcmk__xe_first_child(cib_status); node_state != NULL; node_state = pcmk__xe_next(node_state)) { pe_node_t *node; if (!pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { continue; } node = pe_find_node_id(data_set->nodes, ID(node_state)); if (!node || !node->details || !node->details->online) { continue; } if (!pcmk__str_in_list(only_node, node->details->uname)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, operations ? "Operations" : "Migration Summary"); print_node_history(out, data_set, node, node_state, operations, mon_ops, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print all tickets. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. */ static int print_cluster_tickets(pcmk__output_t *out, pe_working_set_t * data_set, gboolean print_spacer) { GHashTableIter iter; gpointer key, value; if (g_hash_table_size(data_set->tickets) == 0) { return pcmk_rc_no_output; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); /* Print section heading */ out->begin_list(out, NULL, NULL, "Tickets"); /* Print each ticket */ g_hash_table_iter_init(&iter, data_set->tickets); while (g_hash_table_iter_next(&iter, &key, &value)) { pe_ticket_t *ticket = (pe_ticket_t *) value; out->message(out, "ticket", ticket); } /* Close section */ out->end_list(out); return pcmk_rc_ok; } /*! * \internal * \brief Print section for negative location constraints * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] prefix ID prefix to filter results by. */ static int print_neg_locations(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, const char *prefix, GListPtr only_rsc, gboolean print_spacer) { GListPtr gIter, gIter2; int rc = pcmk_rc_no_output; /* Print each ban */ for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *location = gIter->data; if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) continue; if (!pcmk__str_in_list(only_rsc, rsc_printable_id(location->rsc_lh)) && !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(location->rsc_lh)))) { continue; } for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { pe_node_t *node = (pe_node_t *) gIter2->data; if (node->weight < 0) { PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Negative Location Constraints"); out->message(out, "ban", node, location, pcmk_is_set(mon_ops, mon_op_print_clone_detail)); } } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print node attributes section * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] mon_ops Bitmask of mon_op_*. */ static int print_node_attributes(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer) { GListPtr gIter = NULL; int rc = pcmk_rc_no_output; /* Display each node's attributes */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { struct mon_attr_data data; data.out = out; data.node = (pe_node_t *) gIter->data; data.data_set = data_set; if (data.node && data.node->details && data.node->details->online) { GList *attr_list = NULL; GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, data.node->details->attrs); while (g_hash_table_iter_next (&iter, &key, &value)) { attr_list = append_attr_list(attr_list, key); } if (attr_list == NULL) { continue; } if (!pcmk__str_in_list(only_node, data.node->details->uname)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes"); out->message(out, "node", data.node, get_resource_display_options(mon_ops), FALSE, NULL, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(mon_ops, mon_op_print_brief), pcmk_is_set(mon_ops, mon_op_group_by_node), only_node, only_rsc); g_list_foreach(attr_list, print_node_attribute, &data); g_list_free(attr_list); out->end_list(out); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print a section for failed actions * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. */ static int print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer) { xmlNode *xml_op = NULL; int rc = pcmk_rc_no_output; const char *id = NULL; if (xmlChildElementCount(data_set->failed) == 0) { return rc; } for (xml_op = pcmk__xml_first_child(data_set->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { char *rsc = NULL; if (!pcmk__str_in_list(only_node, crm_element_value(xml_op, XML_ATTR_UNAME))) { continue; } id = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if (parse_op_key(id ? id : ID(xml_op), &rsc, NULL, NULL) == FALSE) { continue; } if (!pcmk__str_in_list(only_rsc, rsc)) { free(rsc); continue; } free(rsc); PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions"); out->message(out, "failed-action", xml_op); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } #define CHECK_RC(retcode, retval) \ if (retval == pcmk_rc_ok) { \ retcode = pcmk_rc_ok; \ } /*! * \internal * \brief Top-level printing function for text/curses output. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. + * \param[in] history_rc Result of getting stonith history * \param[in] stonith_history List of stonith actions. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] show Bitmask of mon_show_*. * \param[in] prefix ID prefix to filter results by. */ void print_status(pcmk__output_t *out, pe_working_set_t *data_set, - stonith_history_t *stonith_history, unsigned int mon_ops, - unsigned int show, char *prefix, char *only_node, char *only_rsc) + crm_exit_t history_rc, stonith_history_t *stonith_history, + unsigned int mon_ops, unsigned int show, char *prefix, + char *only_node, char *only_rsc) { GListPtr unames = NULL; GListPtr resources = NULL; unsigned int print_opts = get_resource_display_options(mon_ops); int rc = pcmk_rc_no_output; + bool already_printed_failure = false; CHECK_RC(rc, out->message(out, "cluster-summary", data_set, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(show, mon_show_stack), pcmk_is_set(show, mon_show_dc), pcmk_is_set(show, mon_show_times), pcmk_is_set(show, mon_show_counts), pcmk_is_set(show, mon_show_options))); unames = build_uname_list(data_set, only_node); resources = build_rsc_list(data_set, only_rsc); if (pcmk_is_set(show, mon_show_nodes) && unames) { PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); CHECK_RC(rc, out->message(out, "node-list", data_set->nodes, unames, resources, print_opts, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(mon_ops, mon_op_print_brief), pcmk_is_set(mon_ops, mon_op_group_by_node))); } /* Print resources section, if needed */ if (pcmk_is_set(show, mon_show_resources)) { CHECK_RC(rc, out->message(out, "resource-list", data_set, print_opts, pcmk_is_set(mon_ops, mon_op_group_by_node), pcmk_is_set(mon_ops, mon_op_inactive_resources), pcmk_is_set(mon_ops, mon_op_print_brief), TRUE, unames, resources, rc == pcmk_rc_ok)); } /* print Node Attributes section if requested */ if (pcmk_is_set(show, mon_show_attributes)) { CHECK_RC(rc, print_node_attributes(out, data_set, mon_ops, unames, resources, rc == pcmk_rc_ok)); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_is_set(show, mon_show_operations) || pcmk_is_set(show, mon_show_failcounts)) { CHECK_RC(rc, print_node_summary(out, data_set, pcmk_is_set(show, mon_show_operations), mon_ops, unames, resources, (rc == pcmk_rc_ok))); } /* If there were any failed actions, print them */ if (pcmk_is_set(show, mon_show_failures) && xml_has_children(data_set->failed)) { CHECK_RC(rc, print_failed_actions(out, data_set, unames, resources, rc == pcmk_rc_ok)); } /* Print failed stonith actions */ if (pcmk_is_set(show, mon_show_fence_failed) && pcmk_is_set(mon_ops, mon_op_fence_history)) { - stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, - GINT_TO_POINTER(st_failed)); + if (history_rc == 0) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, + GINT_TO_POINTER(st_failed)); + + if (hp) { + CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames, + pcmk_is_set(mon_ops, mon_op_fence_full_history), + rc == pcmk_rc_ok)); + } + } else { + PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); - if (hp) { - CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames, - pcmk_is_set(mon_ops, mon_op_fence_full_history), - rc == pcmk_rc_ok)); + already_printed_failure = true; } } /* Print tickets if requested */ if (pcmk_is_set(show, mon_show_tickets)) { CHECK_RC(rc, print_cluster_tickets(out, data_set, rc == pcmk_rc_ok)); } /* Print negative location constraints if requested */ if (pcmk_is_set(show, mon_show_bans)) { CHECK_RC(rc, print_neg_locations(out, data_set, mon_ops, prefix, resources, rc == pcmk_rc_ok)); } /* Print stonith history */ if (pcmk_is_set(mon_ops, mon_op_fence_history)) { - if (pcmk_is_set(show, mon_show_fence_worked)) { + if (history_rc != 0) { + if (!already_printed_failure) { + PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); + } + } else if (pcmk_is_set(show, mon_show_fence_worked)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { CHECK_RC(rc, out->message(out, "fencing-list", hp, unames, pcmk_is_set(mon_ops, mon_op_fence_full_history), rc == pcmk_rc_ok)); } } else if (pcmk_is_set(show, mon_show_fence_pending)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { CHECK_RC(rc, out->message(out, "pending-fencing-list", hp, unames, pcmk_is_set(mon_ops, mon_op_fence_full_history), rc == pcmk_rc_ok)); } } } g_list_free_full(unames, free); } /*! * \internal * \brief Top-level printing function for XML output. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. + * \param[in] history_rc Result of getting stonith history * \param[in] stonith_history List of stonith actions. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] show Bitmask of mon_show_*. * \param[in] prefix ID prefix to filter results by. */ void print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set, crm_exit_t history_rc, stonith_history_t *stonith_history, unsigned int mon_ops, unsigned int show, char *prefix, char *only_node, char *only_rsc) { GListPtr unames = NULL; GListPtr resources = NULL; unsigned int print_opts = get_resource_display_options(mon_ops); out->message(out, "cluster-summary", data_set, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(show, mon_show_stack), pcmk_is_set(show, mon_show_dc), pcmk_is_set(show, mon_show_times), pcmk_is_set(show, mon_show_counts), pcmk_is_set(show, mon_show_options)); unames = build_uname_list(data_set, only_node); resources = build_rsc_list(data_set, only_rsc); /*** NODES ***/ if (pcmk_is_set(show, mon_show_nodes)) { out->message(out, "node-list", data_set->nodes, unames, resources, print_opts, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(mon_ops, mon_op_print_brief), pcmk_is_set(mon_ops, mon_op_group_by_node)); } /* Print resources section, if needed */ if (pcmk_is_set(show, mon_show_resources)) { out->message(out, "resource-list", data_set, print_opts, pcmk_is_set(mon_ops, mon_op_group_by_node), pcmk_is_set(mon_ops, mon_op_inactive_resources), FALSE, FALSE, unames, resources, FALSE); } /* print Node Attributes section if requested */ if (pcmk_is_set(show, mon_show_attributes)) { print_node_attributes(out, data_set, mon_ops, unames, resources, FALSE); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_is_set(show, mon_show_operations) || pcmk_is_set(show, mon_show_failcounts)) { print_node_summary(out, data_set, pcmk_is_set(show, mon_show_operations), mon_ops, unames, resources, FALSE); } /* If there were any failed actions, print them */ if (pcmk_is_set(show, mon_show_failures) && xml_has_children(data_set->failed)) { print_failed_actions(out, data_set, unames, resources, FALSE); } /* Print stonith history */ if (pcmk_is_set(show, mon_show_fencing_all) && pcmk_is_set(mon_ops, mon_op_fence_history)) { out->message(out, "full-fencing-list", history_rc, stonith_history, unames, pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE); } /* Print tickets if requested */ if (pcmk_is_set(show, mon_show_tickets)) { print_cluster_tickets(out, data_set, FALSE); } /* Print negative location constraints if requested */ if (pcmk_is_set(show, mon_show_bans)) { print_neg_locations(out, data_set, mon_ops, prefix, resources, FALSE); } g_list_free_full(unames, free); g_list_free_full(resources, free); } /*! * \internal * \brief Top-level printing function for HTML output. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. + * \param[in] history_rc Result of getting stonith history * \param[in] stonith_history List of stonith actions. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] show Bitmask of mon_show_*. * \param[in] prefix ID prefix to filter results by. */ int print_html_status(pcmk__output_t *out, pe_working_set_t *data_set, - stonith_history_t *stonith_history, unsigned int mon_ops, - unsigned int show, char *prefix, char *only_node, - char *only_rsc) + crm_exit_t history_rc, stonith_history_t *stonith_history, + unsigned int mon_ops, unsigned int show, char *prefix, + char *only_node, char *only_rsc) { GListPtr unames = NULL; GListPtr resources = NULL; unsigned int print_opts = get_resource_display_options(mon_ops); + bool already_printed_failure = false; out->message(out, "cluster-summary", data_set, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(show, mon_show_stack), pcmk_is_set(show, mon_show_dc), pcmk_is_set(show, mon_show_times), pcmk_is_set(show, mon_show_counts), pcmk_is_set(show, mon_show_options)); unames = build_uname_list(data_set, only_node); resources = build_rsc_list(data_set, only_rsc); /*** NODE LIST ***/ if (pcmk_is_set(show, mon_show_nodes) && unames) { out->message(out, "node-list", data_set->nodes, unames, resources, print_opts, pcmk_is_set(mon_ops, mon_op_print_clone_detail), pcmk_is_set(mon_ops, mon_op_print_brief), pcmk_is_set(mon_ops, mon_op_group_by_node)); } /* Print resources section, if needed */ if (pcmk_is_set(show, mon_show_resources)) { out->message(out, "resource-list", data_set, print_opts, pcmk_is_set(mon_ops, mon_op_group_by_node), pcmk_is_set(mon_ops, mon_op_inactive_resources), pcmk_is_set(mon_ops, mon_op_print_brief), TRUE, unames, resources, FALSE); } /* print Node Attributes section if requested */ if (pcmk_is_set(show, mon_show_attributes)) { print_node_attributes(out, data_set, mon_ops, unames, resources, FALSE); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_is_set(show, mon_show_operations) || pcmk_is_set(show, mon_show_failcounts)) { print_node_summary(out, data_set, pcmk_is_set(show, mon_show_operations), mon_ops, unames, resources, FALSE); } /* If there were any failed actions, print them */ if (pcmk_is_set(show, mon_show_failures) && xml_has_children(data_set->failed)) { print_failed_actions(out, data_set, unames, resources, FALSE); } /* Print failed stonith actions */ if (pcmk_is_set(show, mon_show_fence_failed) && pcmk_is_set(mon_ops, mon_op_fence_history)) { - stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, - GINT_TO_POINTER(st_failed)); + if (history_rc == 0) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, + GINT_TO_POINTER(st_failed)); - if (hp) { - out->message(out, "failed-fencing-list", stonith_history, unames, - pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE); + if (hp) { + out->message(out, "failed-fencing-list", stonith_history, unames, + pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE); + } + } else { + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); } } /* Print stonith history */ if (pcmk_is_set(mon_ops, mon_op_fence_history)) { - if (pcmk_is_set(show, mon_show_fence_worked)) { + if (history_rc != 0) { + if (!already_printed_failure) { + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); + } + } else if (pcmk_is_set(show, mon_show_fence_worked)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { out->message(out, "fencing-list", hp, unames, pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE); } } else if (pcmk_is_set(show, mon_show_fence_pending)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { out->message(out, "pending-fencing-list", hp, unames, pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE); } } } /* Print tickets if requested */ if (pcmk_is_set(show, mon_show_tickets)) { print_cluster_tickets(out, data_set, FALSE); } /* Print negative location constraints if requested */ if (pcmk_is_set(show, mon_show_bans)) { print_neg_locations(out, data_set, mon_ops, prefix, resources, FALSE); } g_list_free_full(unames, free); g_list_free_full(resources, free); return 0; }