diff --git a/tools/crm_mon.c b/tools/crm_mon.c index e820448b09..69ff668b67 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,1656 +1,1657 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include <../lib/pengine/unpack.h> /* GMainLoop *mainloop = NULL; */ void wait_for_refresh(int offset, const char *prefix, int msec); void clean_up(int rc); void crm_diff_update(const char *event, xmlNode *msg); gboolean mon_refresh_display(gpointer user_data); int cib_connect(gboolean full); char *xml_file = NULL; char *as_html_file = NULL; char *pid_file = NULL; char *snmp_target = NULL; gboolean as_console = TRUE;; gboolean simple_status = FALSE; gboolean group_by_node = FALSE; gboolean inactive_resources = FALSE; gboolean web_cgi = FALSE; int reconnect_msec = 5000; gboolean daemonize = FALSE; GMainLoop *mainloop = NULL; guint timer_id = 0; const char *crm_mail_host = NULL; const char *crm_mail_prefix = NULL; const char *crm_mail_from = NULL; const char *crm_mail_to = NULL; cib_t *cib = NULL; xmlNode *current_cib = NULL; gboolean one_shot = FALSE; gboolean has_warnings = FALSE; gboolean print_failcount = FALSE; gboolean print_operations = FALSE; gboolean print_timing = FALSE; gboolean log_diffs = FALSE; gboolean log_updates = FALSE; long last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; /* * 1.3.6.1.4.1.32723 has been assigned to the project by IANA * http://www.iana.org/assignments/enterprise-numbers */ #define PACEMAKER_PREFIX "1.3.6.1.4.1.32723" #define PACEMAKER_TRAP_PREFIX PACEMAKER_PREFIX ".1" #define snmp_crm_trap_oid PACEMAKER_TRAP_PREFIX #define snmp_crm_oid_node PACEMAKER_TRAP_PREFIX ".1" #define snmp_crm_oid_rsc PACEMAKER_TRAP_PREFIX ".2" #define snmp_crm_oid_task PACEMAKER_TRAP_PREFIX ".3" #define snmp_crm_oid_desc PACEMAKER_TRAP_PREFIX ".4" #define snmp_crm_oid_status PACEMAKER_TRAP_PREFIX ".5" #define snmp_crm_oid_rc PACEMAKER_TRAP_PREFIX ".6" #define snmp_crm_oid_trc PACEMAKER_TRAP_PREFIX ".7" #if CURSES_ENABLED # define print_dot() if(as_console) { \ printw("."); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, "."); \ } #else # define print_dot() fprintf(stdout, "."); #endif #if CURSES_ENABLED # define print_as(fmt, args...) if(as_console) { \ printw(fmt, ##args); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, fmt, ##args); \ } #else # define print_as(fmt, args...) fprintf(stdout, fmt, ##args); #endif static void blank_screen(void) { #if CURSES_ENABLED int lpc = 0; for(lpc = 0; lpc < LINES; lpc++) { move(lpc, 0); clrtoeol(); } move(0, 0); refresh(); #endif } static gboolean mon_timer_popped(gpointer data) { int rc = cib_ok; if(timer_id > 0) { g_source_remove(timer_id); } rc = cib_connect(TRUE); if(rc != cib_ok) { print_dot(); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return FALSE; } static void mon_cib_connection_destroy(gpointer user_data) { print_as("Connection to the CIB terminated\n"); if(cib) { print_as("Reconnecting..."); cib->cmds->signoff(cib); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return; } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(LSB_EXIT_OK); } int cib_connect(gboolean full) { int rc = cib_ok; static gboolean need_pass = TRUE; CRM_CHECK(cib != NULL, return cib_missing); if(cib->state != cib_connected_query && cib->state != cib_connected_command) { crm_debug_4("Connecting to the CIB"); if(need_pass && cib->variant == cib_remote) { need_pass = FALSE; print_as("Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if(rc != cib_ok) { return rc; } current_cib = get_cib_copy(cib); mon_refresh_display(NULL); if(full) { if(rc == cib_ok) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); } if(rc == cib_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if(rc != cib_ok) { print_as("Notification setup failed, could not monitor CIB actions"); if(as_console) { sleep(2); } clean_up(-rc); } } } return rc; } static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "This text"}, {"version", 0, 0, 'v', "Version information" }, - {"verbose", 0, 0, 'V', "Increase debug output"}, + {"verbose", 0, 0, 'V', "Increase debug output\n"}, + {"interval", 1, 0, 'i', "Update frequency in seconds" }, + {"one-shot", 0, 0, '1', "\tDisplay the cluster status once on the console and exit (doesnt use ncurses)"}, + {"disable-ncurses",0, 0, 'N', "Disable the use of ncurses"}, + {"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"}, + {"pid-file", 1, 0, 'p', "Daemon pid file location\n"}, + {"group-by-node", 0, 0, 'n', "Group resources by node" }, - {"inactive", 0, 0, 'r', "\tDisplay inactive resources" }, - {"failcounts", 0, 0, 'f', "Display resource fail counts"}, - {"operations", 0, 0, 'o', "Display resource operation history" }, - {"timing-details", 0, 0, 't', "Display resource operation history with timing details" }, + {"failcounts", 0, 0, 'f', "Display resource fail counts"}, + {"operations", 0, 0, 'o', "Display resource operation history" }, + {"timing-details", 0, 0, 't', "Display resource operation history with timing details\n" }, - {"as-html", 1, 0, 'h', "Write cluster status to the named file"}, - {"web-cgi", 0, 0, 'w', "\tWeb mode with output suitable for cgi"}, - {"simple-status", 0, 0, 's', "Display the cluster status once as a simple one line output (suitable for nagios)"}, + {"snmp-traps", 0, 0, 'S', "Send SNMP traps to this station\n"}, - {"snmp-traps", 0, 0, 'S', "Send SNMP traps to this station"}, + {"as-html", 1, 0, 'h', "Write cluster status to the named file"}, + {"web-cgi", 0, 0, 'w', "\tWeb mode with output suitable for cgi"}, + {"simple-status", 0, 0, 's', "Display the cluster status once as a simple one line output (suitable for nagios)\n"}, {"mail-to", 1, 0, 'T'}, {"mail-from", 1, 0, 'F'}, {"mail-host", 1, 0, 'H'}, {"mail-prefix", 1, 0, 'P'}, - {"one-shot", 0, 0, '1', "\tDisplay the cluster status once on the console and exit (doesnt use ncurses)"}, - {"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"}, - {"disable-ncurses",0, 0, 'N'}, - {"pid-file", 1, 0, 'p', "Daemon pid file location"}, {"xml-file", 1, 0, 'x', NULL, 1}, {NULL, 0, 0, 0} }; int main(int argc, char **argv) { int flag; int argerr = 0; int exit_code = 0; int option_index = 0; pid_file = crm_strdup("/tmp/ClusterMon.pid"); crm_log_init(basename(argv[0]), LOG_CRIT, FALSE, FALSE, 0, NULL); crm_set_options("V?i:nrh:dp:s1wx:oftNS:T:F:H:P:v", "[-?|-v|-1|-N|-d -p] [-h|-w|-s] [-S] [-T -F -H -P] [-i] [-n] [-r] [-f] [-o|-t]", long_options, "Provides a summary of cluster's current state.\n Outputs varying levels of detail in a number of different formats.\n"); if (strcmp(crm_system_name, "crm_mon.cgi")==0) { web_cgi = TRUE; one_shot = TRUE; } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case 'i': reconnect_msec = crm_get_msec(optarg); break; case 'n': group_by_node = TRUE; break; case 'r': inactive_resources = TRUE; break; case 'd': daemonize = TRUE; break; case 't': print_timing = TRUE; print_operations = TRUE; break; case 'o': print_operations = TRUE; break; case 'f': print_failcount = TRUE; break; case 'p': crm_free(pid_file); pid_file = crm_strdup(optarg); break; case 'x': xml_file = crm_strdup(optarg); one_shot = TRUE; break; case 'h': as_html_file = crm_strdup(optarg); break; case 'w': web_cgi = TRUE; one_shot = TRUE; break; case 's': simple_status = TRUE; one_shot = TRUE; break; case 'S': snmp_target = optarg; break; case 'T': crm_mail_to = optarg; break; case 'F': crm_mail_from = optarg; break; case 'H': crm_mail_host = optarg; break; case 'P': crm_mail_prefix = optarg; break; case '1': one_shot = TRUE; break; case 'N': as_console = FALSE; break; case 'v': case '?': crm_help(flag, LSB_EXIT_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', LSB_EXIT_GENERIC); } if(one_shot) { as_console = FALSE; } else if(daemonize) { as_console = FALSE; cl_log_enable_stderr(FALSE); if(!as_html_file && !snmp_target && !crm_mail_to) { printf("Looks like you forgot to specify one or more of: --as-html, --mail-to, --snmp-target\n"); crm_help('?', LSB_EXIT_GENERIC); } crm_make_daemon(crm_system_name, TRUE, pid_file); } else if(as_console) { #if CURSES_ENABLED initscr(); cbreak(); noecho(); cl_log_enable_stderr(FALSE); #else one_shot = TRUE; as_console = FALSE; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } crm_info("Starting %s", crm_system_name); if(xml_file != NULL) { current_cib = filename2xml(xml_file); mon_refresh_display(NULL); return exit_code; } if(current_cib == NULL) { cib = cib_new(); if(!one_shot) { print_as("Attempting connection to the cluster..."); } do { exit_code = cib_connect(!one_shot); if(one_shot) { break; } else if(exit_code != cib_ok) { print_dot(); sleep(reconnect_msec/1000); } } while(exit_code == cib_connection); if(exit_code != cib_ok) { print_as("\nConnection to cluster failed: %s\n", cib_error2string(exit_code)); if(as_console) { sleep(2); } clean_up(-exit_code); } } if(one_shot) { return exit_code; } mainloop = g_main_new(FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_run(mainloop); g_main_destroy(mainloop); crm_info("Exiting %s", crm_system_name); clean_up(0); return 0; /* never reached */ } void wait_for_refresh(int offset, const char *prefix, int msec) { int lpc = msec / 1000; struct timespec sleept = {1 , 0}; if(as_console == FALSE) { timer_id = g_timeout_add(msec, mon_timer_popped, NULL); return; } crm_notice("%sRefresh in %ds...", prefix?prefix:"", lpc); while(lpc > 0) { #if CURSES_ENABLED move(offset, 0); /* printw("%sRefresh in \033[01;32m%ds\033[00m...", prefix?prefix:"", lpc); */ printw("%sRefresh in %ds...\n", prefix?prefix:"", lpc); clrtoeol(); refresh(); #endif lpc--; if(lpc == 0) { timer_id = g_timeout_add( 1000, mon_timer_popped, NULL); } else { if (nanosleep(&sleept, NULL) != 0) { return; } } } } #define mon_warn(fmt...) do { \ if (!has_warnings) { \ print_as("Warning:"); \ } else { \ print_as(","); \ } \ print_as(fmt); \ has_warnings = TRUE; \ } while(0) static int print_simple_status(pe_working_set_t *data_set) { node_t *dc = NULL; int nodes_online = 0; int nodes_standby = 0; dc = data_set->dc_node; if(dc == NULL) { mon_warn("No DC "); } slist_iter(node, node_t, data_set->nodes, lpc2, if(node->details->standby && node->details->online) { nodes_standby++; } else if(node->details->online) { nodes_online++; } else { mon_warn("offline node: %s", node->details->uname); } ); if (!has_warnings) { print_as("Ok: %d nodes online", nodes_online); if (nodes_standby > 0) { print_as(", %d standby nodes", nodes_standby); } print_as(", %d resources configured", g_list_length(data_set->resources)); } print_as("\n"); return 0; } extern int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set); static void get_ping_score(node_t *node, pe_working_set_t *data_set) { const char *attr = "pingd"; const char *value = NULL; value = g_hash_table_lookup(node->details->attrs, attr); if(value != NULL) { print_as(" %s=%s", attr, value); } } static void print_date(time_t time) { int lpc = 0; char date_str[26]; asctime_r(localtime(&time), date_str); for(; lpc < 26; lpc++) { if(date_str[lpc] == '\n') { date_str[lpc] = 0; } } print_as("'%s'", date_str); } static void print_rsc_summary(pe_working_set_t *data_set, node_t *node, resource_t *rsc, gboolean all) { gboolean printed = FALSE; time_t last_failure = 0; int failcount = get_failcount(node, rsc, (int*)&last_failure, data_set); if(all || failcount || last_failure > 0) { printed = TRUE; print_as(" %s: migration-threshold=%d", rsc->id, rsc->migration_threshold); } if(failcount > 0) { printed = TRUE; print_as(" fail-count=%d", failcount); } if(last_failure > 0) { printed = TRUE; print_as(" last-failure="); print_date(last_failure); } if(printed) { print_as("\n"); } } static void print_rsc_history(pe_working_set_t *data_set, node_t *node, xmlNode *rsc_entry) { GListPtr op_list = NULL; gboolean print_name = TRUE; GListPtr sorted_op_list = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter(xml_op, xmlNode, sorted_op_list, lpc, const char *value = NULL; const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); int rc = crm_parse_int(op_rc, "0"); if(safe_str_eq(task, CRMD_ACTION_STATUS) && safe_str_eq(interval, "0")) { task = "probe"; } if(rc == 7 && safe_str_eq(task, "probe")) { continue; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { continue; } if(print_name) { print_name = FALSE; if(rsc == NULL) { print_as("Orphan resource: %s", rsc_id); } else { print_rsc_summary(data_set, node, rsc, TRUE); } } print_as(" + (%s) %s:", call, task); if(safe_str_neq(interval, "0")) { print_as(" interval=%sms", interval); } if(print_timing) { int int_value; const char *attr = "last-rc-change"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "last-run"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "exec-time"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } attr = "queue-time"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } } print_as(" rc=%s (%s)\n", op_rc, execra_code2string(rc)); ); /* no need to free the contents */ g_list_free(sorted_op_list); } static void print_node_summary(pe_working_set_t *data_set, gboolean operations) { xmlNode *lrm_rsc = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); if(operations) { print_as("\nOperations:\n"); } else { print_as("\nMigration summary:\n"); } xml_child_iter_filter( cib_status, node_state, XML_CIB_TAG_STATE, node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if(node == NULL || node->details->online == FALSE){ continue; } print_as("* Node %s: ", crm_element_value(node_state, XML_ATTR_UNAME)); get_ping_score(node, data_set); print_as("\n"); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( lrm_rsc, rsc_entry, XML_LRM_TAG_RESOURCE, if(operations) { print_rsc_history(data_set, node, rsc_entry); } else { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if(rsc) { print_rsc_summary(data_set, node, rsc, FALSE); } else { print_as(" %s: orphan\n", rsc_id); } } ); ); } static char * add_list_element(char *list, const char *value) { int len = 0; int last = 0; if(value == NULL) { return list; } if(list) { last = strlen(list); } len = last + 2; /* +1 space, +1 EOS */ len += strlen(value); crm_realloc(list, len); sprintf(list + last, " %s", value); return list; } static int print_status(pe_working_set_t *data_set) { static int updates = 0; node_t *dc = NULL; char *since_epoch = NULL; char *online_nodes = NULL; char *offline_nodes = NULL; xmlNode *dc_version = NULL; xmlNode *quorum_node = NULL; time_t a_time = time(NULL); int configured_resources = 0; int print_opts = pe_print_ncurses; const char *quorum_votes = "unknown"; if(as_console) { blank_screen(); } else { print_opts = pe_print_printf; } updates++; dc = data_set->dc_node; print_as("\n\n============\n"); if(a_time == (time_t)-1) { crm_perror(LOG_ERR,"set_node_tstamp(): Invalid time returned"); return 1; } since_epoch = ctime(&a_time); if(since_epoch != NULL) { print_as("Last updated: %s", since_epoch); } dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); if(dc == NULL) { print_as("Current DC: NONE\n"); } else { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); if(safe_str_neq(dc->details->uname, dc->details->id)) { print_as("Current DC: %s (%s)", dc->details->uname, dc->details->id); } else { print_as("Current DC: %s", dc->details->uname); } print_as(" - partition %s quorum\n", crm_is_true(quorum)?"with":"WITHOUT"); if(dc_version) { print_as("Version: %s\n", crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)); } } quorum_node = get_xpath_object("//nvpair[@name='expected-quorum-votes']", data_set->input, LOG_DEBUG); if(quorum_node) { quorum_votes = crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE); } slist_iter(rsc, resource_t, data_set->resources, lpc, if(is_not_set(rsc->flags, pe_rsc_orphan)) { configured_resources++; } ); print_as("%d Nodes configured, %s expected votes\n", g_list_length(data_set->nodes), quorum_votes); print_as("%d Resources configured.\n", configured_resources); print_as("============\n\n"); slist_iter(node, node_t, data_set->nodes, lpc2, const char *node_mode = NULL; if(node->details->unclean) { if(node->details->online && node->details->unclean) { node_mode = "UNCLEAN (online)"; } else if(node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if(node->details->pending) { node_mode = "pending"; } else if(node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if(node->details->standby) { if(node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if(node->details->online) { node_mode = "online"; if(group_by_node == FALSE) { online_nodes = add_list_element(online_nodes, node->details->uname); continue; } } else { node_mode = "OFFLINE"; if(group_by_node == FALSE) { offline_nodes = add_list_element(offline_nodes, node->details->uname); continue; } } if(safe_str_eq(node->details->uname, node->details->id)) { print_as("Node %s: %s\n", node->details->uname, node_mode); } else { print_as("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); } if(group_by_node) { slist_iter(rsc, resource_t, node->details->running_rsc, lpc2, rsc->fns->print( rsc, "\t", print_opts|pe_print_rsconly, stdout); ); } ); if(online_nodes) { print_as("Online: [%s ]\n", online_nodes); crm_free(online_nodes); } if(offline_nodes) { print_as("OFFLINE: [%s ]\n", offline_nodes); crm_free(offline_nodes); } if(group_by_node == FALSE && inactive_resources) { print_as("\nFull list of resources:\n"); } else if(inactive_resources) { print_as("\nInactive resources:\n"); } if(group_by_node == FALSE || inactive_resources) { print_as("\n"); slist_iter(rsc, resource_t, data_set->resources, lpc2, gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if(is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if(group_by_node == FALSE) { if(partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } else if(is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } ); } if(print_operations || print_failcount) { print_node_summary(data_set, print_operations); } if(xml_has_children(data_set->failed)) { print_as("\nFailed actions:\n"); xml_child_iter(data_set->failed, xml_op, int val = 0; const char *id = ID(xml_op); const char *last = crm_element_value(xml_op, "last_run"); const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); val = crm_parse_int(status, "0"); print_as(" %s (node=%s, call=%s, rc=%s, status=%s", id, node, call, rc, op_status2text(val)); if(last) { time_t run_at = crm_parse_int(last, "0"); print_as(", last-run=%s, queued=%sms, exec=%sms\n", ctime(&run_at), crm_element_value(xml_op, "exec_time"), crm_element_value(xml_op, "queue_time")); } val = crm_parse_int(rc, "0"); print_as("): %s\n", execra_code2string(val)); ); } #if CURSES_ENABLED if(as_console) { refresh(); } #endif return 0; } static int print_html_status(pe_working_set_t *data_set, const char *filename, gboolean web_cgi) { FILE *stream; node_t *dc = NULL; static int updates = 0; char *filename_tmp = NULL; if (web_cgi) { stream=stdout; fprintf(stream, "Content-type: text/html\n\n"); } else { filename_tmp = crm_concat(filename, "tmp", '.'); stream = fopen(filename_tmp, "w"); if(stream == NULL) { crm_perror(LOG_ERR,"Cannot open %s for writing", filename_tmp); crm_free(filename_tmp); return -1; } } updates++; dc = data_set->dc_node; fprintf(stream, ""); fprintf(stream, ""); fprintf(stream, "Cluster status"); /* content="%d;url=http://webdesign.about.com" */ fprintf(stream, "", reconnect_msec); fprintf(stream, ""); /*** SUMMARY ***/ fprintf(stream, "

Cluster summary

"); { char *now_str = NULL; time_t now = time(NULL); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, "Last updated: %s
\n", now_str); } if(dc == NULL) { fprintf(stream, "Current DC: NONE
"); } else { fprintf(stream, "Current DC: %s (%s)
", dc->details->uname, dc->details->id); } fprintf(stream, "%d Nodes configured.
", g_list_length(data_set->nodes)); fprintf(stream, "%d Resources configured.
", g_list_length(data_set->resources)); /*** CONFIG ***/ fprintf(stream, "

Config Options

\n"); fprintf(stream, "\n"); fprintf(stream, "\n", data_set->default_resource_stickiness); fprintf(stream, "flags, pe_flag_stonith_enabled)?"enabled":"disabled"); fprintf(stream, "\n", is_set(data_set->flags, pe_flag_symmetric_cluster)?"":"a-"); fprintf(stream, "\n
Default resource stickiness:%d
STONITH of failed nodes:%s
Cluster is:%ssymmetric
No Quorum Policy:"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: fprintf(stream, "Freeze resources"); break; case no_quorum_stop: fprintf(stream, "Stop ALL resources"); break; case no_quorum_ignore: fprintf(stream, "Ignore"); break; case no_quorum_suicide: fprintf(stream, "Suicide"); break; } fprintf(stream, "\n
\n"); /*** NODE LIST ***/ fprintf(stream, "

Node List

\n"); fprintf(stream, "
    \n"); slist_iter(node, node_t, data_set->nodes, lpc2, fprintf(stream, "
  • "); if(node->details->standby_onfail && node->details->online) { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"standby (on-fail)\n"); } else if(node->details->standby && node->details->online) { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"standby\n"); } else if(node->details->standby) { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"OFFLINE (standby)\n"); } else if(node->details->online) { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"online\n"); } else { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"OFFLINE\n"); } if(group_by_node) { fprintf(stream, "
      \n"); slist_iter(rsc, resource_t, node->details->running_rsc, lpc2, fprintf(stream, "
    • "); rsc->fns->print(rsc, NULL, pe_print_html|pe_print_rsconly, stream); fprintf(stream, "
    • \n"); ); fprintf(stream, "
    \n"); } fprintf(stream, "
  • \n"); ); fprintf(stream, "
\n"); if(group_by_node && inactive_resources) { fprintf(stream, "

(Partially) Inactive Resources

\n"); } else if(group_by_node == FALSE) { fprintf(stream, "

Resource List

\n"); } if(group_by_node == FALSE || inactive_resources) { slist_iter(rsc, resource_t, data_set->resources, lpc2, if(group_by_node && rsc->fns->active(rsc, TRUE)) { continue; } rsc->fns->print(rsc, NULL, pe_print_html, stream); ); } fprintf(stream, ""); fflush(stream); fclose(stream); if (!web_cgi) { if(rename(filename_tmp, filename) != 0) { crm_perror(LOG_ERR,"Unable to rename %s->%s", filename_tmp, filename); } crm_free(filename_tmp); } return 0; } #if ENABLE_SNMP #include #include #include #include #include #include #define add_snmp_field(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ int s_rc = snmp_add_var(list, name, name_length, 's', (value)); \ if(s_rc != 0) { \ crm_err("Could not add %s=%s rc=%d", oid_string, value, s_rc); \ } else { \ crm_debug_2("Added %s=%s", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ #define add_snmp_field_int(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ if(NULL == snmp_pdu_add_variable( \ list, name, name_length, ASN_INTEGER, \ (u_char *) & value, sizeof(value))) { \ crm_err("Could not add %s=%d", oid_string, value); \ } else { \ crm_debug_2("Added %s=%d", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ static int snmp_input(int operation, netsnmp_session *session, int reqid, netsnmp_pdu *pdu, void *magic) { return 1; } static netsnmp_session *crm_snmp_init(const char *target) { static netsnmp_session *session = NULL; if(session) { return session; } if(target == NULL) { return NULL; } if(crm_log_level > LOG_INFO) { char *debug_tokens = crm_strdup("run:shell,snmptrap,tdomain"); debug_register_tokens(debug_tokens); snmp_set_do_debugging(1); } crm_malloc0(session, sizeof(netsnmp_session)); snmp_sess_init(session); session->version = SNMP_VERSION_2c; session->callback = snmp_input; session->callback_magic = NULL; session = snmp_add(session, netsnmp_transport_open_client("snmptrap", target), NULL, NULL); if (session == NULL) { snmp_sess_perror("Could not create snmp transport", session); } return session; } #endif static int send_snmp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { int ret = 1; #if ENABLE_SNMP static oid snmptrap_oid[] = { 1,3,6,1,6,3,1,1,4,1,0 }; static oid sysuptime_oid[] = { 1,3,6,1,2,1,1,3,0 }; netsnmp_pdu *trap_pdu; netsnmp_session *session = crm_snmp_init(snmp_target); trap_pdu = snmp_pdu_create(SNMP_MSG_TRAP2); if ( !trap_pdu ) { crm_err("Failed to create SNMP notification"); return SNMPERR_GENERR; } if(1) { /* send uptime */ char csysuptime[20]; time_t now = time(NULL); sprintf(csysuptime, "%ld", now); snmp_add_var(trap_pdu, sysuptime_oid, sizeof(sysuptime_oid) / sizeof(oid), 't', csysuptime); } /* Indicate what the trap is by setting snmpTrapOid.0 */ ret = snmp_add_var(trap_pdu, snmptrap_oid, sizeof(snmptrap_oid) / sizeof(oid), 'o', snmp_crm_trap_oid); if (ret != 0) { crm_err("Failed set snmpTrapOid.0=%s", snmp_crm_trap_oid); return ret; } /* Add extries to the trap */ add_snmp_field(trap_pdu, snmp_crm_oid_rsc, rsc); add_snmp_field(trap_pdu, snmp_crm_oid_node, node); add_snmp_field(trap_pdu, snmp_crm_oid_task, task); add_snmp_field(trap_pdu, snmp_crm_oid_desc, desc); add_snmp_field_int(trap_pdu, snmp_crm_oid_rc, rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_trc, target_rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_status, status); /* Send and cleanup */ ret = snmp_send(session, trap_pdu); if(ret == 0) { /* error */ snmp_sess_perror("Could not send SNMP trap", session); snmp_free_pdu(trap_pdu); ret = SNMPERR_GENERR; } else { ret = SNMPERR_SUCCESS; } #else crm_err("Sending SNMP traps is not supported by this installation"); #endif return ret; } #if ENABLE_ESMTP #include #include static void print_recipient_status( smtp_recipient_t recipient, const char *mailbox, void *arg) { const smtp_status_t *status; status = smtp_recipient_status (recipient); printf ("%s: %d %s", mailbox, status->code, status->text); } static void event_cb (smtp_session_t session, int event_no, void *arg, ...) { int *ok; va_list alist; va_start(alist, arg); switch(event_no) { case SMTP_EV_CONNECT: case SMTP_EV_MAILSTATUS: case SMTP_EV_RCPTSTATUS: case SMTP_EV_MESSAGEDATA: case SMTP_EV_MESSAGESENT: case SMTP_EV_DISCONNECT: break; case SMTP_EV_WEAK_CIPHER: { int bits = va_arg(alist, long); ok = va_arg(alist, int*); crm_debug("SMTP_EV_WEAK_CIPHER, bits=%d - accepted.", bits); *ok = 1; break; } case SMTP_EV_STARTTLS_OK: crm_debug("SMTP_EV_STARTTLS_OK - TLS started here."); break; case SMTP_EV_INVALID_PEER_CERTIFICATE: { long vfy_result = va_arg(alist, long); ok = va_arg(alist, int*); /* There is a table in handle_invalid_peer_certificate() of mail-file.c */ crm_err("SMTP_EV_INVALID_PEER_CERTIFICATE: %ld", vfy_result); *ok = 1; break; } case SMTP_EV_NO_PEER_CERTIFICATE: ok = va_arg(alist, int*); crm_debug("SMTP_EV_NO_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_WRONG_PEER_CERTIFICATE: ok = va_arg(alist, int*); crm_debug("SMTP_EV_WRONG_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_NO_CLIENT_CERTIFICATE: ok = va_arg(alist, int*); crm_debug("SMTP_EV_NO_CLIENT_CERTIFICATE - accepted."); *ok = 1; break; default: crm_debug("Got event: %d - ignored.\n", event_no); } va_end(alist); } #endif #define BODY_MAX 2048 static int send_smtp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { #if ENABLE_ESMTP smtp_session_t session; smtp_message_t message; auth_context_t authctx; struct sigaction sa; int len = 10; int noauth = 1; char crm_mail_body[BODY_MAX]; char *crm_mail_subject = NULL; if(node == NULL) { node = "-"; } if(rsc) { rsc = "-"; } if(desc == NULL) { desc = "-"; } if(crm_mail_to == NULL) { return 1; } if(crm_mail_host == NULL) { crm_mail_host = "localhost:25"; } if(crm_mail_prefix == NULL) { crm_mail_prefix = "Cluster notification"; } crm_debug("Sending '%s' mail to %s via %s", crm_mail_prefix, crm_mail_to, crm_mail_host); len += strlen(crm_mail_prefix); len += strlen(node); len += strlen(rsc); len += strlen(desc); crm_malloc0(crm_mail_subject, len); snprintf(crm_mail_subject, len, "%s: %s event for %s on %s: %s", crm_mail_prefix, task, rsc, node, desc); len = 0; len += snprintf(crm_mail_body, BODY_MAX-len, "%s\n", crm_mail_prefix); len += snprintf(crm_mail_body, BODY_MAX-len, "====\n\n"); if(rc==target_rc) { len += snprintf(crm_mail_body, BODY_MAX-len, "Completed operation %s for resource %s on %s\n", task, rsc, node); } else { len += snprintf(crm_mail_body, BODY_MAX-len, "Operation %s for resource %s on %s failed: %s\n", task, rsc, node, desc); } len += snprintf(crm_mail_body, BODY_MAX-len, "\nDetails:\n"); len += snprintf(crm_mail_body, BODY_MAX-len, "\toperation status: (%d) %s\n", status, op_status2text(status)); if(status == LRM_OP_DONE) { len += snprintf(crm_mail_body, BODY_MAX-len, "\tscript returned: (%d) %s\n", rc, execra_code2string(rc)); len += snprintf(crm_mail_body, BODY_MAX-len, "\texpected return value: (%d) %s\n", target_rc, execra_code2string(target_rc)); } auth_client_init(); session = smtp_create_session(); message = smtp_add_message(session); smtp_starttls_enable (session, Starttls_ENABLED); sa.sa_handler = SIG_IGN; sigemptyset (&sa.sa_mask); sa.sa_flags = 0; sigaction (SIGPIPE, &sa, NULL); smtp_set_server (session, crm_mail_host); authctx = auth_create_context (); auth_set_mechanism_flags (authctx, AUTH_PLUGIN_PLAIN, 0); smtp_set_eventcb(session, event_cb, NULL); /* Now tell libESMTP it can use the SMTP AUTH extension. */ if (!noauth) { smtp_auth_set_context (session, authctx); } /* NULL is ok */ smtp_set_reverse_path (message, crm_mail_from); #if 1 /* In thoery, the last arg can be NULL if we then call smtp_add_recipient() */ smtp_set_header (message, "To", NULL/*phrase*/, crm_mail_to); /* "Phrase" */ #else smtp_add_recipient (message, crm_mail_to); #endif /* Set the Subject: header and override any subject line in the message headers. */ smtp_set_header (message, "Subject", crm_mail_subject); smtp_set_header_option (message, "Subject", Hdr_OVERRIDE, 1); smtp_set_message_str(message, crm_mail_body); if (!smtp_start_session (session)) { char buf[128]; crm_err("SMTP server problem: %s\n", smtp_strerror (smtp_errno (), buf, sizeof buf)); } else { const smtp_status_t *smtp_status = smtp_message_transfer_status(message); crm_info("Send status: %d %s", smtp_status->code, crm_str(smtp_status->text)); smtp_enumerate_recipients (message, print_recipient_status, NULL); } smtp_destroy_session(session); auth_destroy_context(authctx); auth_client_exit(); #endif return 0; } static void handle_rsc_op(xmlNode *rsc_op) { int rc = -1; int status = -1; int action = -1; int interval = 0; int target_rc = -1; int transition_num = -1; gboolean send_trap = TRUE; gboolean send_email = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *node = NULL; const char *magic = NULL; const char *id = ID(rsc_op); char *update_te_uuid = NULL; xmlNode *n = rsc_op; magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if(magic == NULL) { /* non-change */ return; } if(FALSE == decode_transition_magic( magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if(parse_op_key(id, &rsc, &task, &interval) == FALSE) { crm_err("Invalid event detected for %s", id); return; } while(n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) { n = n->parent; } node = ID(n); if(node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); return; } /* look up where we expected it to be? */ desc = cib_error2string(cib_ok); if(status == LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if(rc == EXECRA_NOT_RUNNING) { send_trap = FALSE; } } else if(status == LRM_OP_DONE) { desc = execra_code2string(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = op_status2text(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if(send_trap && snmp_target) { send_snmp_trap(node, rsc, task, target_rc, rc, status, desc); } if(send_email && crm_mail_to) { send_smtp_trap(node, rsc, task, target_rc, rc, status, desc); } } void crm_diff_update(const char *event, xmlNode *msg) { int rc = -1; long now = time(NULL); const char *op = NULL; unsigned int log_level = LOG_INFO; xmlNode *diff = NULL; xmlNode *cib_last = NULL; xmlNode *update = get_message_xml(msg, F_CIB_UPDATE); print_dot(); if(msg == NULL) { crm_err("NULL update"); return; } crm_element_value_int(msg, F_CIB_RC, &rc); op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); if(rc < cib_ok) { log_level = LOG_WARNING; do_crm_log(log_level, "[%s] %s ABORTED: %s", event, op, cib_error2string(rc)); return; } if(current_cib != NULL) { cib_last = current_cib; current_cib = NULL; rc = cib_process_diff(op, cib_force_diff, NULL, NULL, diff, cib_last, ¤t_cib, NULL); if(rc != cib_ok) { crm_debug("Update didn't apply, requesting full copy: %s", cib_error2string(rc)); free_xml(current_cib); current_cib = NULL; } } if(current_cib == NULL) { current_cib = get_cib_copy(cib); } if(log_diffs && diff) { log_cib_diff(LOG_DEBUG, diff, op); } if(log_updates && update != NULL) { print_xml_formatted(LOG_DEBUG, "raw_update", update, NULL); } if(diff && (crm_mail_to || snmp_target)) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search( diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_LRM_TAG_RSC_OP); if(xpathObj && xpathObj->nodesetval->nodeNr > 0) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op); } xmlXPathFreeObject(xpathObj); } } if((now - last_refresh) > (reconnect_msec/1000)) { /* Force a refresh */ mon_refresh_display(NULL); } else { mainloop_set_trigger(refresh_trigger); } free_xml(cib_last); } gboolean mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); pe_working_set_t data_set; last_refresh = time(NULL); if(cli_config_update(&cib_copy, NULL) == FALSE) { if(cib) { cib->cmds->signoff(cib); } print_as("Upgrade failed: %s", cib_error2string(cib_dtd_validation)); if(as_console) { sleep(2); } clean_up(LSB_EXIT_GENERIC); return FALSE; } set_working_set_defaults(&data_set); data_set.input = cib_copy; cluster_status(&data_set); if(as_html_file || web_cgi) { if (print_html_status(&data_set, as_html_file, web_cgi) != 0) { fprintf(stderr, "Critical: Unable to output html file\n"); clean_up(LSB_EXIT_GENERIC); } } else if(daemonize) { /* do nothing */ } else if (simple_status) { print_simple_status(&data_set); if (has_warnings) { clean_up(LSB_EXIT_GENERIC); } } else { print_status(&data_set); } cleanup_calculations(&data_set); return TRUE; } /* * De-init ncurses, signoff from the CIB and deallocate memory. */ void clean_up(int rc) { #if ENABLE_SNMP netsnmp_session *session = crm_snmp_init(NULL); if(session) { snmp_close(session); snmp_shutdown("snmpapp"); } #endif #if CURSES_ENABLED if(as_console) { as_console = FALSE; echo(); nocbreak(); endwin(); } #endif if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } crm_free(as_html_file); crm_free(xml_file); crm_free(pid_file); if(rc >= 0) { exit(rc); } return; } diff --git a/tools/pingd.c b/tools/pingd.c index 891571d290..885b0c56da 100644 --- a/tools/pingd.c +++ b/tools/pingd.c @@ -1,1184 +1,1184 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SOCKET_H # include #endif #include #include #include #include #include #include #include #include #include #include #ifdef ON_LINUX #include #include # ifndef ICMP_FILTER # define ICMP_FILTER 1 struct icmp_filter { uint32_t data; }; # endif #endif #ifdef HAVE_GETOPT_H # include #endif #ifdef HAVE_GETOPT_H # include #endif #if SUPPORT_HEARTBEAT # include ll_cluster_t *pingd_cluster = NULL; void do_node_walk(ll_cluster_t *hb_cluster); #endif /* GMainLoop *mainloop = NULL; */ GListPtr ping_list = NULL; GMainLoop* mainloop = NULL; GHashTable *ping_nodes = NULL; const char *pingd_attr = "pingd"; gboolean do_filter = FALSE; gboolean need_shutdown = FALSE; gboolean stand_alone = FALSE; gboolean do_updates = TRUE; const char *attr_set = NULL; const char *attr_section = NULL; int attr_dampen = 5000; /* 5s */ int attr_multiplier = 1; int pings_per_host = 2; int ping_timeout = 2; int re_ping_interval = 1000; /* 1s */ int ident; /* our pid */ typedef struct ping_node_s { int fd; /* ping socket */ uint16_t iseq; /* sequence number */ gboolean type; gboolean extra_filters; union { struct sockaddr raw; struct sockaddr_in v4; /* ipv4 ping addr */ struct sockaddr_in6 v6; /* ipv6 ping addr */ } addr; char dest[256]; char *host; } ping_node; void pingd_nstatus_callback( const char *node, const char *status, void *private_data); void pingd_lstatus_callback( const char *node, const char *link, const char *status, void *private_data); void send_update(int active); int process_icmp_error(ping_node *node, struct sockaddr_in *whereto); /* * in_cksum -- * Checksum routine for Internet Protocol family headers (C Version) * This function taken from Mike Muuss' ping program. */ static int in_cksum (u_short *addr, size_t len) { size_t nleft = len; u_short * w = addr; int sum = 0; u_short answer = 0; /* * The IP checksum algorithm is simple: using a 32 bit accumulator (sum) * add sequential 16 bit words to it, and at the end, folding back all * the carry bits from the top 16 bits into the lower 16 bits. */ while (nleft > 1) { sum += *w++; nleft -= 2; } /* Mop up an odd byte, if necessary */ if (nleft == 1) { sum += *(u_char*)w; } /* Add back carry bits from top 16 bits to low 16 bits */ sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ sum += (sum >> 16); /* add carry */ answer = ~sum; /* truncate to 16 bits */ return answer; } static const char *ping_desc(uint8_t type, uint8_t code) { switch(type) { case ICMP_ECHOREPLY: return "Echo Reply"; case ICMP_ECHO: return "Echo Request"; case ICMP_PARAMPROB: return "Bad Parameter"; case ICMP_SOURCEQUENCH: return "Packet lost, slow down"; case ICMP_TSTAMP: return "Timestamp Request"; case ICMP_TSTAMPREPLY: return "Timestamp Reply"; case ICMP_IREQ: return "Information Request"; case ICMP_IREQREPLY: return "Information Reply"; case ICMP_UNREACH: switch(code) { case ICMP_UNREACH_NET: return "Unreachable Network"; case ICMP_UNREACH_HOST: return "Unreachable Host"; case ICMP_UNREACH_PROTOCOL: return "Unreachable Protocol"; case ICMP_UNREACH_PORT: return "Unreachable Port"; case ICMP_UNREACH_NEEDFRAG: return "Unreachable: Fragmentation needed"; case ICMP_UNREACH_SRCFAIL: return "Unreachable Source Route"; case ICMP_UNREACH_NET_UNKNOWN: return "Unknown Network"; case ICMP_UNREACH_HOST_UNKNOWN: return "Unknown Host"; case ICMP_UNREACH_ISOLATED: return "Unreachable: Isolated"; case ICMP_UNREACH_NET_PROHIB: return "Prohibited network"; case ICMP_UNREACH_HOST_PROHIB: return "Prohibited host"; case ICMP_UNREACH_FILTER_PROHIB: return "Unreachable: Prohibited filter"; case ICMP_UNREACH_TOSNET: return "Unreachable: Type of Service and Network"; case ICMP_UNREACH_TOSHOST: return "Unreachable: Type of Service and Host"; case ICMP_UNREACH_HOST_PRECEDENCE: return "Unreachable: Prec vio"; case ICMP_UNREACH_PRECEDENCE_CUTOFF: return "Unreachable: Prec cutoff"; default: crm_err("Unreachable: Unknown subtype: %d", code); return "Unreachable: Unknown Subtype"; } break; case ICMP_REDIRECT: switch(code) { case ICMP_REDIRECT_NET: return "Redirect: Network"; case ICMP_REDIRECT_HOST: return "Redirect: Host"; case ICMP_REDIRECT_TOSNET: return "Redirect: Type of Service and Network"; case ICMP_REDIRECT_TOSHOST: return "Redirect: Type of Service and Host"; default: crm_err("Redirect: Unknown subtype: %d", code); return "Redirect: Unknown Subtype"; } case ICMP_TIMXCEED: switch(code) { case ICMP_TIMXCEED_INTRANS: return "Timeout: TTL"; case ICMP_TIMXCEED_REASS: return "Timeout: Fragmentation reassembly"; default: crm_err("Timeout: Unkown subtype: %d", code); return "Timeout: Unkown Subtype"; } break; default: crm_err("Unknown type: %d", type); return "Unknown type"; } } #ifdef ON_LINUX # define MAX_HOST 1024 int process_icmp_error(ping_node *node, struct sockaddr_in *whereto) { int rc = 0; char buf[512]; struct iovec iov; struct msghdr msg; struct icmphdr icmph; struct sockaddr_in target; struct cmsghdr *cmsg = NULL; struct sock_extended_err *s_err = NULL; iov.iov_base = &icmph; iov.iov_len = sizeof(icmph); msg.msg_name = (void*)⌖ msg.msg_namelen = sizeof(target); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_flags = 0; msg.msg_control = buf; msg.msg_controllen = sizeof(buf); rc = recvmsg(node->fd, &msg, MSG_ERRQUEUE|MSG_DONTWAIT); if (rc < 0 || rc < sizeof(icmph)) { crm_perror(LOG_DEBUG, "No error message: %d", rc); return 0; } for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_RECVERR) { s_err = (struct sock_extended_err *)CMSG_DATA(cmsg); } } CRM_ASSERT(s_err != NULL); if (s_err->ee_origin == SO_EE_ORIGIN_LOCAL) { if (s_err->ee_errno == EMSGSIZE) { crm_info("local error: Message too long, mtu=%u", s_err->ee_info); } else { crm_info("local error: %s", strerror(s_err->ee_errno)); } return 0; } else if (s_err->ee_origin == SO_EE_ORIGIN_ICMP) { char ping_host[MAX_HOST]; struct sockaddr_in *sin = (struct sockaddr_in*)(s_err+1); const char *ping_result = ping_desc(s_err->ee_type, s_err->ee_code); char *target_s = inet_ntoa(*(struct in_addr *)&(target.sin_addr.s_addr)); char *whereto_s = inet_ntoa(*(struct in_addr *)&(whereto->sin_addr.s_addr)); if (ntohs(icmph.un.echo.id) != ident) { /* Result was not for us */ crm_debug("Not our error (ident): %d %d", ntohs(icmph.un.echo.id), ident); return -1; } else if (safe_str_neq(target_s, whereto_s)) { /* Result was not for us */ crm_debug("Not our error (addr): %s %s", target_s, whereto_s); return -1; } else if (icmph.type != ICMP_ECHO) { /* Not an error */ crm_info("Not an error: %d", icmph.type); return -1; } /* snprintf(ping_host, MAX_HOST, "%s", inet_ntoa(*(struct in_addr *)&(sin->sin_addr.s_addr))); */ snprintf(ping_host, MAX_HOST, "%s", inet_ntoa(sin->sin_addr)); if (node->extra_filters == FALSE) { /* Now that we got some sort of reply, add extra filters to * ensure we keep getting the _right_ replies for dead hosts */ struct icmp_filter filt; crm_debug("Installing additional ICMP filters"); node->extra_filters = TRUE; /* only try once */ filt.data = ~((1<fd, SOL_RAW, ICMP_FILTER, (char*)&filt, sizeof(filt)) == -1) { crm_perror(LOG_WARNING, "setsockopt failed: Cannot install ICMP filters for %s", ping_host); } } crm_debug("From %s icmp_seq=%u %s", ping_host, ntohs(icmph.un.echo.sequence), ping_result); } else { crm_debug("else: %d", s_err->ee_origin); } return 0; } #else int process_icmp_error(ping_node *node, struct sockaddr_in *whereto) { /* dummy function */ return 0; } #endif static ping_node *ping_new(const char *host) { ping_node *node; crm_malloc0(node, sizeof(ping_node)); if(strstr(host, ":")) { node->type = AF_INET6; } else { node->type = AF_INET; } node->host = crm_strdup(host); return node; } static gboolean ping_open(ping_node *node) { int ret_ga = 0; char *hostname = NULL; struct addrinfo *res = NULL; struct addrinfo hints; /* getaddrinfo */ bzero(&hints, sizeof(struct addrinfo)); hints.ai_flags = AI_CANONNAME; hints.ai_family = node->type; hints.ai_socktype = SOCK_RAW; if(node->type == AF_INET6) { hints.ai_protocol = IPPROTO_ICMPV6; } else { hints.ai_protocol = IPPROTO_ICMP; } ret_ga = getaddrinfo(node->host, NULL, &hints, &res); if (ret_ga) { crm_warn("getaddrinfo: %s", gai_strerror(ret_ga)); goto bail; } if (res->ai_canonname) { hostname = res->ai_canonname; } else { hostname = node->host; } crm_debug_2("Got address %s for %s", hostname, node->host); if(!res->ai_addr) { crm_warn("getaddrinfo failed: no address"); goto bail; } memcpy(&(node->addr.raw), res->ai_addr, res->ai_addrlen); node->fd = socket(hints.ai_family, hints.ai_socktype, hints.ai_protocol); /* node->fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); */ if(node->fd < 0) { crm_perror(LOG_WARNING, "Can't open socket to %s", hostname); goto bail; } if(node->type == AF_INET6) { int sockopt; inet_ntop(node->type, &node->addr.v6.sin6_addr, node->dest, sizeof(node->dest)); /* set recv buf for broadcast pings */ sockopt = 48 * 1024; setsockopt(node->fd, SOL_SOCKET, SO_RCVBUF, (char *) &sockopt, sizeof(sockopt)); } else { inet_ntop(node->type, &node->addr.v4.sin_addr, node->dest, sizeof(node->dest)); } if(ping_timeout > 0) { struct timeval timeout_opt; timeout_opt.tv_sec = ping_timeout; timeout_opt.tv_usec = 0; setsockopt(node->fd, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout_opt, sizeof(timeout_opt)); } #ifdef ON_LINUX { int dummy = 1; struct icmp_filter filt; filt.data = ~((1<fd, SOL_RAW, ICMP_FILTER, (char*)&filt, sizeof(filt)) == -1) { crm_perror(LOG_WARNING, "setsockopt failed: Cannot install ICMP filters for %s", node->dest); } setsockopt(node->fd, SOL_IP, IP_RECVERR, (char *)&dummy, sizeof(dummy)); } #endif crm_debug_2("Opened connection to %s", node->dest); freeaddrinfo(res); return TRUE; bail: if(res) { freeaddrinfo(res); } return FALSE; } static gboolean ping_close(ping_node *node) { int tmp_fd = node->fd; node->fd = -1; if (tmp_fd >= 0) { if(close(tmp_fd) < 0) { crm_perror(LOG_ERR,"Could not close ping socket"); } else { tmp_fd = -1; crm_debug_2("Closed connection to %s", node->dest); } } return (tmp_fd == -1); } #define MAXPACKETLEN 131072 #define ICMP6ECHOLEN 8 /* icmp echo header len excluding time */ #define ICMP6ECHOTMLEN 20 #define DEFDATALEN ICMP6ECHOTMLEN #define EXTRA 256 /* for AH and various other headers. weird. */ #define IP6LEN 40 static int dump_v6_echo(ping_node *node, u_char *buf, int bytes, struct msghdr *hdr) { int rc = -1; /* Try again */ int fromlen; char from_host[1024]; struct icmp6_hdr *icp; struct sockaddr *from; if (!hdr || !hdr->msg_name || hdr->msg_namelen != sizeof(struct sockaddr_in6) || ((struct sockaddr *)hdr->msg_name)->sa_family != AF_INET6) { crm_warn("Invalid echo peer"); return rc; } fromlen = hdr->msg_namelen; from = (struct sockaddr *)hdr->msg_name; getnameinfo(from, fromlen, from_host, sizeof(from_host), NULL, 0, NI_NUMERICHOST | NI_NUMERICSERV); if (bytes < (int)sizeof(struct icmp6_hdr)) { crm_warn("Invalid echo packet (too short: %d bytes) from %s", bytes, from_host); return rc; } icp = (struct icmp6_hdr *)buf; if (icp->icmp6_type == ICMP6_ECHO_REPLY && node->iseq == ntohs(icp->icmp6_seq)) { rc = 1; /* Alive */ } else if(icp->icmp6_type != ICMP6_ECHO_REQUEST) { rc = 0; /* Error */ } do_crm_log(rc==0?LOG_WARNING:LOG_DEBUG, "Echo from %s (exp=%d, seq=%d, id=%d, dest=%s, data=%s): %s", from_host, node->iseq, ntohs(icp->icmp6_seq), ntohs(icp->icmp6_id), node->dest, (char*)(buf + ICMP6ECHOLEN), ping_desc(icp->icmp6_type, icp->icmp6_code)); return rc; } static int dump_v4_echo(ping_node *node, u_char *buf, int bytes, struct msghdr *hdr) { int rc = -1; /* Try again */ int iplen, fromlen; char from_host[1024]; struct ip *ip; struct icmp *icp; struct sockaddr *from; if (hdr == NULL || !hdr->msg_name || hdr->msg_namelen != sizeof(struct sockaddr_in) || ((struct sockaddr *)hdr->msg_name)->sa_family != AF_INET) { crm_warn("Invalid echo peer"); return rc; } fromlen = hdr->msg_namelen; from = (struct sockaddr *)hdr->msg_name; getnameinfo(from, fromlen, from_host, sizeof(from_host), NULL, 0, NI_NUMERICHOST | NI_NUMERICSERV); ip = (struct ip*)buf; iplen = ip->ip_hl * 4; if (bytes < (iplen + sizeof(struct icmp))) { crm_warn("Invalid echo packet (too short: %d bytes) from %s", bytes, from_host); return rc; } /* Check the IP header */ icp = (struct icmp*)(buf + iplen); if (icp->icmp_type == ICMP_ECHOREPLY && node->iseq == ntohs(icp->icmp_seq)) { rc = 1; /* Alive */ } else if(icp->icmp_type != ICMP_ECHO) { rc = process_icmp_error(node, (struct sockaddr_in*)from); } /* TODO: Stop logging icmp_id once we're sure everything works */ do_crm_log(LOG_DEBUG_2, "Echo from %s (exp=%d, seq=%d, id=%d, dest=%s, data=%s): %s", from_host, node->iseq, ntohs(icp->icmp_seq), ntohs(icp->icmp_id), node->dest, icp->icmp_data, ping_desc(icp->icmp_type, icp->icmp_code)); return rc; } static int ping_read(ping_node *node, int *lenp) { int bytes; char fromaddr[128]; struct msghdr m; struct cmsghdr *cm; u_char buf[1024]; struct iovec iov[2]; int packlen; u_char *packet; packlen = DEFDATALEN + IP6LEN + ICMP6ECHOLEN + EXTRA; crm_malloc0(packet, packlen); retry: m.msg_name = &fromaddr; m.msg_namelen = sizeof(fromaddr); memset(&iov, 0, sizeof(iov)); iov[0].iov_base = (caddr_t)packet; iov[0].iov_len = packlen; m.msg_iov = iov; m.msg_iovlen = 1; cm = (struct cmsghdr *)buf; m.msg_control = (caddr_t)buf; m.msg_controllen = sizeof(buf); bytes = recvmsg(node->fd, &m, 0); crm_debug_2("Got %d bytes", bytes); if(bytes < 0) { crm_perror(LOG_DEBUG, "Read failed"); if (errno != EAGAIN && errno != EINTR) { process_icmp_error(node, (struct sockaddr_in*)&fromaddr); } } else if (bytes > 0) { int rc = 0; if(node->type == AF_INET6) { rc = dump_v6_echo(node, packet, bytes, &m); } else { rc = dump_v4_echo(node, packet, bytes, &m); } if(rc < 0) { crm_info("Retrying..."); goto retry; } else if(rc > 0) { crm_free(packet); return TRUE; } } else { crm_err("Unexpected reply"); } crm_free(packet); return FALSE; } static int ping_write(ping_node *node, const char *data, size_t size) { struct iovec iov; int rc, bytes, namelen; /* static int ntransmitted = 9; */ struct msghdr smsghdr; u_char outpack[MAXPACKETLEN]; memset(outpack, 0, MAXPACKETLEN); node->iseq++; if(node->type == AF_INET6) { struct icmp6_hdr *icp; namelen = sizeof(struct sockaddr_in6); bytes = ICMP6ECHOLEN + DEFDATALEN; icp = (struct icmp6_hdr *)outpack; icp->icmp6_code = 0; icp->icmp6_cksum = 0; icp->icmp6_type = ICMP6_ECHO_REQUEST; icp->icmp6_id = htons(ident); icp->icmp6_seq = htons(node->iseq); /* Sanity check */ if(ntohs(icp->icmp6_seq) != node->iseq) { crm_debug("Wrapping at %u", node->iseq); node->iseq = ntohs(icp->icmp6_seq); } memcpy(&outpack[ICMP6ECHOLEN], "pingd-v6", 8); } else { struct icmp *icp; namelen = sizeof(struct sockaddr_in); bytes = sizeof(struct icmp) + 11; icp = (struct icmp *)outpack; icp->icmp_code = 0; icp->icmp_cksum = 0; icp->icmp_type = ICMP_ECHO; icp->icmp_id = htons(ident); icp->icmp_seq = htons(node->iseq); /* Sanity check */ if(ntohs(icp->icmp_seq) != node->iseq) { crm_debug("Wrapping at %u", node->iseq); node->iseq = ntohs(icp->icmp_seq); } memcpy(icp->icmp_data, "pingd-v4", 8); icp->icmp_cksum = in_cksum((u_short *)icp, bytes); } memset(&iov, 0, sizeof(struct iovec)); memset(&smsghdr, 0, sizeof(struct msghdr)); smsghdr.msg_name = (caddr_t)&(node->addr); smsghdr.msg_namelen = namelen; iov.iov_base = (caddr_t)outpack; iov.iov_len = bytes; smsghdr.msg_iov = &iov; smsghdr.msg_iovlen = 1; rc = sendmsg(node->fd, &smsghdr, 0); if (rc < 0 || rc != bytes) { crm_perror(LOG_WARNING, "Wrote %d of %d chars", rc, bytes); return FALSE; } crm_debug_2("Sent %d bytes to %s", rc, node->dest); return TRUE; } static void pingd_shutdown(int nsig) { need_shutdown = TRUE; send_update(0); g_hash_table_destroy(ping_nodes); slist_destroy(ping_node, p, ping_list, crm_free(p->host); crm_free(p); ); exit(0); } #if SUPPORT_HEARTBEAT static gboolean pingd_ha_dispatch(IPC_Channel *channel, gpointer user_data) { gboolean stay_connected = TRUE; crm_debug_2("Invoked"); while(pingd_cluster != NULL && IPC_ISRCONN(channel)) { if(pingd_cluster->llc_ops->msgready(pingd_cluster) == 0) { crm_debug_2("no message ready yet"); break; } /* invoke the callbacks but dont block */ pingd_cluster->llc_ops->rcvmsg(pingd_cluster, 0); } if (pingd_cluster == NULL || channel->ch_status != IPC_CONNECT) { if(need_shutdown == FALSE) { crm_crit("Lost connection to heartbeat service."); } else { crm_info("Lost connection to heartbeat service."); } stay_connected = FALSE; } return stay_connected; } static void pingd_ha_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); if(need_shutdown) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); } static gboolean register_with_ha(void) { if(pingd_cluster == NULL) { pingd_cluster = ll_cluster_new("heartbeat"); } if(pingd_cluster == NULL) { crm_err("Cannot create heartbeat object"); return FALSE; } crm_debug("Signing in with Heartbeat"); if (pingd_cluster->llc_ops->signon( pingd_cluster, crm_system_name) != HA_OK) { crm_err("Cannot sign on with heartbeat: %s", pingd_cluster->llc_ops->errmsg(pingd_cluster)); crm_err("REASON: %s", pingd_cluster->llc_ops->errmsg(pingd_cluster)); return FALSE; } do_node_walk(pingd_cluster); crm_debug_3("Be informed of Node Status changes"); if (HA_OK != pingd_cluster->llc_ops->set_nstatus_callback( pingd_cluster, pingd_nstatus_callback, NULL)) { crm_err("Cannot set nstatus callback: %s", pingd_cluster->llc_ops->errmsg(pingd_cluster)); crm_err("REASON: %s", pingd_cluster->llc_ops->errmsg(pingd_cluster)); return FALSE; } if (pingd_cluster->llc_ops->set_ifstatus_callback( pingd_cluster, pingd_lstatus_callback, NULL) != HA_OK) { crm_err("Cannot set if status callback: %s", pingd_cluster->llc_ops->errmsg(pingd_cluster)); return FALSE; } crm_debug_3("Adding channel to mainloop"); G_main_add_IPC_Channel( G_PRIORITY_HIGH, pingd_cluster->llc_ops->ipcchan( pingd_cluster), FALSE, pingd_ha_dispatch, pingd_cluster, pingd_ha_connection_destroy); return TRUE; } void do_node_walk(ll_cluster_t *hb_cluster) { const char *ha_node = NULL; /* Async get client status information in the cluster */ crm_debug_2("Invoked"); crm_debug_3("Requesting an initial dump of CRMD client_status"); hb_cluster->llc_ops->client_status( hb_cluster, NULL, CRM_SYSTEM_CRMD, -1); crm_info("Requesting the list of configured nodes"); hb_cluster->llc_ops->init_nodewalk(hb_cluster); do { const char *ha_node_type = NULL; const char *ha_node_status = NULL; ha_node = hb_cluster->llc_ops->nextnode(hb_cluster); if(ha_node == NULL) { continue; } ha_node_type = hb_cluster->llc_ops->node_type( hb_cluster, ha_node); if(safe_str_neq("ping", ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } if(do_filter && g_hash_table_lookup(ping_nodes, ha_node) == NULL) { crm_debug("Filtering: %s", ha_node); continue; } ha_node_status = hb_cluster->llc_ops->node_status( hb_cluster, ha_node); crm_debug("Adding: %s=%s", ha_node, ha_node_status); g_hash_table_replace(ping_nodes, crm_strdup(ha_node), crm_strdup(ha_node_status)); } while(ha_node != NULL); hb_cluster->llc_ops->end_nodewalk(hb_cluster); crm_debug_2("Complete"); send_update(-1); } #endif static gboolean stand_alone_ping(gpointer data) { int num_active = 0; crm_debug_2("Checking connectivity"); slist_iter( ping, ping_node, ping_list, num, if(ping_open(ping)) { int lpc = 0; for(;lpc < pings_per_host; lpc++) { int len = 0; if(ping_write(ping, "test", 4) == FALSE) { crm_info("Node %s is unreachable (write)", ping->host); } else if(ping_read(ping, &len)) { crm_debug("Node %s is alive", ping->host); num_active++; break; } else { crm_info("Node %s is unreachable (read)", ping->host); } sleep(1); } } ping_close(ping); ); send_update(num_active); return TRUE; } static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "This text"}, {"version", 0, 0, 'v', "Version information" }, - {"verbose", 0, 0, 'V', "Increase debug output"}, + {"verbose", 0, 0, 'V', "Increase debug output\n"}, {"daemonize", 0, 0, 'D', "\t\tRun in daemon mode"}, - {"pid-file", 1, 0, 'p', "\tFile in which to store the process' PID"}, - {"node", 1, 0, 'N', "\tDNS name or IP address of a host to check (can be specified more than once"}, + {"pid-file", 1, 0, 'p', "\tFile in which to store the process' PID\n"}, + {"node", 1, 0, 'N', "\tDNS name or IP address of a host to check (can be specified more than once\n"}, {"attr-name", 1, 0, 'a', "\tName of the node attribute to set"}, {"attr-dampen", 1, 0, 'd', "How long to wait for no further changes to occur before updating the CIB with a changed attribute"}, {"attr-section", 1, 0, 'S', "(Advanced) Which part of the CIB to put the attribute in"}, - {"attr-set", 1, 0, 's', "\t(Advanced) Name of the set in which to put the attribute"}, + {"attr-set", 1, 0, 's', "\t(Advanced) Name of the set in which to put the attribute\n"}, {"ping-interval", 1, 0, 'i', "How often, in seconds, to check for node liveliness (default=1)"}, {"ping-attempts", 1, 0, 'n', "Number of ping attempts, per host, before declaring it dead (default=2)"}, {"ping-timeout", 1, 0, 't', "How long, in seconds, to wait before declaring a ping lost (default=2)"}, {"ping-multiplier", 1, 0, 'm', "For every connected node, add to the value set in the CIB"}, {"no-updates", 0, 0, 'U', NULL, 1}, /* Legacy */ {"ping-host", 1, 0, 'h', NULL, 1}, {"value-multiplier", 1, 0, 'm', NULL, 1}, {"interval", 1, 0, 'i', NULL, 1}, {0, 0, 0, 0} }; int main(int argc, char **argv) { int argerr = 0; int flag; const char *pid_file = NULL; gboolean daemonize = FALSE; ping_node *p = NULL; int option_index = 0; pid_file = "/tmp/pingd.pid"; mainloop_add_signal(SIGTERM, pingd_shutdown); ping_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crm_log_init(basename(argv[0]), LOG_INFO, TRUE, FALSE, argc, argv); crm_set_options("V?vp:a:d:s:S:h:Dm:N:Ui:t:n:", NULL, long_options, "Daemon for checking external connectivity and making the results available to the cluster"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case 'p': pid_file = optarg; break; case 'a': pingd_attr = optarg; break; case 'N': case 'h': stand_alone = TRUE; crm_debug("Adding ping host %s", optarg); p = ping_new(optarg); ping_list = g_list_append(ping_list, p); break; case 's': attr_set = crm_strdup(optarg); break; case 'm': attr_multiplier = crm_parse_int(optarg, "1"); break; case 'S': attr_section = crm_strdup(optarg); break; case 'd': attr_dampen = crm_get_msec(optarg); break; case 'i': re_ping_interval = crm_get_msec(optarg); break; case 'n': pings_per_host = crm_atoi(optarg, NULL); break; case 't': ping_timeout = crm_atoi(optarg, NULL); break; case 'D': daemonize = TRUE; break; case 'U': cl_log_enable_stderr(TRUE); do_updates = FALSE; break; case 'v': case '?': crm_help(flag, LSB_EXIT_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); crm_err("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { crm_err("non-option ARGV-elements: "); printf("non-option ARGV-elements: "); while (optind < argc) { crm_err("%s ", argv[optind++]); printf("%s ", argv[optind++]); } printf("\n"); } if (argerr) { crm_help(flag, LSB_EXIT_GENERIC); } crm_make_daemon(crm_system_name, daemonize, pid_file); ident = getpid(); if(do_updates == FALSE) { goto start_ping; } #if SUPPORT_AIS if(is_openais_cluster()) { stand_alone = TRUE; } #endif #if SUPPORT_HEARTBEAT if(stand_alone == FALSE && register_with_ha() == FALSE) { crm_err("HA registration failed"); cl_flush_logs(); exit(LSB_EXIT_GENERIC); } #endif start_ping: if(stand_alone && ping_list == NULL) { crm_err("You must specify a list of hosts to monitor"); exit(LSB_EXIT_GENERIC); } crm_info("Starting %s", crm_system_name); mainloop = g_main_new(FALSE); if(stand_alone) { stand_alone_ping(NULL); g_timeout_add(re_ping_interval, stand_alone_ping, NULL); } g_main_run(mainloop); crm_info("Exiting %s", crm_system_name); return 0; } static void count_ping_nodes(gpointer key, gpointer value, gpointer user_data) { int *num_active = user_data; CRM_CHECK(num_active != NULL, return); if(need_shutdown) { return; } if(safe_str_eq(value, "ping")) { (*num_active)++; } else if(safe_str_eq(value, "up")) { (*num_active)++; } } void send_update(int num_active) { int lpc = 0; static IPC_Channel *attrd = NULL; xmlNode *update = create_xml_node(NULL, __FUNCTION__); crm_xml_add(update, F_TYPE, T_ATTRD); crm_xml_add(update, F_ORIG, crm_system_name); crm_xml_add(update, F_ATTRD_TASK, "update"); crm_xml_add(update, F_ATTRD_ATTRIBUTE, pingd_attr); if(num_active < 0) { num_active = 0; g_hash_table_foreach(ping_nodes, count_ping_nodes, &num_active); } crm_xml_add_int(update, F_ATTRD_VALUE, attr_multiplier*num_active); if(attr_set != NULL) { crm_xml_add(update, F_ATTRD_SET, attr_set); } if(attr_section != NULL) { crm_xml_add(update, F_ATTRD_SECTION, attr_section); } if(attr_dampen > 0) { crm_xml_add_int(update, F_ATTRD_DAMPEN, attr_dampen/1000); } if(do_updates && attrd == NULL) { crm_info("Attempting attrd (re-)registration"); attrd = init_client_ipc_comms_nodispatch(T_ATTRD); for(lpc = 0; attrd == NULL && lpc < 10; lpc++) { sleep(1); crm_debug("attrd registration attempt: %d", lpc); attrd = init_client_ipc_comms_nodispatch(T_ATTRD); } } if(do_updates == FALSE) { crm_log_xml_info(update, "pingd"); } else if(attrd == NULL) { crm_err("Could not send update: %s=%d (Not connected)", pingd_attr, attr_multiplier*num_active); } else if(send_ipc_message(attrd, update) == FALSE) { crm_err("Could not send update: %s=%d (Connection failed)", pingd_attr, attr_multiplier*num_active); attrd = NULL; } else { crm_debug("Sent update: %s=%d (%d active ping nodes)", pingd_attr, attr_multiplier*num_active, num_active); } free_xml(update); } void pingd_nstatus_callback( const char *node, const char * status, void* private_data) { crm_notice("Status update: Ping node %s now has status [%s]", node, status); if(g_hash_table_lookup(ping_nodes, node) != NULL) { g_hash_table_replace( ping_nodes, crm_strdup(node), crm_strdup(status)); send_update(-1); } } void pingd_lstatus_callback(const char *node, const char *lnk, const char *status, void *private) { crm_notice("Status update: Ping node %s now has status [%s]", node, status); pingd_nstatus_callback(node, status, private); }