Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/exec/fsm.h b/exec/fsm.h
index 8406450d..87efd7db 100644
--- a/exec/fsm.h
+++ b/exec/fsm.h
@@ -1,124 +1,131 @@
/*
* Copyright (c) 2010-2012 Red Hat
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FSM_H_DEFINED
#define FSM_H_DEFINED
#include <sys/time.h>
#include <corosync/corotypes.h>
#include "util.h"
/* Forward declarations so the function-pointer typedefs can name the FSM types. */
struct cs_fsm_entry;
struct cs_fsm;

/* Handler invoked by cs_fsm_process() for the matching (state, event) row. */
typedef void (*cs_fsm_event_action_fn)(struct cs_fsm *fsm, int32_t event, void *data);

/* Translate a numeric state / event into a printable name. */
typedef const char *(*cs_fsm_state_to_str_fn)(struct cs_fsm *fsm, int32_t state);
typedef const char *(*cs_fsm_event_to_str_fn)(struct cs_fsm *fsm, int32_t event);
+
/*
 * Notification callback handed to cs_fsm_process() and cs_fsm_state_set().
 * cb_event is one of the CS_FSM_CB_EVENT_* values below; curr_state,
 * next_state and fsm_event describe the transition being reported and
 * data is the caller's opaque pointer, passed through unchanged.
 */
+typedef void (*cs_fsm_cb)(struct cs_fsm *fsm, int cb_event, int32_t curr_state,
+ int32_t next_state, int32_t fsm_event, void *data);
+
/* Capacity of cs_fsm_entry.next_states[]. */
#define CS_FSM_NEXT_STATE_SIZE 32
+
/* Passed as next_state when no target state applies (see cs_fsm_process()). */
+#define CS_FSM_STATE_NONE -1
+
/* cs_fsm_process() found no table row for the current state and event. */
+#define CS_FSM_CB_EVENT_PROCESS_NF 0
/* cs_fsm_state_set() accepted the transition and is about to apply it. */
+#define CS_FSM_CB_EVENT_STATE_SET 1
/* cs_fsm_state_set() did not find the requested state in next_states[]. */
+#define CS_FSM_CB_EVENT_STATE_SET_NF 2
+
/*
 * One row of a transition table: the (curr_state, event) pair it matches,
 * the handler to run, and the states the handler is allowed to move to.
 */
struct cs_fsm_entry {
	int32_t curr_state;			/* state this row applies to */
	int32_t event;				/* event this row applies to */
	cs_fsm_event_action_fn handler_fn;	/* run by cs_fsm_process() on a match */
	/* permitted targets; scanning stops at the first negative value */
	int32_t next_states[CS_FSM_NEXT_STATE_SIZE];
};
/*
 * A state machine instance: its transition table plus the state and the
 * table row selected by the most recent cs_fsm_process() call.
 */
struct cs_fsm {
	const char *name;			/* label used in log messages */
	int32_t curr_state;			/* current state */
	int32_t curr_entry;			/* index into table[] of the active row */
	size_t entries;				/* number of rows in table[] */
	struct cs_fsm_entry *table;		/* transition table */
	cs_fsm_state_to_str_fn state_to_str;	/* state -> printable name */
	cs_fsm_event_to_str_fn event_to_str;	/* event -> printable name */
};
/*
 * Dispatch new_event against the table row matching the FSM's current state.
 *
 * The table entry is defined by the state + event (curr_entry), so
 * cs_fsm_process() selects the entry and cs_fsm_state_set() (normally
 * called from the handler) sets the new state.
 *
 * fsm       - state machine to drive
 * new_event - event to dispatch
 * data      - opaque pointer forwarded to the handler and to cb
 * cb        - optional; called with CS_FSM_CB_EVENT_PROCESS_NF when no row
 *             matches (next_state is reported as CS_FSM_STATE_NONE)
 */
-static inline void cs_fsm_process (struct cs_fsm *fsm, int32_t new_event, void * data)
+static inline void cs_fsm_process (struct cs_fsm *fsm, int32_t new_event, void * data, cs_fsm_cb cb)
{
int32_t i;
/* NOTE(review): i is int32_t while entries is size_t, so this comparison is done unsigned */
for (i = 0; i < fsm->entries; i++) {
if (fsm->table[i].event == new_event &&
fsm->table[i].curr_state == fsm->curr_state) {
/* a matching row without a handler is treated as a programming error */
assert (fsm->table[i].handler_fn != NULL);
/* set current entry */
fsm->curr_entry = i;
fsm->table[i].handler_fn (fsm, new_event, data);
return;
}
}
- log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
- fsm->name, fsm->event_to_str(fsm, new_event), fsm->state_to_str(fsm, fsm->curr_state));
- corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+
/* no row matched: leave the reaction to the caller-supplied callback */
+ if (cb != NULL) {
+ cb(fsm, CS_FSM_CB_EVENT_PROCESS_NF, fsm->curr_state, CS_FSM_STATE_NONE, new_event, data);
+ }
}
/*
 * Move the FSM to next_state if the row selected by the last
 * cs_fsm_process() call lists it in next_states[].
 *
 * fsm        - state machine to update
 * next_state - requested state; a request for the current state is a no-op
 * data       - opaque pointer forwarded to cb
 * cb         - optional; receives CS_FSM_CB_EVENT_STATE_SET before the state
 *              is changed, or CS_FSM_CB_EVENT_STATE_SET_NF when next_state
 *              is not permitted (the state is then left unchanged)
 */
-static inline void cs_fsm_state_set (struct cs_fsm* fsm, int32_t next_state, void* data)
+static inline void cs_fsm_state_set (struct cs_fsm* fsm, int32_t next_state, void* data, cs_fsm_cb cb)
{
int i;
struct cs_fsm_entry *entry = &fsm->table[fsm->curr_entry];
if (fsm->curr_state == next_state) {
return;
}
/*
* confirm that "next_state" is in the current entry's next list
*/
for (i = 0; i < CS_FSM_NEXT_STATE_SIZE; i++) {
/* a negative value terminates the list */
if (entry->next_states[i] < 0) {
break;
}
if (entry->next_states[i] == next_state) {
- log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
- fsm->name,
- fsm->event_to_str(fsm, entry->event),
- fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
- fsm->state_to_str(fsm, next_state));
/* the callback runs while fsm->curr_state still holds the old state */
+ if (cb != NULL) {
+ cb(fsm, CS_FSM_CB_EVENT_STATE_SET, fsm->curr_state, next_state, entry->event, data);
+ }
fsm->curr_state = next_state;
return;
}
}
- log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
- fsm->name,
- fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
- fsm->state_to_str(fsm, next_state),
- fsm->event_to_str(fsm, entry->event));
- corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
/* transition not listed for this row: report it and keep the current state */
+ if (cb != NULL) {
+ cb(fsm, CS_FSM_CB_EVENT_STATE_SET_NF, fsm->curr_state, next_state, entry->event, data);
+ }
}
#endif /* FSM_H_DEFINED */
diff --git a/exec/mon.c b/exec/mon.c
index f220abd1..acc38c1a 100644
--- a/exec/mon.c
+++ b/exec/mon.c
@@ -1,503 +1,533 @@
/*
* Copyright (c) 2010-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <unistd.h>
#if defined(HAVE_LIBSTATGRAB)
#include <statgrab.h>
#endif
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/coroapi.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
-#include "../exec/fsm.h"
+#include "fsm.h"
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("MON");
/*
* Service Interfaces required by service_message_handler struct
*/
static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api);
static struct corosync_api_v1 *api;
#define MON_DEFAULT_PERIOD 3000
#define MON_MIN_PERIOD 500
#define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC)
/*
 * Service engine descriptor for the resource monitor. Only the exec init
 * hook is provided; every other callback slot is left empty.
 */
struct corosync_service_engine mon_service_engine = {
	.name			= "corosync resource monitoring service",
	.id			= MON_SERVICE,
	.priority		= 1,
	.private_data_size	= 0,
	.flow_control		= CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
	.lib_init_fn		= NULL,
	.lib_exit_fn		= NULL,
	.lib_engine		= NULL,
	.lib_engine_count	= 0,
	.exec_engine		= NULL,
	.exec_engine_count	= 0,
	.confchg_fn		= NULL,
	.exec_init_fn		= mon_exec_init_fn,
	.exec_dump_fn		= NULL
};

static DECLARE_LIST_INIT (confchg_notify);
/*
 * One monitored system resource together with its state machine.
 */
struct resource_instance {
	const char *icmap_path;			/* icmap key prefix; sub-key names are appended to it */
	const char *name;			/* short name, also used as the FSM name */
	corosync_timer_handle_t timer_handle;	/* pending poll timer, 0 when none */
	void (*update_stats_fn) (void *data);	/* sampling routine; re-arms the timer itself */
	struct cs_fsm fsm;
	uint64_t period;			/* poll period in milliseconds */
	icmap_value_types_t max_type;		/* selects which member of max is in use */
	union {
		int32_t int32;
		double dbl;
	} max;					/* failure threshold */
};
static void mem_update_stats_fn (void *data);
static void load_update_stats_fn (void *data);
/* Memory usage monitor: integer threshold, no limit until "max" is configured. */
static struct resource_instance memory_used_inst = {
	.icmap_path		= "resources.system.memory_used.",
	.name			= "memory_used",
	.update_stats_fn	= mem_update_stats_fn,
	.period			= MON_DEFAULT_PERIOD,
	.max_type		= ICMAP_VALUETYPE_INT32,
	.max.int32		= INT32_MAX,
};
/* 15-minute load average monitor: floating-point threshold. */
static struct resource_instance load_15min_inst = {
	.icmap_path		= "resources.system.load_15min.",
	.name			= "load_15min",
	.update_stats_fn	= load_update_stats_fn,
	.period			= MON_DEFAULT_PERIOD,
	.max_type		= ICMAP_VALUETYPE_DOUBLE,
	.max.dbl		= INT32_MAX,
};
/*
* F S M
*/
static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
/* Printable names for the monitor's states ... */
const char *mon_stopped_str = "stopped";
const char *mon_running_str = "running";
const char *mon_failed_str = "failed";
/* ... and for its events. */
const char *mon_config_changed_str = "config_changed";
const char *mon_failure_str = "failure";
/* States of a monitored resource's FSM. */
enum mon_resource_state {
	MON_S_STOPPED,	/* initial state set by mon_instance_init() */
	MON_S_RUNNING,
	MON_S_FAILED
};
/* Events fed into a monitored resource's FSM. */
enum mon_resource_event {
	MON_E_CONFIG_CHANGED,	/* an icmap key of the resource changed */
	MON_E_FAILURE		/* sampled value exceeded the configured max */
};
/*
 * Transition table shared by all monitor instances.
 * Columns: current state, event, handler, permitted next states (-1 ends the list).
 */
struct cs_fsm_entry mon_fsm_table[] = {
	{ MON_S_STOPPED, MON_E_CONFIG_CHANGED, mon_config_changed,  {MON_S_STOPPED, MON_S_RUNNING, -1} },
	{ MON_S_STOPPED, MON_E_FAILURE,        NULL,                {-1} },
	{ MON_S_RUNNING, MON_E_CONFIG_CHANGED, mon_config_changed,  {MON_S_RUNNING, MON_S_STOPPED, -1} },
	{ MON_S_RUNNING, MON_E_FAILURE,        mon_resource_failed, {MON_S_FAILED, -1} },
	{ MON_S_FAILED,  MON_E_CONFIG_CHANGED, mon_config_changed,  {MON_S_RUNNING, MON_S_STOPPED, -1} },
	{ MON_S_FAILED,  MON_E_FAILURE,        NULL,                {-1} },
};
/* Accessor returning the monitor's service engine descriptor. */
struct corosync_service_engine *mon_get_service_engine_ver0 (void)
{
	struct corosync_service_engine *engine = &mon_service_engine;

	return engine;
}
/*
 * Printable name of a monitor state, or NULL for a value that is not a
 * known mon_resource_state. The fsm argument is not used.
 */
static const char * mon_res_state_to_str(struct cs_fsm* fsm,
	int32_t state)
{
	if (state == MON_S_STOPPED) {
		return mon_stopped_str;
	}
	if (state == MON_S_RUNNING) {
		return mon_running_str;
	}
	if (state == MON_S_FAILED) {
		return mon_failed_str;
	}
	return NULL;
}
/*
 * Printable name of a monitor event, or NULL for a value that is not a
 * known mon_resource_event. The fsm argument is not used.
 */
static const char * mon_res_event_to_str(struct cs_fsm* fsm,
	int32_t event)
{
	if (event == MON_E_CONFIG_CHANGED) {
		return mon_config_changed_str;
	}
	if (event == MON_E_FAILURE) {
		return mon_failure_str;
	}
	return NULL;
}
/*
 * cs_fsm callback for the monitor service: logs every FSM notification and
 * calls corosync_exit_error(COROSYNC_DONE_FATAL_ERR) for everything except
 * a successful state change. The data argument is not used.
 */
+static void mon_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
+ int32_t next_state, int32_t fsm_event, void *data)
+{
+ switch (cb_event) {
/* no table row for this state/event pair */
+ case CS_FSM_CB_EVENT_PROCESS_NF:
+ log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
+ fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
/* permitted transition: informational log only */
+ case CS_FSM_CB_EVENT_STATE_SET:
+ log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
+ fsm->name,
+ fsm->event_to_str(fsm, fsm_event),
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state));
+ break;
/* requested state is not listed for the active row */
+ case CS_FSM_CB_EVENT_STATE_SET_NF:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
+ fsm->name,
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state),
+ fsm->event_to_str(fsm, fsm_event));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
/* cb_event is not one of the CS_FSM_CB_EVENT_* values */
+ default:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Can't find callback event!");
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ }
+}
+
/*
 * Change the resource's FSM state and, when the state really changed,
 * publish the new state name under "<icmap_path>state".
 */
static void mon_fsm_state_set (struct cs_fsm* fsm,
enum mon_resource_state next_state, struct resource_instance* inst)
{
enum mon_resource_state prev_state = fsm->curr_state;
const char *state_str;
char key_name[ICMAP_KEYNAME_MAXLEN];
ENTER();
-cs_fsm_state_set(fsm, next_state, inst);
+ cs_fsm_state_set(fsm, next_state, inst, mon_fsm_cb);
/* nothing to publish if the state was already next_state or the change was refused */
if (prev_state == fsm->curr_state) {
return;
}
state_str = mon_res_state_to_str(fsm, fsm->curr_state);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
icmap_set_string(key_name, state_str);
}
/*
 * FSM handler for MON_E_CONFIG_CHANGED.
 *
 * Re-reads "<icmap_path>poll_period" and "<icmap_path>max", cancels any
 * pending poll timer, and moves the resource to MON_S_RUNNING when a max
 * value is configured or to MON_S_STOPPED when it is not. data is the
 * struct resource_instance being reconfigured.
 */
static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
{
	struct resource_instance *inst = data;
	char key_name[ICMAP_KEYNAME_MAXLEN];
	uint64_t requested_period;
	int have_limit = 0;

	ENTER();

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
	if (icmap_get_uint64(key_name, &requested_period) == CS_OK) {
		if (requested_period < MON_MIN_PERIOD || requested_period > MON_MAX_PERIOD) {
			log_printf (LOGSYS_LEVEL_WARNING,
				"Could NOT use poll_period:%"PRIu64" ms for resource %s",
				requested_period, inst->name);
		} else {
			log_printf (LOGSYS_LEVEL_DEBUG,
				"poll_period changing from:%"PRIu64" to %"PRIu64".",
				inst->period, requested_period);
			inst->period = requested_period;
		}
	}

	/* the updater re-arms the timer itself, so drop whatever is pending */
	if (inst->timer_handle) {
		api->timer_delete(inst->timer_handle);
		inst->timer_handle = 0;
	}

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "max");
	if (inst->max_type == ICMAP_VALUETYPE_INT32) {
		if (icmap_get_int32(key_name, &inst->max.int32) == CS_OK) {
			have_limit = 1;
		} else {
			inst->max.int32 = INT32_MAX;
			mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
		}
	} else if (inst->max_type == ICMAP_VALUETYPE_DOUBLE) {
		if (icmap_get_double(key_name, &inst->max.dbl) == CS_OK) {
			have_limit = 1;
		} else {
			inst->max.dbl = INT32_MAX;
			mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
		}
	}

	if (!have_limit) {
		return;
	}

	mon_fsm_state_set (fsm, MON_S_RUNNING, inst);
	/*
	 * Run the updater right away, in case the period has shortened,
	 * and to start the timer.
	 */
	inst->update_stats_fn (inst);
}
/*
 * FSM handler for MON_E_FAILURE: marks the resource as failed.
 * data is the struct resource_instance that tripped its limit.
 */
void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
{
	struct resource_instance *failed_inst = data;

	ENTER();
	mon_fsm_state_set (fsm, MON_S_FAILED, failed_inst);
}
/*
 * Percentage of memory plus swap currently in use.
 *
 * Returns the used share as an integer percentage, or -1 when the
 * statistics cannot be obtained or report a total of zero.
 */
static int32_t percent_mem_used_get(void)
{
#if defined(HAVE_LIBSTATGRAB)
	sg_mem_stats *mem_stats;
	sg_swap_stats *swap_stats;
	long long total, freemem;

	mem_stats = sg_get_mem_stats();
	swap_stats = sg_get_swap_stats();

	/* both lookups must succeed; either pointer is dereferenced below */
	if (mem_stats == NULL || swap_stats == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s",
			sg_str_error(sg_get_error()));
		return -1;
	}
	total = mem_stats->total + swap_stats->total;
	freemem = mem_stats->free + swap_stats->free;
	/* avoid dividing by zero when no totals were reported */
	if (total <= 0) {
		return -1;
	}
	return ((total - freemem) * 100) / total;
#else
#if defined(COROSYNC_LINUX)
	char *line_ptr;
	char line[512];
	unsigned long long value;
	FILE *f;
	long long total = 0;
	long long freemem = 0;

	if ((f = fopen("/proc/meminfo", "r")) == NULL) {
		return -1;
	}
	while ((line_ptr = fgets(line, sizeof(line), f)) != NULL) {
		/* only "<name> <number> kB" lines carry a usable value */
		if (sscanf(line_ptr, "%*s %llu kB", &value) != 1) {
			continue;
		}
		value *= 1024;
		if (strncmp(line_ptr, "MemTotal:", 9) == 0) {
			total += value;
		} else if (strncmp(line_ptr, "MemFree:", 8) == 0) {
			freemem += value;
		} else if (strncmp(line_ptr, "SwapTotal:", 10) == 0) {
			total += value;
		} else if (strncmp(line_ptr, "SwapFree:", 9) == 0) {
			freemem += value;
		}
	}
	fclose(f);
	/* no MemTotal/SwapTotal line was parsed: nothing to divide by */
	if (total <= 0) {
		return -1;
	}
	return ((total - freemem) * 100) / total;
#else
#error need libstatgrab or linux.
#endif /* COROSYNC_LINUX */
#endif /* HAVE_LIBSTATGRAB */
}
-
/*
 * Timer callback for the memory monitor: samples memory usage, publishes
 * it under "<icmap_path>current" and "<icmap_path>last_updated", raises
 * MON_E_FAILURE when the value exceeds the configured max, and re-arms
 * itself after inst->period milliseconds. data is the resource_instance.
 */
static void mem_update_stats_fn (void *data)
{
struct resource_instance * inst = (struct resource_instance *)data;
int32_t new_value;
uint64_t timestamp;
char key_name[ICMAP_KEYNAME_MAXLEN];
new_value = percent_mem_used_get();
/* -1 (sampling error) and 0 are not published */
if (new_value > 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
/* NOTE(review): mon_instance_init() creates this key with icmap_set_int32() - confirm the type change is intended */
icmap_set_uint32(key_name, new_value);
timestamp = cs_timestamp_get();
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, timestamp);
/* do not re-raise the failure while already in the failed state */
if (new_value > inst->max.int32 && inst->fsm.curr_state != MON_S_FAILED) {
-cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst);
+ cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
}
}
api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
inst, inst->update_stats_fn, &inst->timer_handle);
}
/*
 * 15-minute load average of the system, or -1 when it cannot be read.
 */
static double min15_loadavg_get(void)
{
#if defined(HAVE_LIBSTATGRAB)
	sg_load_stats *stats = sg_get_load_stats ();

	if (stats != NULL) {
		return stats->min15;
	}
	log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s",
		sg_str_error (sg_get_error()));
	return -1;
#else
#if defined(COROSYNC_LINUX)
	double samples[3];

	/* third slot holds the 15-minute average */
	if (getloadavg(samples, 3) < 0) {
		return -1;
	}
	return samples[2];
#else
#error need libstatgrab or linux.
#endif /* COROSYNC_LINUX */
#endif /* HAVE_LIBSTATGRAB */
}
/*
 * Timer callback for the load monitor: samples the 15-minute load average,
 * publishes it under "<icmap_path>current" and "<icmap_path>last_updated",
 * raises MON_E_FAILURE when it exceeds the configured max, and re-arms
 * itself after inst->period milliseconds. data is the resource_instance.
 */
static void load_update_stats_fn (void *data)
{
struct resource_instance * inst = (struct resource_instance *)data;
uint64_t timestamp;
char key_name[ICMAP_KEYNAME_MAXLEN];
double min15 = min15_loadavg_get();
/* -1 (sampling error) and 0 are not published */
if (min15 > 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
icmap_set_double(key_name, min15);
timestamp = cs_timestamp_get();
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, timestamp);
/* do not re-raise the failure while already in the failed state */
if (min15 > inst->max.dbl && inst->fsm.curr_state != MON_S_FAILED) {
-cs_fsm_process (&inst->fsm, MON_E_FAILURE, &inst);
/* the handler casts data to struct resource_instance *, so pass inst itself, not its address */
+ cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
}
}
api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
inst, inst->update_stats_fn, &inst->timer_handle);
}
/*
 * icmap tracking callback for a monitored resource (registered by
 * mon_instance_init() with the resource_instance as user_data).
 * Deletions, and modifications of the "max" or "poll_period" sub-keys,
 * are turned into MON_E_CONFIG_CHANGED events; other changes are ignored.
 */
static void mon_key_changed_cb (
int32_t event,
const char *key_name,
struct icmap_notify_value new_value,
struct icmap_notify_value old_value,
void *user_data)
{
struct resource_instance* inst = (struct resource_instance*)user_data;
char *last_key_part;
if (event == ICMAP_TRACK_DELETE && inst) {
log_printf (LOGSYS_LEVEL_WARNING,
"resource \"%s\" deleted from cmap!",
inst->name);
-cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
}
if (event == ICMAP_TRACK_MODIFY) {
/* the sub-key name is whatever follows the last '.' */
last_key_part = strrchr(key_name, '.');
if (last_key_part == NULL)
return ;
last_key_part++;
if (strcmp(last_key_part, "max") == 0 ||
strcmp(last_key_part, "poll_period") == 0) {
ENTER();
/* NOTE(review): unlike the delete branch, inst is not checked for NULL here */
-cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
}
}
}
/*
 * Bring one monitored resource online: seed its icmap keys ("current",
 * "last_updated", "state", "poll_period"), initialise its FSM in
 * MON_S_STOPPED, apply the current configuration, and start tracking
 * changes below inst->icmap_path.
 */
static void mon_instance_init (struct resource_instance* inst)
{
uint64_t tmp_value;
char key_name[ICMAP_KEYNAME_MAXLEN];
icmap_track_t icmap_track = NULL;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
/* "current" is created with the same value type as the threshold */
if (inst->max_type == ICMAP_VALUETYPE_INT32) {
icmap_set_int32(key_name, 0);
} else {
icmap_set_double(key_name, 0);
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, 0);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
icmap_set_string(key_name, mon_stopped_str);
inst->fsm.name = inst->name;
inst->fsm.curr_entry = 0;
inst->fsm.curr_state = MON_S_STOPPED;
inst->fsm.table = mon_fsm_table;
inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
inst->fsm.state_to_str = mon_res_state_to_str;
inst->fsm.event_to_str = mon_res_event_to_str;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
/* publish the default period when none is configured, otherwise validate the configured one */
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
icmap_set_uint64(key_name, inst->period);
}
else {
if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
inst->period = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, inst->name);
}
}
/* apply the initial configuration through the normal event path */
-cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
icmap_track_add(inst->icmap_path,
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
mon_key_changed_cb, inst, &icmap_track);
}
/*
 * Exec-init hook of the monitor service: remembers the corosync API table
 * and initialises both built-in resource monitors. Always returns NULL.
 */
static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
#if defined(HAVE_LIBSTATGRAB)
	sg_init();
#endif /* HAVE_LIBSTATGRAB */

#if defined(COROSYNC_SOLARIS)
	logsys_subsys_init();
#endif

	api = corosync_api;

	mon_instance_init (&memory_used_inst);
	mon_instance_init (&load_15min_inst);

	return NULL;
}
diff --git a/exec/wd.c b/exec/wd.c
index befa4d0f..cb8b34fc 100644
--- a/exec/wd.c
+++ b/exec/wd.c
@@ -1,709 +1,740 @@
/*
* Copyright (c) 2010-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <sys/reboot.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/coroapi.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
-#include "../exec/fsm.h"
+#include "fsm.h"
#include "service.h"
/* Health classification of a watched resource. */
typedef enum {
	WD_RESOURCE_GOOD,
	WD_RESOURCE_FAILED,
	WD_RESOURCE_STATE_UNKNOWN,
	WD_RESOURCE_NOT_MONITORED
} wd_resource_state_t;
/*
 * One resource supervised by the watchdog service.
 */
struct resource {
	char res_path[ICMAP_KEYNAME_MAXLEN];	/* icmap key prefix; sub-key names are appended to it */
	char *recovery;				/* value read from "<res_path>recovery" */
	char name[CS_MAX_NAME_LENGTH];		/* resource name, also used as the FSM name */
	time_t last_updated;
	struct cs_fsm fsm;
	corosync_timer_handle_t check_timer;	/* pending health-check timer, 0 when none */
	uint64_t check_timeout;			/* health-check period in milliseconds */
	icmap_track_t icmap_track;		/* tracking handle for keys under res_path */
};
LOGSYS_DECLARE_SUBSYS("WD");
/*
* Service Interfaces required by service_message_handler struct
*/
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api);
static int wd_exec_exit_fn (void);
static void wd_resource_check_fn (void* resource_ref);
/* corosync API table used for timer calls */
static struct corosync_api_v1 *api;
/* timeout default (seconds and milliseconds) and the accepted poll_period range in milliseconds */
#define WD_DEFAULT_TIMEOUT_SEC 6
#define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
#define WD_MIN_TIMEOUT_MS 500
#define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC;
/* starts at half the default timeout */
static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2);
/* NOTE(review): presumably the watchdog device file descriptor, -1 while not open - confirm against the code that opens it */
static int dog = -1;
static corosync_timer_handle_t wd_timer;
/* cleared by wd_resource_failed() for the "watchdog" and "quit" recovery modes */
static int watchdog_ok = 1;
/*
 * Service engine descriptor for the watchdog. Only the exec init and exec
 * exit hooks are provided; every other callback slot is left empty.
 */
struct corosync_service_engine wd_service_engine = {
	.name			= "corosync watchdog service",
	.id			= WD_SERVICE,
	.priority		= 1,
	.private_data_size	= 0,
	.flow_control		= CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
	.lib_init_fn		= NULL,
	.lib_exit_fn		= NULL,
	.lib_engine		= NULL,
	.lib_engine_count	= 0,
	.exec_engine		= NULL,
	.exec_engine_count	= 0,
	.confchg_fn		= NULL,
	.exec_init_fn		= wd_exec_init_fn,
	.exec_exit_fn		= wd_exec_exit_fn,
	.exec_dump_fn		= NULL
};

static DECLARE_LIST_INIT (confchg_notify);
/*
* F S M
*/
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
/* States of a watched resource's FSM. */
enum wd_resource_state {
	WD_S_RUNNING,
	WD_S_FAILED,
	WD_S_STOPPED	/* initial state set by wd_resource_create() */
};
/* Events fed into a watched resource's FSM. */
enum wd_resource_event {
	WD_E_FAILURE,		/* health check failed */
	WD_E_CONFIG_CHANGED	/* an icmap key of the resource changed */
};
/* Printable names for the watchdog's states ... */
const char *wd_stopped_str = "stopped";
const char *wd_running_str = "running";
const char *wd_failed_str = "failed";
/* ... and for its events. */
const char *wd_config_changed_str = "config_changed";
const char *wd_failure_str = "failure";
/*
 * Transition table shared by all watched resources.
 * Columns: current state, event, handler, permitted next states (-1 ends the list).
 */
struct cs_fsm_entry wd_fsm_table[] = {
	{ WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed,  {WD_S_STOPPED, WD_S_RUNNING, -1} },
	{ WD_S_STOPPED, WD_E_FAILURE,        NULL,               {-1} },
	{ WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed,  {WD_S_RUNNING, WD_S_STOPPED, -1} },
	{ WD_S_RUNNING, WD_E_FAILURE,        wd_resource_failed, {WD_S_FAILED, -1} },
	{ WD_S_FAILED,  WD_E_CONFIG_CHANGED, wd_config_changed,  {WD_S_RUNNING, WD_S_STOPPED, -1} },
	{ WD_S_FAILED,  WD_E_FAILURE,        NULL,               {-1} },
};
/* Accessor returning the watchdog's service engine descriptor. */
struct corosync_service_engine *wd_get_service_engine_ver0 (void)
{
	struct corosync_service_engine *engine = &wd_service_engine;

	return engine;
}
/*
 * Printable name of a watchdog resource state, or NULL for a value that is
 * not a known wd_resource_state. The fsm argument is not used.
 */
static const char * wd_res_state_to_str(struct cs_fsm* fsm,
	int32_t state)
{
	if (state == WD_S_STOPPED) {
		return wd_stopped_str;
	}
	if (state == WD_S_RUNNING) {
		return wd_running_str;
	}
	if (state == WD_S_FAILED) {
		return wd_failed_str;
	}
	return NULL;
}
/*
 * Printable name of a watchdog resource event, or NULL for a value that is
 * not a known wd_resource_event. The fsm argument is not used.
 */
static const char * wd_res_event_to_str(struct cs_fsm* fsm,
	int32_t event)
{
	if (event == WD_E_CONFIG_CHANGED) {
		return wd_config_changed_str;
	}
	if (event == WD_E_FAILURE) {
		return wd_failure_str;
	}
	return NULL;
}
/*
 * cs_fsm callback for the watchdog service: logs every FSM notification and
 * calls corosync_exit_error(COROSYNC_DONE_FATAL_ERR) for everything except
 * a successful state change. The data argument is not used.
 */
+static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
+ int32_t next_state, int32_t fsm_event, void *data)
+{
+ switch (cb_event) {
/* no table row for this state/event pair */
+ case CS_FSM_CB_EVENT_PROCESS_NF:
+ log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
+ fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
/* permitted transition: informational log only */
+ case CS_FSM_CB_EVENT_STATE_SET:
+ log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
+ fsm->name,
+ fsm->event_to_str(fsm, fsm_event),
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state));
+ break;
/* requested state is not listed for the active row */
+ case CS_FSM_CB_EVENT_STATE_SET_NF:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
+ fsm->name,
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state),
+ fsm->event_to_str(fsm, fsm_event));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
/* cb_event is not one of the CS_FSM_CB_EVENT_* values */
+ default:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!");
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ }
+}
+
/*
 * Health check for one watched resource.
 *
 * Returns CS_TRUE when the resource is healthy and CS_FALSE when it is
 * not: its "last_updated" or "state" key is missing, its state is
 * "disabled" or "failed", or its timestamp is older than the poll period
 * plus the grace factor.
 */
static int32_t wd_resource_state_is_ok (struct resource *ref)
{
	char key_name[ICMAP_KEYNAME_MAXLEN];
	char *state = NULL;
	uint64_t last_updated;
	uint64_t my_time;
	uint64_t allowed_period;
	int32_t verdict = CS_FALSE;

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated");
	if (icmap_get_uint64(key_name, &last_updated) != CS_OK) {
		/* key does not exist. */
		return CS_FALSE;
	}

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
	if (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0) {
		/* key does not exist, or the resource is disabled. */
		goto done;
	}

	if (last_updated == 0) {
		/* initial value: the monitor has not written a sample yet */
		verdict = CS_TRUE;
		goto done;
	}

	my_time = cs_timestamp_get();

	/*
	 * Check that the monitor has written a timestamp within the
	 * poll_period plus a grace factor of (0.5 * poll_period).
	 */
	allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
	if ((last_updated + allowed_period) < my_time) {
		log_printf (LOGSYS_LEVEL_ERROR,
			"last_updated %"PRIu64" ms too late, period:%"PRIu64".",
			(uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)),
			ref->check_timeout);
		goto done;
	}

	if (strcmp (state, wd_failed_str) != 0) {
		verdict = CS_TRUE;
	}

done:
	free(state);
	return verdict;
}
/*
 * FSM handler for WD_E_CONFIG_CHANGED.
 *
 * Re-reads "<res_path>poll_period", "<res_path>recovery" and
 * "<res_path>state". The resource is stopped when the recovery or state
 * key is missing or the state is "stopped"; otherwise the health-check
 * timer is re-armed and the resource is set running.
 * data is the struct resource being reconfigured.
 */
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
{
char *state;
char *recovery = NULL;
uint64_t tmp_value;
uint64_t next_timeout;
struct resource *ref = (struct resource*)data;
char key_name[ICMAP_KEYNAME_MAXLEN];
next_timeout = ref->check_timeout;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period");
/* look up the full "<res_path>poll_period" key built above, not the bare prefix */
if (icmap_get_uint64(key_name, &tmp_value) == CS_OK) {
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
log_printf (LOGSYS_LEVEL_DEBUG,
"poll_period changing from:%"PRIu64" to %"PRIu64".",
ref->check_timeout, tmp_value);
/*
* To ease the transition between poll_periods, the first timeout
* is the larger of the new and the old value.
* This is to give the monitoring system time to adjust.
*/
next_timeout = CS_MAX(tmp_value, ref->check_timeout);
ref->check_timeout = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, ref->name);
}
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery");
/* read into a temporary so a failed lookup cannot clobber the stored string */
if (icmap_get_string(key_name, &recovery) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a recovery key.", ref->name);
-cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
return;
}
/* release the previously stored recovery string instead of leaking it */
free(ref->recovery);
ref->recovery = recovery;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
if (icmap_get_string(key_name, &state) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a state key.", ref->name);
-cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
return;
}
/* any pending check is cancelled; it is re-armed below unless the resource is stopped */
if (ref->check_timer) {
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
}
if (strcmp(wd_stopped_str, state) == 0) {
-cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
} else {
api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
ref, wd_resource_check_fn, &ref->check_timer);
-cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
}
free(state);
}
/*
 * FSM handler for WD_E_FAILURE: cancels the resource's health-check timer,
 * applies the configured recovery action and moves the FSM to WD_S_FAILED.
 * data is the struct resource that failed.
 */
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
{
struct resource* ref = (struct resource*)data;
if (ref->check_timer) {
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
}
/* NOTE(review): ref->recovery is used unchecked here and below - confirm it can never be NULL on this path */
log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
ref->recovery, (char*)ref->name);
/* "watchdog" and "quit" only clear the watchdog_ok flag */
if (strcmp (ref->recovery, "watchdog") == 0 ||
strcmp (ref->recovery, "quit") == 0) {
watchdog_ok = 0;
}
else if (strcmp (ref->recovery, "reboot") == 0) {
reboot(RB_AUTOBOOT);
}
else if (strcmp (ref->recovery, "shutdown") == 0) {
reboot(RB_POWER_OFF);
}
-cs_fsm_state_set(fsm, WD_S_FAILED, data);
+ cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb);
}
/*
 * icmap tracking callback for a watched resource (user_data is its
 * struct resource). Additions and modifications of any sub-key other than
 * "last_updated" and "current" raise WD_E_CONFIG_CHANGED; deletion of the
 * "state" sub-key tears the resource down and frees it.
 */
static void wd_key_changed(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
struct resource* ref = (struct resource*)user_data;
char *last_key_part;
if (ref == NULL) {
return ;
}
/* the sub-key name is whatever follows the last '.' */
last_key_part = strrchr(key_name, '.');
if (last_key_part == NULL) {
return ;
}
last_key_part++;
if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) {
/* these two keys are rewritten on every sample and are not configuration */
if (strcmp(last_key_part, "last_updated") == 0 ||
strcmp(last_key_part, "current") == 0) {
return;
}
-cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref);
+ cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb);
}
if (event == ICMAP_TRACK_DELETE && ref != NULL) {
if (strcmp(last_key_part, "state") != 0) {
return ;
}
log_printf (LOGSYS_LEVEL_WARNING,
"resource \"%s\" deleted from cmap!",
ref->name);
/* stop checking and tracking before the resource is released */
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
icmap_track_delete(ref->icmap_track);
/* NOTE(review): ref->recovery is not freed here - confirm whether it is owned by ref */
free(ref);
}
}
/*
 * Timer callback performing the periodic health check of one resource.
 * A failed check raises WD_E_FAILURE and does not re-arm the timer;
 * a passed check schedules the next one after check_timeout milliseconds.
 */
static void wd_resource_check_fn (void* resource_ref)
{
struct resource* ref = (struct resource*)resource_ref;
if (wd_resource_state_is_ok (ref) == CS_FALSE) {
-cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref);
+ cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb);
return;
}
api->timer_add_duration(ref->check_timeout*MILLI_2_NANO_SECONDS,
ref, wd_resource_check_fn, &ref->check_timer);
}
/*
 * Create the monitoring record for one resource found in cmap.
 *
 * res_path - cmap key prefix of the resource; leaf names such as
 *            "poll_period" are appended to it directly
 * res_name - resource name, copied into the record and used as FSM name
 *
 * The record is allocated, its FSM initialised in WD_S_STOPPED, the poll
 * period read (or published when missing) and a tracker registered on
 * res_path.  Monitoring only starts when both the "recovery" and the "state"
 * keys exist.
 *
 * return 0 - fully configured
 * return -1 - partially configured ("recovery" or "state" key missing; the
 *             record stays registered as user data of the tracker)
 */
static int32_t wd_resource_create (char *res_path, char *res_name)
{
char *state;
uint64_t tmp_value;
/* NOTE(review): the calloc() result is dereferenced below without a NULL check */
struct resource *ref = calloc (1, sizeof (struct resource));
char key_name[ICMAP_KEYNAME_MAXLEN];
/*
 * NOTE(review): both strcpy() calls are unbounded; the sizes of
 * ref->res_path and ref->name are not visible here - confirm they can hold
 * any res_path / res_name a caller may pass.
 */
strcpy(ref->res_path, res_path);
ref->check_timeout = WD_DEFAULT_TIMEOUT_MS;
ref->check_timer = 0;
strcpy(ref->name, res_name);
ref->fsm.name = ref->name;
ref->fsm.table = wd_fsm_table;
ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
ref->fsm.curr_entry = 0;
ref->fsm.curr_state = WD_S_STOPPED;
ref->fsm.state_to_str = wd_res_state_to_str;
ref->fsm.event_to_str = wd_res_event_to_str;
/* poll period: publish the default when unset, accept only in-range values */
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
icmap_set_uint64(key_name, ref->check_timeout);
} else {
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
ref->check_timeout = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, ref->name);
}
}
/*
 * The tracker is registered before the mandatory keys are checked, so a
 * partially configured resource still receives wd_key_changed() calls.
 */
icmap_track_add(res_path,
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
wd_key_changed,
ref, &ref->icmap_track);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery");
if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a recovery key.", ref->name);
return -1;
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state");
if (icmap_get_string(key_name, &state) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a state key.", ref->name);
return -1;
}
/*
 * NOTE(review): state is only fetched to prove the key exists; it is never
 * read or released in this function - if icmap_get_string() returns an
 * allocated copy this leaks it.  Confirm against the icmap API.
 */
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
/* key does not exist.
*/
ref->last_updated = 0;
} else {
ref->last_updated = tmp_value;
}
/*
* delay the first check to give the monitor time to start working.
*/
tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
ref,
wd_resource_check_fn, &ref->check_timer);
/* the patch adds wd_fsm_cb as a fourth argument to cs_fsm_state_set() */
- cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
return 0;
}
/*
 * Periodic timer callback that keeps the hardware watchdog alive.
 *
 * arg - unused timer payload
 *
 * While watchdog_ok is set, a keep-alive is sent to the device (when one is
 * open) and the timer is re-armed for tickle_timeout milliseconds.  Once
 * watchdog_ok has been cleared the function only logs an alert and does not
 * re-arm itself.
 */
static void wd_tickle_fn (void* arg)
{
	ENTER();

	if (!watchdog_ok) {
		log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
		return;
	}

	if (dog > 0) {
		ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
	}

	api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
		wd_tickle_fn, &wd_timer);
}
/*
 * icmap tracking callback that picks up resources added at run time.
 *
 * event     - only ICMAP_TRACK_ADD is acted upon
 * key_name  - name of the added key, expected to look like
 *             "resources.<type>.<name>.<leaf>"
 * new_val   - not used here
 * old_val   - not used here
 * user_data - not used here
 *
 * A resource record is created when the added leaf is "state".
 */
static void wd_resource_created_cb(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	char type_part[ICMAP_KEYNAME_MAXLEN];
	char name_part[ICMAP_KEYNAME_MAXLEN];
	/* holds the leaf name first, then is reused for the resource prefix */
	char leaf_or_prefix[ICMAP_KEYNAME_MAXLEN];

	if (event != ICMAP_TRACK_ADD) {
		return;
	}

	if (sscanf(key_name, "resources.%[^.].%[^.].%[^.]",
		   type_part, name_part, leaf_or_prefix) != 3) {
		return;
	}

	if (strcmp(leaf_or_prefix, "state") == 0) {
		snprintf(leaf_or_prefix, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.",
			 type_part, name_part);
		wd_resource_create (leaf_or_prefix, name_part);
	}
}
/*
 * Walk every key below "resources." and create a monitoring record for each
 * "resources.<type>.<name>.state" key found, then register trackers so that
 * resources added later under "resources.process." and "resources.system."
 * are picked up by wd_resource_created_cb().
 *
 * Logs an informational message when no resource was fully configured.
 */
static void wd_scan_resources (void)
{
	char type_part[ICMAP_KEYNAME_MAXLEN];
	char name_part[ICMAP_KEYNAME_MAXLEN];
	/* holds the leaf name first, then is reused for the resource prefix */
	char leaf_or_prefix[ICMAP_KEYNAME_MAXLEN];
	icmap_track_t track_handle = NULL;
	icmap_iter_t cursor;
	const char *current_key;
	int configured = 0;

	ENTER();

	cursor = icmap_iter_init("resources.");
	for (current_key = icmap_iter_next(cursor, NULL, NULL);
	     current_key != NULL;
	     current_key = icmap_iter_next(cursor, NULL, NULL)) {
		if (sscanf(current_key, "resources.%[^.].%[^.].%[^.]",
			   type_part, name_part, leaf_or_prefix) != 3) {
			continue;
		}
		if (strcmp(leaf_or_prefix, "state") != 0) {
			continue;
		}

		snprintf(leaf_or_prefix, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.",
			 type_part, name_part);
		/* only fully configured resources are counted */
		if (wd_resource_create (leaf_or_prefix, name_part) == 0) {
			configured++;
		}
	}
	icmap_iter_finalize(cursor);

	icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
		wd_resource_created_cb, NULL, &track_handle);
	icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
		wd_resource_created_cb, NULL, &track_handle);

	if (configured == 0) {
		log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
	}
}
/*
 * Change the watchdog timeout and derive the tickle period from it.
 *
 * new - requested watchdog timeout in seconds
 *
 * Nothing happens when the requested value equals the current
 * watchdog_timeout.  Otherwise the value is stored and, when a device is
 * open, programmed into it (if the driver advertises WDIOF_SETTIMEOUT) and
 * read back.  If the value in effect afterwards matches the request, the
 * tickle period is set to half of the timeout and the tickle timer is
 * restarted; otherwise a warning is logged.
 */
static void watchdog_timeout_apply (uint32_t new)
{
	struct watchdog_info ident;
	uint32_t original_timeout = watchdog_timeout;

	if (new == original_timeout) {
		return;
	}

	watchdog_timeout = new;

	if (dog > 0) {
		/*
		 * Start from a zeroed structure and only trust it when the ioctl
		 * succeeded, so a failing WDIOC_GETSUPPORT can never make us test
		 * uninitialised option bits.
		 */
		memset(&ident, 0, sizeof(ident));
		if (ioctl(dog, WDIOC_GETSUPPORT, &ident) == 0 &&
		    (ident.options & WDIOF_SETTIMEOUT)) {
			/* yay! the dog is trained.
			 */
			ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
		}
		/* read back what the driver really uses */
		ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
	}

	if (watchdog_timeout == new) {
		tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;

		/* reset the tickle timer in case it was reduced.
		 */
		api->timer_delete (wd_timer);
		api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
			wd_tickle_fn, &wd_timer);

		log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %"PRIu32" seconds", watchdog_timeout);
		log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout);
	} else {
		log_printf (LOGSYS_LEVEL_WARNING,
			"Could not change the Watchdog timeout from %"PRIu32" to %"PRIu32" seconds",
			original_timeout, new);
	}
}
/*
 * Open /dev/watchdog and enable the card.
 *
 * On success the descriptor is stored in the file-level variable dog.
 *
 * return 0  - device opened
 * return -1 - device missing, not writable or could not be opened
 *             (dog is left at -1)
 */
static int setup_watchdog(void)
{
	struct watchdog_info ident;
	/* WDIOC_SETOPTIONS takes a pointer to the option word, not the word itself */
	int options = WDIOS_ENABLECARD;

	ENTER();

	if (access ("/dev/watchdog", W_OK) != 0) {
		log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe <a watchdog>");
		dog = -1;
		return -1;
	}

	/* here goes, lets hope they have "Magic Close"
	 */
	dog = open("/dev/watchdog", O_WRONLY);
	if (dog == -1) {
		log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't be opened.");
		return -1;
	}

	/* Right we have the dog.
	 * Lets see what breed it is.
	 *
	 * The structure is zeroed first so that an empty identity string is
	 * logged, rather than stack garbage, should the ioctl fail.
	 */
	memset(&ident, 0, sizeof(ident));
	ioctl(dog, WDIOC_GETSUPPORT, &ident);

	log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by corosync.");
	log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);

	/*
	 * NOTE(review): watchdog_timeout_apply() returns immediately when its
	 * argument equals watchdog_timeout, so this call looks like a no-op -
	 * confirm whether the device timeout is meant to be programmed here.
	 */
	watchdog_timeout_apply (watchdog_timeout);

	ioctl(dog, WDIOC_SETOPTIONS, &options);

	return 0;
}
/*
 * icmap tracking callback for "resources.watchdog_timeout".
 *
 * event, key_name, new_val, old_val, user_data - not used; the current
 * value is re-read from cmap instead.
 *
 * A readable value inside 2..120 seconds is applied.  A readable value
 * outside that range is ignored, leaving the current timeout in place.  If
 * the key cannot be read, the default timeout is applied.
 */
static void wd_top_level_key_changed(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	uint32_t tmp_value_32;

	ENTER();

	/*
	 * tmp_value_32 is only meaningful when the lookup succeeded, so the
	 * range check must sit on the CS_OK branch.
	 */
	if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) == CS_OK) {
		if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
			watchdog_timeout_apply (tmp_value_32);
		}
	}
	else {
		watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
	}
}
/*
 * Establish the watchdog timeout at start-up and begin tracking changes.
 *
 * When "resources.watchdog_timeout" is absent the default is applied and the
 * resulting watchdog_timeout is published to cmap.  When it is present, a
 * value inside 2..120 seconds is applied and anything else is replaced by
 * the default.  Finally a MODIFY tracker is registered on the key.
 */
static void watchdog_timeout_get_initial (void)
{
	uint32_t configured;
	icmap_track_t track_handle = NULL;

	ENTER();

	if (icmap_get_uint32("resources.watchdog_timeout", &configured) == CS_OK) {
		/* out-of-range settings fall back to the default */
		if (configured < 2 || configured > 120) {
			configured = WD_DEFAULT_TIMEOUT_SEC;
		}
		watchdog_timeout_apply (configured);
	} else {
		watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
		icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
	}

	icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY,
		wd_top_level_key_changed, NULL, &track_handle);
}
/*
 * Service initialisation entry point.
 *
 * corosync_api - API table handed in by the executive; stored in the
 *                file-level variable api for use by the rest of this file
 *
 * Determines the initial watchdog timeout, opens the watchdog device, scans
 * cmap for resources to monitor and arms the first tickle timer.
 *
 * Always returns NULL.
 */
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
ENTER();
#ifdef COROSYNC_SOLARIS
logsys_subsys_init();
#endif
/* must be set before any helper below, they all go through api */
api = corosync_api;
watchdog_timeout_get_initial();
/* the return value is ignored: the service keeps running without a device */
setup_watchdog();
wd_scan_resources();
api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
wd_tickle_fn, &wd_timer);
return NULL;
}
static int wd_exec_exit_fn (void)
{
char magic = 'V';
ENTER();
if (dog > 0) {
log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
write (dog, &magic, 1);
}
return 0;
}

File Metadata

Mime Type
text/x-diff
Expires
Wed, Jun 4, 5:58 AM (6 h, 12 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1854710
Default Alt Text
(41 KB)

Event Timeline