Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4149367
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
41 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/exec/fsm.h b/exec/fsm.h
index 8406450d..87efd7db 100644
--- a/exec/fsm.h
+++ b/exec/fsm.h
@@ -1,124 +1,131 @@
/*
* Copyright (c) 2010-2012 Red Hat
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FSM_H_DEFINED
#define FSM_H_DEFINED
#include <sys/time.h>
#include <corosync/corotypes.h>
#include "util.h"
struct cs_fsm;
struct cs_fsm_entry;
typedef void (*cs_fsm_event_action_fn)(struct cs_fsm* fsm, int32_t event, void * data);
typedef const char * (*cs_fsm_state_to_str_fn)(struct cs_fsm* fsm, int32_t state);
typedef const char * (*cs_fsm_event_to_str_fn)(struct cs_fsm* fsm, int32_t event);
+
+typedef void (*cs_fsm_cb)(struct cs_fsm *fsm, int cb_event, int32_t curr_state,
+ int32_t next_state, int32_t fsm_event, void *data);
+
#define CS_FSM_NEXT_STATE_SIZE 32
+
+#define CS_FSM_STATE_NONE -1
+
+#define CS_FSM_CB_EVENT_PROCESS_NF 0
+#define CS_FSM_CB_EVENT_STATE_SET 1
+#define CS_FSM_CB_EVENT_STATE_SET_NF 2
+
/*
 * One row of a state machine's transition table: a (state, event) pair,
 * the handler to run for it, and the states that handler may move to.
 */
struct cs_fsm_entry {
int32_t curr_state; /* state this row applies to */
int32_t event; /* event this row applies to */
cs_fsm_event_action_fn handler_fn; /* run by cs_fsm_process() when the row matches */
/* targets accepted by cs_fsm_state_set(); the scan stops at the first negative value */
int32_t next_states[CS_FSM_NEXT_STATE_SIZE];
};
/*
 * A state machine instance: the transition table plus the current position
 * in it and the helpers used to render states and events as text.
 */
struct cs_fsm {
const char *name; /* label for this machine */
int32_t curr_state; /* current state; updated by cs_fsm_state_set() */
int32_t curr_entry; /* index of the table row selected by the last cs_fsm_process() match */
size_t entries; /* number of rows in "table" */
struct cs_fsm_entry *table; /* transition table */
cs_fsm_state_to_str_fn state_to_str; /* maps a state value to a printable name */
cs_fsm_event_to_str_fn event_to_str; /* maps an event value to a printable name */
};
/*
* the table entry is defined by the state + event (curr_entry).
* so cs_fsm_process() sets the entry and cs_fsm_state_set()
* sets the new state.
*/
/*
 * Find the table row matching (fsm->curr_state, new_event), remember its
 * index in fsm->curr_entry and run its handler with "data".
 *
 * fsm       - state machine to drive
 * new_event - event to dispatch
 * data      - opaque pointer forwarded to the handler and to "cb"
 * cb        - optional notification callback; may be NULL
 *
 * When no row matches, "cb" (if non-NULL) is called with
 * CS_FSM_CB_EVENT_PROCESS_NF and CS_FSM_STATE_NONE as the next state;
 * the function then simply returns.
 */
-static inline void cs_fsm_process (struct cs_fsm *fsm, int32_t new_event, void * data)
+static inline void cs_fsm_process (struct cs_fsm *fsm, int32_t new_event, void * data, cs_fsm_cb cb)
{
int32_t i;
/* NOTE(review): "i" is int32_t while fsm->entries is size_t, so the comparison mixes signedness. */
for (i = 0; i < fsm->entries; i++) {
if (fsm->table[i].event == new_event &&
fsm->table[i].curr_state == fsm->curr_state) {
/* a matching row must carry a handler; a row with a NULL handler_fn trips this assert */
assert (fsm->table[i].handler_fn != NULL);
/* set current entry */
fsm->curr_entry = i;
fsm->table[i].handler_fn (fsm, new_event, data);
return;
}
}
- log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
- fsm->name, fsm->event_to_str(fsm, new_event), fsm->state_to_str(fsm, fsm->curr_state));
- corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+
+ if (cb != NULL) {
+ cb(fsm, CS_FSM_CB_EVENT_PROCESS_NF, fsm->curr_state, CS_FSM_STATE_NONE, new_event, data);
+ }
}
/*
 * Move the machine to "next_state" if the row selected by the last
 * cs_fsm_process() call lists it as a permitted target.
 *
 * fsm        - state machine to update
 * next_state - requested state
 * data       - opaque pointer forwarded to "cb"
 * cb         - optional notification callback; may be NULL
 *
 * Requesting the state the machine is already in is a silent no-op.
 * On a permitted transition "cb" is called with CS_FSM_CB_EVENT_STATE_SET
 * before fsm->curr_state is updated. Otherwise "cb" is called with
 * CS_FSM_CB_EVENT_STATE_SET_NF and the state is left unchanged.
 */
-static inline void cs_fsm_state_set (struct cs_fsm* fsm, int32_t next_state, void* data)
+static inline void cs_fsm_state_set (struct cs_fsm* fsm, int32_t next_state, void* data, cs_fsm_cb cb)
{
int i;
struct cs_fsm_entry *entry = &fsm->table[fsm->curr_entry];
if (fsm->curr_state == next_state) {
return;
}
/*
* confirm that "next_state" is in the current entry's next list
*/
for (i = 0; i < CS_FSM_NEXT_STATE_SIZE; i++) {
/* a negative value terminates the list */
if (entry->next_states[i] < 0) {
break;
}
if (entry->next_states[i] == next_state) {
- log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
- fsm->name,
- fsm->event_to_str(fsm, entry->event),
- fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
- fsm->state_to_str(fsm, next_state));
+ if (cb != NULL) {
+ cb(fsm, CS_FSM_CB_EVENT_STATE_SET, fsm->curr_state, next_state, entry->event, data);
+ }
fsm->curr_state = next_state;
return;
}
}
- log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
- fsm->name,
- fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
- fsm->state_to_str(fsm, next_state),
- fsm->event_to_str(fsm, entry->event));
- corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ if (cb != NULL) {
+ cb(fsm, CS_FSM_CB_EVENT_STATE_SET_NF, fsm->curr_state, next_state, entry->event, data);
+ }
}
#endif /* FSM_H_DEFINED */
diff --git a/exec/mon.c b/exec/mon.c
index f220abd1..acc38c1a 100644
--- a/exec/mon.c
+++ b/exec/mon.c
@@ -1,503 +1,533 @@
/*
* Copyright (c) 2010-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <unistd.h>
#if defined(HAVE_LIBSTATGRAB)
#include <statgrab.h>
#endif
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/coroapi.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
-#include "../exec/fsm.h"
+#include "fsm.h"
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("MON");
/*
* Service Interfaces required by service_message_handler struct
*/
static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api);
static struct corosync_api_v1 *api;
#define MON_DEFAULT_PERIOD 3000
#define MON_MIN_PERIOD 500
#define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC)
/*
 * Service engine descriptor for the monitoring service. Only the exec init
 * hook is populated; every other hook and engine table is NULL or zero.
 */
struct corosync_service_engine mon_service_engine = {
.name = "corosync resource monitoring service",
.id = MON_SERVICE,
.priority = 1,
.private_data_size = 0,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.lib_init_fn = NULL,
.lib_exit_fn = NULL,
.lib_engine = NULL,
.lib_engine_count = 0,
.exec_engine = NULL,
.exec_engine_count = 0,
.confchg_fn = NULL,
.exec_init_fn = mon_exec_init_fn,
.exec_dump_fn = NULL
};
static DECLARE_LIST_INIT (confchg_notify);
/*
 * State kept for one monitored resource.
 */
struct resource_instance {
const char *icmap_path; /* icmap key prefix; sub-key names such as "current" or "state" are appended to it */
const char *name; /* resource name, used in log messages and as the FSM name */
corosync_timer_handle_t timer_handle; /* handle of the pending poll timer; 0 after it is deleted */
void (*update_stats_fn) (void *data); /* sampler; receives the instance as "data" */
struct cs_fsm fsm; /* per-resource state machine */
uint64_t period; /* poll period in milliseconds */
icmap_value_types_t max_type; /* selects which member of "max" is in use */
union {
int32_t int32;
double dbl;
} max; /* threshold above which a sample triggers MON_E_FAILURE */
};
static void mem_update_stats_fn (void *data);
static void load_update_stats_fn (void *data);
/*
 * Memory usage monitor: integer threshold, initially INT32_MAX, sampled by
 * mem_update_stats_fn every MON_DEFAULT_PERIOD ms by default.
 */
static struct resource_instance memory_used_inst = {
.name = "memory_used",
.icmap_path = "resources.system.memory_used.",
.update_stats_fn = mem_update_stats_fn,
.max_type = ICMAP_VALUETYPE_INT32,
.max.int32 = INT32_MAX,
.period = MON_DEFAULT_PERIOD,
};
/*
 * 15-minute load average monitor: double threshold, initially INT32_MAX,
 * sampled by load_update_stats_fn every MON_DEFAULT_PERIOD ms by default.
 */
static struct resource_instance load_15min_inst = {
.name = "load_15min",
.icmap_path = "resources.system.load_15min.",
.update_stats_fn = load_update_stats_fn,
.max_type = ICMAP_VALUETYPE_DOUBLE,
.max.dbl = INT32_MAX,
.period = MON_DEFAULT_PERIOD,
};
/*
* F S M
*/
static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
/* Printable names for the monitor FSM's states and events. */
const char * mon_running_str = "running";
const char * mon_failed_str = "failed";
const char * mon_failure_str = "failure";
const char * mon_stopped_str = "stopped";
const char * mon_config_changed_str = "config_changed";
/* States a monitored resource can be in. */
enum mon_resource_state {
MON_S_STOPPED,
MON_S_RUNNING,
MON_S_FAILED
};
/* Events fed into the monitor FSM. */
enum mon_resource_event {
MON_E_CONFIG_CHANGED,
MON_E_FAILURE
};
/*
 * Transition table: { state, event, handler, { allowed next states, -1 } }.
 * The rows for MON_E_FAILURE in the stopped and failed states have no
 * handler; cs_fsm_process() asserts on a matching row whose handler is NULL.
 */
struct cs_fsm_entry mon_fsm_table[] = {
{ MON_S_STOPPED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_STOPPED, MON_S_RUNNING, -1} },
{ MON_S_STOPPED, MON_E_FAILURE, NULL, {-1} },
{ MON_S_RUNNING, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
{ MON_S_RUNNING, MON_E_FAILURE, mon_resource_failed, {MON_S_FAILED, -1} },
{ MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
{ MON_S_FAILED, MON_E_FAILURE, NULL, {-1} },
};
/*
 * Accessor returning the address of the monitoring service engine descriptor.
 */
struct corosync_service_engine *mon_get_service_engine_ver0 (void)
{
	struct corosync_service_engine *engine = &mon_service_engine;

	return engine;
}
/*
 * Translate a monitor FSM state into its printable name.
 * Returns NULL for a value that is not one of the known states.
 * The "fsm" argument is not used.
 */
static const char * mon_res_state_to_str(struct cs_fsm* fsm,
	int32_t state)
{
	const char *label = NULL;

	if (state == MON_S_STOPPED) {
		label = mon_stopped_str;
	} else if (state == MON_S_RUNNING) {
		label = mon_running_str;
	} else if (state == MON_S_FAILED) {
		label = mon_failed_str;
	}

	return label;
}
/*
 * Translate a monitor FSM event into its printable name.
 * Returns NULL for a value that is not one of the known events.
 * The "fsm" argument is not used.
 */
static const char * mon_res_event_to_str(struct cs_fsm* fsm,
	int32_t event)
{
	if (event == MON_E_CONFIG_CHANGED) {
		return mon_config_changed_str;
	}
	if (event == MON_E_FAILURE) {
		return mon_failure_str;
	}

	return NULL;
}
/*
 * Notification callback handed to cs_fsm_process() and cs_fsm_state_set()
 * by this file. It logs the reported condition; for a missing table row,
 * a rejected transition, or an unrecognised cb_event it then calls
 * corosync_exit_error(COROSYNC_DONE_FATAL_ERR). "data" is not used.
 *
 * NOTE(review): the two state-set cases print the state of the current
 * table row rather than the curr_state argument.
 */
+static void mon_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
+ int32_t next_state, int32_t fsm_event, void *data)
+{
+ switch (cb_event) {
+ case CS_FSM_CB_EVENT_PROCESS_NF:
+ log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
+ fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ case CS_FSM_CB_EVENT_STATE_SET:
+ log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
+ fsm->name,
+ fsm->event_to_str(fsm, fsm_event),
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state));
+ break;
+ case CS_FSM_CB_EVENT_STATE_SET_NF:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
+ fsm->name,
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state),
+ fsm->event_to_str(fsm, fsm_event));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ default:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Can't find callback event!");
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ }
+}
+
/*
 * Request a transition to next_state and, when the state actually changed,
 * publish the new state's name under the "<icmap_path>state" key.
 */
static void mon_fsm_state_set (struct cs_fsm* fsm,
enum mon_resource_state next_state, struct resource_instance* inst)
{
enum mon_resource_state prev_state = fsm->curr_state;
const char *state_str;
char key_name[ICMAP_KEYNAME_MAXLEN];
ENTER();
- cs_fsm_state_set(fsm, next_state, inst);
+ cs_fsm_state_set(fsm, next_state, inst, mon_fsm_cb);
/* nothing to publish when the state did not move */
if (prev_state == fsm->curr_state) {
return;
}
state_str = mon_res_state_to_str(fsm, fsm->curr_state);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
icmap_set_string(key_name, state_str);
}
/*
 * FSM handler for MON_E_CONFIG_CHANGED; "data" is the resource instance.
 *
 * Re-reads "<icmap_path>poll_period" (accepted only within
 * [MON_MIN_PERIOD, MON_MAX_PERIOD]), cancels any pending poll timer, and
 * re-reads "<icmap_path>max" with the type selected by max_type.
 * When the threshold is missing it is reset to INT32_MAX and the resource
 * is stopped. Otherwise the resource is set running and its sampler is
 * invoked once.
 */
static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
{
	struct resource_instance *res = (struct resource_instance *)data;
	char key_name[ICMAP_KEYNAME_MAXLEN];
	uint64_t requested_period;
	int have_threshold = 0;

	ENTER();

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res->icmap_path, "poll_period");
	if (icmap_get_uint64(key_name, &requested_period) == CS_OK) {
		if (requested_period < MON_MIN_PERIOD || requested_period > MON_MAX_PERIOD) {
			log_printf (LOGSYS_LEVEL_WARNING,
				"Could NOT use poll_period:%"PRIu64" ms for resource %s",
				requested_period, res->name);
		} else {
			log_printf (LOGSYS_LEVEL_DEBUG,
				"poll_period changing from:%"PRIu64" to %"PRIu64".",
				res->period, requested_period);
			res->period = requested_period;
		}
	}

	/* Drop the pending poll; the sampler re-arms it if we end up running. */
	if (res->timer_handle) {
		api->timer_delete(res->timer_handle);
		res->timer_handle = 0;
	}

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res->icmap_path, "max");

	if (res->max_type == ICMAP_VALUETYPE_INT32) {
		if (icmap_get_int32(key_name, &res->max.int32) == CS_OK) {
			have_threshold = 1;
		} else {
			res->max.int32 = INT32_MAX;
			mon_fsm_state_set (fsm, MON_S_STOPPED, res);
		}
	}

	if (res->max_type == ICMAP_VALUETYPE_DOUBLE) {
		if (icmap_get_double(key_name, &res->max.dbl) == CS_OK) {
			have_threshold = 1;
		} else {
			res->max.dbl = INT32_MAX;
			mon_fsm_state_set (fsm, MON_S_STOPPED, res);
		}
	}

	if (!have_threshold) {
		return;
	}

	mon_fsm_state_set (fsm, MON_S_RUNNING, res);
	/*
	 * Sample right away: the period may have become shorter, and the
	 * sampler is what starts the timer again.
	 */
	res->update_stats_fn (res);
}
/*
 * FSM handler for MON_E_FAILURE; "data" is the resource instance.
 * Moves the resource to MON_S_FAILED. "event" is not used.
 *
 * Defined "static" to agree with its forward declaration.
 */
static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
{
	struct resource_instance * inst = (struct resource_instance *)data;

	ENTER();
	mon_fsm_state_set (fsm, MON_S_FAILED, inst);
}
/*
 * Return the share of memory plus swap currently in use, as an integer
 * percentage, or -1 when the figures cannot be obtained or the reported
 * total is not positive.
 *
 * Uses libstatgrab when HAVE_LIBSTATGRAB is defined, otherwise parses
 * /proc/meminfo when COROSYNC_LINUX is defined.
 */
static int32_t percent_mem_used_get(void)
{
#if defined(HAVE_LIBSTATGRAB)
	sg_mem_stats *mem_stats;
	sg_swap_stats *swap_stats;
	long long total, freemem;

	mem_stats = sg_get_mem_stats();
	swap_stats = sg_get_swap_stats();

	/* Both results are dereferenced below, so a NULL from either is a failure. */
	if (mem_stats == NULL || swap_stats == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s",
			sg_str_error(sg_get_error()));
		return -1;
	}
	total = mem_stats->total + swap_stats->total;
	freemem = mem_stats->free + swap_stats->free;
	if (total <= 0) {
		/* avoid dividing by zero */
		return -1;
	}
	return ((total - freemem) * 100) / total;
#else
#if defined(COROSYNC_LINUX)
	char *line_ptr;
	char line[512];
	unsigned long long value;
	FILE *f;
	long long total = 0;
	long long freemem = 0;

	if ((f = fopen("/proc/meminfo", "r")) == NULL) {
		return -1;
	}
	while ((line_ptr = fgets(line, sizeof(line), f)) != NULL) {
		/* only lines of the form "<label> <number> kB" are of interest */
		if (sscanf(line_ptr, "%*s %llu kB", &value) != 1) {
			continue;
		}
		value *= 1024;
		if (strncmp(line_ptr, "MemTotal:", 9) == 0) {
			total += value;
		} else if (strncmp(line_ptr, "MemFree:", 8) == 0) {
			freemem += value;
		} else if (strncmp(line_ptr, "SwapTotal:", 10) == 0) {
			total += value;
		} else if (strncmp(line_ptr, "SwapFree:", 9) == 0) {
			freemem += value;
		}
	}
	fclose(f);
	if (total <= 0) {
		/* no usable totals were parsed; avoid dividing by zero */
		return -1;
	}
	return ((total - freemem) * 100) / total;
#else
#error need libstatgrab or linux.
#endif /* COROSYNC_LINUX */
#endif /* HAVE_LIBSTATGRAB */
}
-
/*
 * Sampler for the memory resource; "data" is the resource instance.
 *
 * A positive reading is published under "<icmap_path>current" together
 * with a "<icmap_path>last_updated" timestamp, and MON_E_FAILURE is raised
 * when it exceeds the integer threshold and the resource is not already
 * failed. A reading of 0 or -1 publishes nothing. The poll timer is
 * re-armed in every case.
 */
static void mem_update_stats_fn (void *data)
{
struct resource_instance * inst = (struct resource_instance *)data;
int32_t new_value;
uint64_t timestamp;
char key_name[ICMAP_KEYNAME_MAXLEN];
new_value = percent_mem_used_get();
if (new_value > 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
icmap_set_uint32(key_name, new_value);
timestamp = cs_timestamp_get();
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, timestamp);
if (new_value > inst->max.int32 && inst->fsm.curr_state != MON_S_FAILED) {
- cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst);
+ cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
}
}
/* inst->period is in milliseconds */
api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
inst, inst->update_stats_fn, &inst->timer_handle);
}
/*
 * Return the 15-minute load average, or -1 when it cannot be obtained.
 *
 * Uses libstatgrab when HAVE_LIBSTATGRAB is defined, otherwise
 * getloadavg() when COROSYNC_LINUX is defined.
 */
static double min15_loadavg_get(void)
{
#if defined(HAVE_LIBSTATGRAB)
	sg_load_stats *stats = sg_get_load_stats ();

	if (stats != NULL) {
		return stats->min15;
	}
	log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s",
		sg_str_error (sg_get_error()));
	return -1;
#elif defined(COROSYNC_LINUX)
	double averages[3];

	if (getloadavg(averages, 3) < 0) {
		return -1;
	}
	/* the third slot is the 15-minute figure */
	return averages[2];
#else
#error need libstatgrab or linux.
#endif /* HAVE_LIBSTATGRAB / COROSYNC_LINUX */
}
/*
 * Sampler for the load-average resource; "data" is the resource instance.
 *
 * A positive reading is published under "<icmap_path>current" together
 * with a "<icmap_path>last_updated" timestamp, and MON_E_FAILURE is raised
 * when it exceeds the double threshold and the resource is not already
 * failed. The poll timer is re-armed in every case.
 */
static void load_update_stats_fn (void *data)
{
struct resource_instance * inst = (struct resource_instance *)data;
uint64_t timestamp;
char key_name[ICMAP_KEYNAME_MAXLEN];
double min15 = min15_loadavg_get();
if (min15 > 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
icmap_set_double(key_name, min15);
timestamp = cs_timestamp_get();
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, timestamp);
if (min15 > inst->max.dbl && inst->fsm.curr_state != MON_S_FAILED) {
/*
 * Pass the instance itself: the failure handler casts "data" back to
 * struct resource_instance *, so the address of the local pointer
 * would be misread as an instance.
 */
- cs_fsm_process (&inst->fsm, MON_E_FAILURE, &inst);
+ cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
}
}
/* inst->period is in milliseconds */
api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
inst, inst->update_stats_fn, &inst->timer_handle);
}
/*
 * icmap tracking callback for a resource's key prefix; "user_data" is the
 * resource instance registered in mon_instance_init().
 *
 * A deletion, or a modification of a key whose last dotted component is
 * "max" or "poll_period", feeds MON_E_CONFIG_CHANGED into the resource's
 * FSM. new_value and old_value are not used.
 */
static void mon_key_changed_cb (
int32_t event,
const char *key_name,
struct icmap_notify_value new_value,
struct icmap_notify_value old_value,
void *user_data)
{
struct resource_instance* inst = (struct resource_instance*)user_data;
char *last_key_part;
if (event == ICMAP_TRACK_DELETE && inst) {
log_printf (LOGSYS_LEVEL_WARNING,
"resource \"%s\" deleted from cmap!",
inst->name);
- cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
}
if (event == ICMAP_TRACK_MODIFY) {
/* isolate the component after the final '.' */
last_key_part = strrchr(key_name, '.');
if (last_key_part == NULL)
return ;
last_key_part++;
if (strcmp(last_key_part, "max") == 0 ||
strcmp(last_key_part, "poll_period") == 0) {
ENTER();
/* NOTE(review): unlike the delete branch, "inst" is not NULL-checked here. */
- cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
}
}
}
/*
 * Bring one monitored resource into service.
 *
 * Seeds the "current", "last_updated" and "state" keys under the
 * resource's icmap prefix, initialises its FSM in MON_S_STOPPED, takes
 * "poll_period" from icmap when present and within range (writing the
 * default when the key is absent), dispatches an initial
 * MON_E_CONFIG_CHANGED, and registers mon_key_changed_cb for changes under
 * the prefix.
 */
static void mon_instance_init (struct resource_instance* inst)
{
uint64_t tmp_value;
char key_name[ICMAP_KEYNAME_MAXLEN];
icmap_track_t icmap_track = NULL;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
if (inst->max_type == ICMAP_VALUETYPE_INT32) {
icmap_set_int32(key_name, 0);
} else {
icmap_set_double(key_name, 0);
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, 0);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
icmap_set_string(key_name, mon_stopped_str);
inst->fsm.name = inst->name;
inst->fsm.curr_entry = 0;
inst->fsm.curr_state = MON_S_STOPPED;
inst->fsm.table = mon_fsm_table;
inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
inst->fsm.state_to_str = mon_res_state_to_str;
inst->fsm.event_to_str = mon_res_event_to_str;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
icmap_set_uint64(key_name, inst->period);
}
else {
if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
inst->period = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, inst->name);
}
}
- cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
/* the track handle is kept only in this local variable */
icmap_track_add(inst->icmap_path,
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
mon_key_changed_cb, inst, &icmap_track);
}
/*
 * Exec init hook of the monitoring service: stores the corosync API
 * pointer and initialises the memory and load resources, in that order.
 * Always returns NULL.
 */
static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
	struct resource_instance *const monitored[] = {
		&memory_used_inst,
		&load_15min_inst,
	};
	size_t idx;

#if defined(HAVE_LIBSTATGRAB)
	sg_init();
#endif /* HAVE_LIBSTATGRAB */
#if defined(COROSYNC_SOLARIS)
	logsys_subsys_init();
#endif
	api = corosync_api;

	for (idx = 0; idx < sizeof(monitored) / sizeof(monitored[0]); idx++) {
		mon_instance_init (monitored[idx]);
	}

	return NULL;
}
diff --git a/exec/wd.c b/exec/wd.c
index befa4d0f..cb8b34fc 100644
--- a/exec/wd.c
+++ b/exec/wd.c
@@ -1,709 +1,740 @@
/*
* Copyright (c) 2010-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <sys/reboot.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/coroapi.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
-#include "../exec/fsm.h"
+#include "fsm.h"
#include "service.h"
/*
 * Health classification of a watched resource.
 * NOTE(review): no use of this type is visible in this part of the file.
 */
typedef enum {
WD_RESOURCE_GOOD,
WD_RESOURCE_FAILED,
WD_RESOURCE_STATE_UNKNOWN,
WD_RESOURCE_NOT_MONITORED
} wd_resource_state_t;
/*
 * State kept by the watchdog service for one watched resource.
 */
struct resource {
char res_path[ICMAP_KEYNAME_MAXLEN]; /* icmap key prefix; sub-key names are appended to it */
char *recovery; /* value of the "recovery" key, filled in by icmap_get_string() */
char name[CS_MAX_NAME_LENGTH]; /* resource name, used in log messages and as the FSM name */
time_t last_updated;
struct cs_fsm fsm; /* per-resource state machine */
corosync_timer_handle_t check_timer; /* handle of the pending health-check timer; 0 when none */
uint64_t check_timeout; /* health-check period in milliseconds */
icmap_track_t icmap_track; /* handle of the icmap tracker registered for res_path */
};
LOGSYS_DECLARE_SUBSYS("WD");
/*
* Service Interfaces required by service_message_handler struct
*/
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api);
static int wd_exec_exit_fn (void);
static void wd_resource_check_fn (void* resource_ref);
static struct corosync_api_v1 *api;
/* Default timeout, in seconds and in milliseconds. */
#define WD_DEFAULT_TIMEOUT_SEC 6
#define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
/* Accepted range for a resource's poll_period, in milliseconds. */
#define WD_MIN_TIMEOUT_MS 500
#define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC; /* seconds */
static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2); /* milliseconds; half the default timeout */
static int dog = -1; /* presumably the watchdog device descriptor, -1 when not open -- TODO confirm */
static corosync_timer_handle_t wd_timer;
static int watchdog_ok = 1; /* cleared by wd_resource_failed() for the "watchdog" and "quit" recoveries */
/*
 * Service engine descriptor for the watchdog service. Only the exec init
 * and exec exit hooks are populated; every other hook and engine table is
 * NULL or zero.
 */
struct corosync_service_engine wd_service_engine = {
.name = "corosync watchdog service",
.id = WD_SERVICE,
.priority = 1,
.private_data_size = 0,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.lib_init_fn = NULL,
.lib_exit_fn = NULL,
.lib_engine = NULL,
.lib_engine_count = 0,
.exec_engine = NULL,
.exec_engine_count = 0,
.confchg_fn = NULL,
.exec_init_fn = wd_exec_init_fn,
.exec_exit_fn = wd_exec_exit_fn,
.exec_dump_fn = NULL
};
static DECLARE_LIST_INIT (confchg_notify);
/*
* F S M
*/
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
/* States a watched resource can be in. */
enum wd_resource_state {
WD_S_RUNNING,
WD_S_FAILED,
WD_S_STOPPED
};
/* Events fed into the watchdog FSM. */
enum wd_resource_event {
WD_E_FAILURE,
WD_E_CONFIG_CHANGED
};
/* Printable names for the watchdog FSM's states and events. */
const char * wd_running_str = "running";
const char * wd_failed_str = "failed";
const char * wd_failure_str = "failure";
const char * wd_stopped_str = "stopped";
const char * wd_config_changed_str = "config_changed";
/*
 * Transition table: { state, event, handler, { allowed next states, -1 } }.
 * The rows for WD_E_FAILURE in the stopped and failed states have no
 * handler; cs_fsm_process() asserts on a matching row whose handler is NULL.
 */
struct cs_fsm_entry wd_fsm_table[] = {
{ WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} },
{ WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} },
{ WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
{ WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} },
{ WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
{ WD_S_FAILED, WD_E_FAILURE, NULL, {-1} },
};
/*
 * Accessor returning the address of the watchdog service engine descriptor.
 */
struct corosync_service_engine *wd_get_service_engine_ver0 (void)
{
	struct corosync_service_engine *engine = &wd_service_engine;

	return engine;
}
/*
 * Translate a watchdog FSM state into its printable name.
 * Returns NULL for a value that is not one of the known states.
 * The "fsm" argument is not used.
 */
static const char * wd_res_state_to_str(struct cs_fsm* fsm,
	int32_t state)
{
	if (state == WD_S_STOPPED) {
		return wd_stopped_str;
	}
	if (state == WD_S_RUNNING) {
		return wd_running_str;
	}
	if (state == WD_S_FAILED) {
		return wd_failed_str;
	}

	return NULL;
}
/*
 * Translate a watchdog FSM event into its printable name.
 * Returns NULL for a value that is not one of the known events.
 * The "fsm" argument is not used.
 */
static const char * wd_res_event_to_str(struct cs_fsm* fsm,
	int32_t event)
{
	const char *label = NULL;

	if (event == WD_E_CONFIG_CHANGED) {
		label = wd_config_changed_str;
	} else if (event == WD_E_FAILURE) {
		label = wd_failure_str;
	}

	return label;
}
/*
 * Notification callback handed to cs_fsm_process() and cs_fsm_state_set()
 * by this file. It logs the reported condition; for a missing table row,
 * a rejected transition, or an unrecognised cb_event it then calls
 * corosync_exit_error(COROSYNC_DONE_FATAL_ERR). "data" is not used.
 *
 * NOTE(review): the two state-set cases print the state of the current
 * table row rather than the curr_state argument.
 */
+static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
+ int32_t next_state, int32_t fsm_event, void *data)
+{
+ switch (cb_event) {
+ case CS_FSM_CB_EVENT_PROCESS_NF:
+ log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
+ fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ case CS_FSM_CB_EVENT_STATE_SET:
+ log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
+ fsm->name,
+ fsm->event_to_str(fsm, fsm_event),
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state));
+ break;
+ case CS_FSM_CB_EVENT_STATE_SET_NF:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
+ fsm->name,
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state),
+ fsm->event_to_str(fsm, fsm_event));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ default:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!");
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ }
+}
+
/*
 * Decide whether the resource behind "ref" is currently healthy.
 *
 * Returns CS_FALSE when the "last_updated" or "state" key cannot be read,
 * when the state is "disabled" or "failed", or when the last update is
 * older than 1.5 times the check period. Returns CS_TRUE otherwise,
 * including when last_updated still holds its initial value of 0.
 */
static int32_t wd_resource_state_is_ok (struct resource *ref)
{
	char key_name[ICMAP_KEYNAME_MAXLEN];
	char *state = NULL;
	uint64_t last_updated;
	uint64_t now;
	uint64_t allowed_period;
	int32_t healthy = CS_FALSE;

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated");
	if (icmap_get_uint64(key_name, &last_updated) != CS_OK) {
		/* no timestamp key */
		return CS_FALSE;
	}

	snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
	if (icmap_get_string(key_name, &state) != CS_OK) {
		/* no state key */
		goto out;
	}
	if (strcmp(state, "disabled") == 0) {
		goto out;
	}

	if (last_updated == 0) {
		/* still at its initial value: nothing has been sampled yet */
		healthy = CS_TRUE;
		goto out;
	}

	now = cs_timestamp_get();

	/*
	 * The monitor must have written a timestamp within the poll period
	 * plus a grace margin of half a period.
	 */
	allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
	if ((last_updated + allowed_period) < now) {
		log_printf (LOGSYS_LEVEL_ERROR,
			"last_updated %"PRIu64" ms too late, period:%"PRIu64".",
			(uint64_t)(now/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)),
			ref->check_timeout);
		goto out;
	}

	if (strcmp (state, wd_failed_str) != 0) {
		healthy = CS_TRUE;
	}

out:
	free(state);
	return healthy;
}
/*
 * FSM handler for WD_E_CONFIG_CHANGED; "data" is the watched resource.
 *
 * Re-reads "<res_path>poll_period" (accepted only within
 * [WD_MIN_TIMEOUT_MS, WD_MAX_TIMEOUT_MS]), then the "recovery" and "state"
 * keys. A missing "recovery" or "state" key stops the resource. Otherwise
 * the pending check timer is cancelled and, unless the state is "stopped",
 * a new one is armed and the resource is set running. "event" is not used.
 */
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
{
char *state;
uint64_t tmp_value;
uint64_t next_timeout;
struct resource *ref = (struct resource*)data;
char key_name[ICMAP_KEYNAME_MAXLEN];
next_timeout = ref->check_timeout;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period");
/* query the full "<res_path>poll_period" key built above, not the bare prefix */
if (icmap_get_uint64(key_name, &tmp_value) == CS_OK) {
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
log_printf (LOGSYS_LEVEL_DEBUG,
"poll_period changing from:%"PRIu64" to %"PRIu64".",
ref->check_timeout, tmp_value);
/*
* To ease the transition between poll_periods, the first timeout
* is the larger of the new and old values. This gives the
* monitoring system time to adjust.
*/
next_timeout = CS_MAX(tmp_value, ref->check_timeout);
ref->check_timeout = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, ref->name);
}
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery");
/* NOTE(review): any string already held in ref->recovery is overwritten here without being freed -- confirm ownership. */
if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a recovery key.", ref->name);
- cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
return;
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
if (icmap_get_string(key_name, &state) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a state key.", ref->name);
- cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
return;
}
if (ref->check_timer) {
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
}
if (strcmp(wd_stopped_str, state) == 0) {
- cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
} else {
api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
ref, wd_resource_check_fn, &ref->check_timer);
- cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
}
free(state);
}
/*
 * FSM handler for WD_E_FAILURE; "data" is the watched resource.
 *
 * Cancels the pending check timer, logs the failure, and applies the
 * resource's recovery policy: "watchdog" and "quit" clear watchdog_ok,
 * "reboot" and "shutdown" call reboot() with RB_AUTOBOOT and RB_POWER_OFF
 * respectively. Finally moves the resource to WD_S_FAILED.
 * "event" is not used.
 */
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
{
struct resource* ref = (struct resource*)data;
if (ref->check_timer) {
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
}
log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
ref->recovery, (char*)ref->name);
if (strcmp (ref->recovery, "watchdog") == 0 ||
strcmp (ref->recovery, "quit") == 0) {
watchdog_ok = 0;
}
else if (strcmp (ref->recovery, "reboot") == 0) {
reboot(RB_AUTOBOOT);
}
else if (strcmp (ref->recovery, "shutdown") == 0) {
reboot(RB_POWER_OFF);
}
- cs_fsm_state_set(fsm, WD_S_FAILED, data);
+ cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb);
}
/*
 * icmap tracking callback for a watched resource's key prefix;
 * "user_data" is the resource.
 *
 * An added or modified key, other than "last_updated" and "current",
 * feeds WD_E_CONFIG_CHANGED into the resource's FSM. Deletion of the
 * "state" key tears the resource down: its check timer and icmap tracker
 * are removed and the resource, including its recovery string, is freed.
 * new_val and old_val are not used.
 */
static void wd_key_changed(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
struct resource* ref = (struct resource*)user_data;
char *last_key_part;
if (ref == NULL) {
return ;
}
/* isolate the component after the final '.' */
last_key_part = strrchr(key_name, '.');
if (last_key_part == NULL) {
return ;
}
last_key_part++;
if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) {
if (strcmp(last_key_part, "last_updated") == 0 ||
strcmp(last_key_part, "current") == 0) {
return;
}
- cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref);
+ cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb);
}
if (event == ICMAP_TRACK_DELETE && ref != NULL) {
if (strcmp(last_key_part, "state") != 0) {
return ;
}
log_printf (LOGSYS_LEVEL_WARNING,
"resource \"%s\" deleted from cmap!",
ref->name);
/* only cancel a timer that is actually armed, as every other call site does */
if (ref->check_timer) {
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
}
icmap_track_delete(ref->icmap_track);
/* the recovery string is allocated by icmap_get_string() and owned by the resource */
free(ref->recovery);
free(ref);
}
}
/*
 * Periodic health check for one resource, run as a timer callback.
 *
 * If wd_resource_state_is_ok() reports CS_FALSE, WD_E_FAILURE is fed into
 * the resource's FSM and the timer is NOT re-armed by this function.
 * Otherwise the next check is scheduled check_timeout milliseconds from now.
 *
 * resource_ref - the struct resource to check
 */
static void wd_resource_check_fn (void* resource_ref)
{
struct resource* ref = (struct resource*)resource_ref;
if (wd_resource_state_is_ok (ref) == CS_FALSE) {
- cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref);
+ cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb);
return;
}
/* Resource is healthy: schedule the next poll. */
api->timer_add_duration(ref->check_timeout*MILLI_2_NANO_SECONDS,
ref, wd_resource_check_fn, &ref->check_timer);
}
/*
 * Create and register a watchdog resource for the cmap subtree res_path.
 *
 * Allocates a struct resource, initialises its FSM in WD_S_STOPPED, reads
 * (or seeds) the "poll_period" key and starts tracking res_path so that
 * wd_key_changed() is called on later changes. When both the "recovery" and
 * "state" keys exist, the first check timer is armed and the FSM is moved
 * to WD_S_RUNNING.
 *
 * res_path - key prefix of the resource; key names are built as
 *            res_path + leaf, so callers pass it with a trailing '.'
 * res_name - resource name, copied into the resource and used as FSM name
 *
 * return 0 - fully configured
 * return -1 - partially configured (the resource stays allocated and
 *             tracked, but no check timer is armed)
 *
 * NOTE(review): the calloc() result is used without a NULL check and both
 * strcpy() calls are unbounded - confirm the destination sizes.
 * NOTE(review): `state` is fetched below but never read or freed in this
 * function; if icmap_get_string() returns an allocated copy this leaks -
 * TODO confirm.
 */
static int32_t wd_resource_create (char *res_path, char *res_name)
{
char *state;
uint64_t tmp_value;
struct resource *ref = calloc (1, sizeof (struct resource));
char key_name[ICMAP_KEYNAME_MAXLEN];
strcpy(ref->res_path, res_path);
ref->check_timeout = WD_DEFAULT_TIMEOUT_MS;
ref->check_timer = 0;
strcpy(ref->name, res_name);
/* Per-resource state machine, starting out stopped. */
ref->fsm.name = ref->name;
ref->fsm.table = wd_fsm_table;
ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
ref->fsm.curr_entry = 0;
ref->fsm.curr_state = WD_S_STOPPED;
ref->fsm.state_to_str = wd_res_state_to_str;
ref->fsm.event_to_str = wd_res_event_to_str;
/*
 * poll_period: write the default into cmap when the key is missing;
 * otherwise accept the stored value only if it is within
 * [WD_MIN_TIMEOUT_MS, WD_MAX_TIMEOUT_MS], else keep the default and warn.
 */
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
icmap_set_uint64(key_name, ref->check_timeout);
} else {
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
ref->check_timeout = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, ref->name);
}
}
/*
 * Track the whole subtree before the mandatory keys are checked, so the
 * tracker is in place even when this function returns -1 below.
 */
icmap_track_add(res_path,
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
wd_key_changed,
ref, &ref->icmap_track);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery");
if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a recovery key.", ref->name);
return -1;
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state");
if (icmap_get_string(key_name, &state) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a state key.", ref->name);
return -1;
}
/* last_updated is optional; treat a missing key as 0. */
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
/* key does not exist.
*/
ref->last_updated = 0;
} else {
ref->last_updated = tmp_value;
}
/*
* delay the first check to give the monitor time to start working.
*/
tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
ref,
wd_resource_check_fn, &ref->check_timer);
- cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
+ cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
return 0;
}
/*
 * Timer callback that keeps the watchdog device from firing.
 *
 * While watchdog_ok is set, a keepalive is sent to the device (when one is
 * open) and the callback re-arms itself for tickle_timeout milliseconds.
 * Once watchdog_ok is cleared, an alert is logged and the timer is not
 * re-armed by this function.
 *
 * arg - unused
 */
static void wd_tickle_fn (void* arg)
{
	ENTER();

	if (!watchdog_ok) {
		log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
		return;
	}

	if (dog > 0) {
		ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
	}

	/* Schedule the next tickle. */
	api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
		wd_tickle_fn, &wd_timer);
}
/*
 * icmap tracking callback that picks up resources added at runtime.
 *
 * Reacts only to ICMAP_TRACK_ADD events for keys of the form
 * "resources.<type>.<name>.state"; for those it builds the prefix
 * "resources.<type>.<name>." and hands it to wd_resource_create().
 *
 * event     - ICMAP_TRACK_* event being reported
 * key_name  - full name of the key that was added
 * new_val   - not read by this function
 * old_val   - not read by this function
 * user_data - not read by this function
 */
static void wd_resource_created_cb(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	char name_part[ICMAP_KEYNAME_MAXLEN];
	char type_part[ICMAP_KEYNAME_MAXLEN];
	/* Holds the leaf key name first, then is reused for the prefix. */
	char scratch[ICMAP_KEYNAME_MAXLEN];
	int fields;

	if (event != ICMAP_TRACK_ADD) {
		return;
	}

	fields = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", type_part, name_part, scratch);
	if (fields != 3 || strcmp(scratch, "state") != 0) {
		return;
	}

	snprintf(scratch, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", type_part, name_part);
	wd_resource_create (scratch, name_part);
}
/*
 * Discover the resources already present in cmap and start watching for
 * new ones.
 *
 * Every key of the form "resources.<type>.<name>.state" yields one call to
 * wd_resource_create() with the prefix "resources.<type>.<name>.". After
 * the scan, ADD trackers are installed on "resources.process." and
 * "resources.system." so that wd_resource_created_cb() sees later additions.
 * An INFO message is logged when no resource ended up fully configured.
 */
static void wd_scan_resources (void)
{
	char name_part[ICMAP_KEYNAME_MAXLEN];
	char type_part[ICMAP_KEYNAME_MAXLEN];
	/* Holds the leaf key name first, then is reused for the prefix. */
	char scratch[ICMAP_KEYNAME_MAXLEN];
	const char *key_name;
	icmap_iter_t iter;
	/* The same handle variable receives both trackers added below. */
	icmap_track_t icmap_track = NULL;
	int configured = 0;
	int fields;

	ENTER();

	iter = icmap_iter_init("resources.");
	for (key_name = icmap_iter_next(iter, NULL, NULL);
	     key_name != NULL;
	     key_name = icmap_iter_next(iter, NULL, NULL)) {
		fields = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", type_part, name_part, scratch);
		if (fields != 3 || strcmp(scratch, "state") != 0) {
			continue;
		}

		snprintf(scratch, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", type_part, name_part);
		if (wd_resource_create (scratch, name_part) == 0) {
			configured++;
		}
	}
	icmap_iter_finalize(iter);

	icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
		wd_resource_created_cb, NULL, &icmap_track);
	icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
		wd_resource_created_cb, NULL, &icmap_track);

	if (configured == 0) {
		log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
	}
}
/*
 * Switch the watchdog timeout to `new` seconds.
 *
 * When a watchdog device is open, the timeout is pushed to the device (if
 * it advertises WDIOF_SETTIMEOUT) and then read back, so watchdog_timeout
 * ends up holding whatever the device actually reports. If the resulting
 * value equals `new`, tickle_timeout is set to half of it (in ms) and the
 * tickle timer is restarted; otherwise a warning is logged.
 *
 * new - requested timeout in seconds
 *
 * NOTE(review): the early return compares against the cached
 * watchdog_timeout, so a call that passes the current value (as
 * setup_watchdog() does) returns without touching the device - confirm
 * this is intended.
 */
static void watchdog_timeout_apply (uint32_t new)
{
	struct watchdog_info ident;
	uint32_t original_timeout = watchdog_timeout;

	if (new == original_timeout) {
		return;
	}

	watchdog_timeout = new;

	if (dog > 0) {
		/*
		 * Only look at ident.options when GETSUPPORT succeeded;
		 * on failure the structure is left uninitialised.
		 */
		if (ioctl(dog, WDIOC_GETSUPPORT, &ident) == 0 &&
		    (ident.options & WDIOF_SETTIMEOUT)) {
			/* yay! the dog is trained.
			 */
			ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
		}
		/* Read back what the device really uses. */
		ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
	}

	if (watchdog_timeout == new) {
		tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;

		/* reset the tickle timer in case it was reduced.
		 */
		api->timer_delete (wd_timer);
		api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
			wd_tickle_fn, &wd_timer);

		log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout);
		log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout);
	} else {
		log_printf (LOGSYS_LEVEL_WARNING,
			"Could not change the Watchdog timeout from %d to %d seconds",
			original_timeout, new);
	}
}
/*
 * Open /dev/watchdog and enable the card.
 *
 * On success the descriptor is stored in the file-level `dog`, the driver
 * identity is logged at DEBUG level (when it could be queried), the current
 * watchdog_timeout is passed to watchdog_timeout_apply() and the card is
 * enabled.
 *
 * return 0  - device opened
 * return -1 - device missing, not writable or could not be opened;
 *             `dog` is -1 in that case
 */
static int setup_watchdog(void)
{
	struct watchdog_info ident;
	/* WDIOC_SETOPTIONS takes a pointer to the option flags, not the flags. */
	int enable_opts = WDIOS_ENABLECARD;
	int have_ident;

	ENTER();

	if (access ("/dev/watchdog", W_OK) != 0) {
		log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe <a watchdog>");
		dog = -1;
		return -1;
	}

	/* here goes, lets hope they have "Magic Close"
	 */
	dog = open("/dev/watchdog", O_WRONLY);
	if (dog == -1) {
		log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't be opened.");
		return -1;
	}

	/* Right we have the dog.
	 * Lets see what breed it is.
	 */
	have_ident = (ioctl(dog, WDIOC_GETSUPPORT, &ident) == 0);

	log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by corosync.");
	if (have_ident) {
		/* ident is only filled in when the ioctl succeeded. */
		log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);
	}

	watchdog_timeout_apply (watchdog_timeout);

	ioctl(dog, WDIOC_SETOPTIONS, &enable_opts);

	return 0;
}
/*
 * icmap tracking callback for "resources.watchdog_timeout".
 *
 * Re-reads the key and, when it holds a value in the range 2..120 seconds,
 * applies it through watchdog_timeout_apply(). A readable but out-of-range
 * value is ignored. If the key cannot be read, the default
 * WD_DEFAULT_TIMEOUT_SEC is applied.
 *
 * The value is used only after icmap_get_uint32() returned CS_OK; on any
 * other result tmp_value_32 has not been written and must not be read.
 *
 * event, key_name, new_val, old_val, user_data - not read by this function
 */
static void wd_top_level_key_changed(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	uint32_t tmp_value_32;

	ENTER();

	if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) == CS_OK) {
		if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
			watchdog_timeout_apply (tmp_value_32);
		}
	}
	else {
		watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
	}
}
/*
 * Establish the starting watchdog timeout and watch the key for changes.
 *
 * - Key "resources.watchdog_timeout" unreadable: apply the default and
 *   store the resulting watchdog_timeout in cmap.
 * - Key readable and within 2..120: apply it.
 * - Key readable but out of range: apply the default.
 *
 * In every case a MODIFY tracker is then installed on the key, with
 * wd_top_level_key_changed() as its callback.
 */
static void watchdog_timeout_get_initial (void)
{
	icmap_track_t icmap_track = NULL;
	uint32_t configured;

	ENTER();

	if (icmap_get_uint32("resources.watchdog_timeout", &configured) == CS_OK) {
		if (configured < 2 || configured > 120) {
			watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
		} else {
			watchdog_timeout_apply (configured);
		}
	} else {
		watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
		icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
	}

	icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY,
		wd_top_level_key_changed, NULL, &icmap_track);
}
/*
 * Initialisation entry point of the watchdog code.
 *
 * Stores the corosync API table in the file-level `api`, then in order:
 * determines the initial timeout, opens the watchdog device, scans cmap for
 * resources, and arms the first tickle timer.
 *
 * corosync_api - API table used for all later timer calls
 *
 * Always returns NULL.
 */
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
	ENTER();

#ifdef COROSYNC_SOLARIS
	logsys_subsys_init();
#endif

	/* Must be set before the calls below: they schedule timers through it. */
	api = corosync_api;

	watchdog_timeout_get_initial();
	setup_watchdog();
	wd_scan_resources();

	/* Start tickling. */
	api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS,
		NULL,
		wd_tickle_fn,
		&wd_timer);

	return NULL;
}
/*
 * Shutdown entry point of the watchdog code.
 *
 * If a watchdog device is open, writes the single character 'V' to it (the
 * "magic close" character referred to in setup_watchdog()).
 * The result of the write is not checked.
 *
 * Always returns 0.
 */
static int wd_exec_exit_fn (void)
{
	char magic = 'V';

	ENTER();

	if (dog <= 0) {
		return 0;
	}

	log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
	write (dog, &magic, 1);

	return 0;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Wed, Jun 4, 5:58 AM (6 h, 12 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1854710
Default Alt Text
(41 KB)
Attached To
Mode
rC Corosync
Attached
Detach File
Event Timeline
Log In to Comment