Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3152324
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
26 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/heartbeat/sfex b/heartbeat/sfex
index ecc986c44..7176a7422 100755
--- a/heartbeat/sfex
+++ b/heartbeat/sfex
@@ -1,286 +1,261 @@
#!/bin/sh
#
# Shared Disk File EXclusiveness (SF-EX) OCF RA.
# prevent a destruction of data on shared disk file system
# due to Split-Brain.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#
# NOTE:
# As a prerequisite for running SF-EX, one device should be
# initialized as below.
#
# sfex_init [-n <numlocks>] <device>
#
# Example:
#
# /usr/sbin/sfex_init -n 10 /dev/sdb1
#
# if further information is necessary, See README.
#
#######################################################################
# Initialization:
# switching ocf-shellfuncs path
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs
unset LC_ALL; export LC_ALL
unset LANGUAGE; export LANGUAGE
#######################################################################
SFEX_DAEMON=${HA_BIN}/sfex_daemon
-SFEX_DAEMON_RE=${HA_BIN}/[s]fex_daemon
# Print a one-line synopsis of the supported actions to stdout.
# validate-all is listed because the action dispatcher at the bottom of
# this script accepts it.
usage() {
	cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
END
}
# Print the OCF resource-agent metadata (XML) for sfex to stdout.
# The document lists every instance parameter with its default and every
# action implemented by the dispatcher, including validate-all.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="sfex">
<version>1.3</version>
<longdesc lang="en">
Resource script for SF-EX. It manages a shared storage medium exclusively.
</longdesc>
<shortdesc lang="en">Manages exclusive access to shared storage using Shared Disk File EXclusiveness (SF-EX)</shortdesc>
<parameters>
<parameter name="device" unique="0" required="1">
<longdesc lang="en">
Block device path that stores exclusive control data.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="index" unique="0" required="0">
<longdesc lang="en">
Location in block device where exclusive control data is stored. 1 or more is specified. Default is 1.
</longdesc>
<shortdesc lang="en">index</shortdesc>
<content type="integer" default="1" />
</parameter>
<parameter name="collision_timeout" unique="0" required="0">
<longdesc lang="en">
Waiting time when a collision of lock acquisition is detected. Default is 1 second.
</longdesc>
<shortdesc lang="en">waiting time for lock acquisition</shortdesc>
<content type="integer" default="1" />
</parameter>
<parameter name="monitor_interval" unique="0" required="0">
<longdesc lang="en">
Monitor interval(sec). Default is 10 seconds
</longdesc>
<shortdesc lang="en">monitor interval</shortdesc>
<content type="integer" default="10" />
</parameter>
<parameter name="lock_timeout" unique="0" required="0">
<longdesc lang="en">
Valid term of lock(sec). Default is 20 seconds.
</longdesc>
<shortdesc lang="en">Valid term of lock</shortdesc>
<content type="integer" default="20" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="600" />
<action name="stop" timeout="10" />
<action name="monitor" depth="0" timeout="10" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#
# START: Exclusive control starts.
#
# It loops indefinitely until the lock can be acquired while the lock is
# held by the other node. In that case, the only way to stop it is the stop
# signal sent once the timeout configured in the CIB has elapsed.
#
sfex_start() {
# Start sfex_daemon for this resource.
# Returns OCF_SUCCESS if sfex_monitor already reports the daemon running or
# reports it running after launch; returns OCF_ERR_GENERIC otherwise.
- ocf_log info "sfex_daemon: starting..."
+ ocf_log info "sfex_daemon: starting..."
sfex_monitor
if [ $? -eq $OCF_SUCCESS ]; then
ocf_log info "sfex_daemon already started."
return $OCF_SUCCESS
fi
# Launch the daemon with the instance parameters; the resource name is
# passed with -r and the device is the only positional argument.
- $SFEX_DAEMON -i $INDEX -c $COLLISION_TIMEOUT -t $LOCK_TIMEOUT -m $MONITOR_INTERVAL -r ${OCF_RESOURCE_INSTANCE} -d ${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE} $DEVICE
+ $SFEX_DAEMON -i $INDEX -c $COLLISION_TIMEOUT -t $LOCK_TIMEOUT -m $MONITOR_INTERVAL -r ${OCF_RESOURCE_INSTANCE} $DEVICE
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log err "sfex_daemon failed to start"
+ ocf_log err "sfex_daemon failed to start."
return $OCF_ERR_GENERIC
fi
# Wait two seconds, then confirm through sfex_monitor that the daemon
# is actually running before reporting success.
sleep 2
sfex_monitor
if [ $? -eq $OCF_SUCCESS ]; then
ocf_log info "sfex_daemon: started."
return $OCF_SUCCESS
fi
- ocf_log err "sfex_daemon failed to write pid file in ${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE}"
+ ocf_log err "Can't find a sfex_daemon process. Starting a sfex_daemon failed."
return $OCF_ERR_GENERIC
-
}
#
# STOP: stop exclusive control
#
sfex_stop() {
# Stop the sfex_daemon of this resource by sending it SIGTERM.
# Returns OCF_SUCCESS when sfex_monitor reports OCF_NOT_RUNNING, either
# before the signal is sent or four seconds after it.
- ocf_log info "sfex_daemon: stopping..."
+ ocf_log info "sfex_daemon: stopping..."
- # Confirming whether the PID file exists.
- # If the PID file is lost, then a specific sfex_daemon cannot be stopped.
- if [ ! -f "${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE}" ]; then
- ps -ef | grep "$SFEX_DAEMON_RE.*$DEVICE" ||
- return $OCF_SUCCESS
- ocf_log err "Cannot stop sfex_daemon because PID file lost."
- return $OCF_ERR_GENERIC
- fi
+ # Check the sfex daemon has already stopped.
+ sfex_monitor
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ ocf_log info "sfex_daemon already stopped."
+ return $OCF_SUCCESS
+ fi
- # Check the sfex daemon has already stopped.
- sfex_monitor
- if [ $? -eq $OCF_NOT_RUNNING ]; then
- ocf_log info "sfex_daemon already stopped."
- # Delete PID file.
- /bin/rm -f ${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE}
- return $OCF_SUCCESS
- fi
-
- # Stop sfex daemon by sending SIGTERM signal.
- /bin/kill `cat ${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE}`
- rc=$?
- if [ $rc -ne 0 ]; then
- ocf_log err "sfex_daemon failed to stop"
- return $rc
- fi
# NOTE(review): the pgrep pattern below only requires " <resource name> "
# somewhere after the daemon path, so a resource whose name equals another
# option value (e.g. the index) could match a different daemon; anchoring
# on "-r <resource name> " would be stricter -- confirm.
# NOTE(review): if pgrep finds nothing (daemon exits between sfex_monitor
# and pgrep), kill is invoked without a PID and fails -- confirm this is
# acceptable.
+ # Stop sfex daemon by sending SIGTERM signal.
+ pid=`/usr/bin/pgrep -f "$SFEX_DAEMON .* ${OCF_RESOURCE_INSTANCE} "`
+ /bin/kill $pid
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log err "sfex_daemon failed to stop"
+ return $rc
+ fi
#sfex could be in state D if the device is gone, and then not terminate.
#Wait and check again if the daemon is already properly shutdown.
# NOTE(review): when the daemon is still running, sfex_monitor returns
# OCF_SUCCESS, so "return $rc" below reports success for a failed stop;
# returning OCF_ERR_GENERIC looks like the intent -- confirm.
sleep 4
sfex_monitor
rc=$?
if [ $rc -ne $OCF_NOT_RUNNING ]; then
ocf_log err "sfex_daemon failed to stop"
return $rc
fi
- ocf_log info "sfex_daemon: stopped."
- # Delete PID file.
- /bin/rm -f ${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE}
- return $OCF_SUCCESS
+ ocf_log info "sfex_daemon: stopped."
+ return $OCF_SUCCESS
}
sfex_monitor() {
# Report whether the sfex_daemon of this resource is running.
# Returns OCF_SUCCESS when a matching sfex_daemon process is found and
# OCF_NOT_RUNNING otherwise. sfex_start and sfex_stop both rely on this.
- ocf_log debug "sfex_monitor: started..."
-
-# if [ "${OCF_RESKEY_CRM_meta_interval:-0}" -eq "0" ]; then
-# # in case of probe, monitor operation is surely treated as
-# # under suspension. This will call start operation.
-# ocf_log info "probe..."
-# return $OCF_NOT_RUNNING
-# fi
+ ocf_log debug "sfex_monitor: started..."
- # Confirming whether the PID file exists.
- if [ -f "${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE}" ]; then
- # Confirming whether the sfex_daemon process exists.
- if /usr/bin/pgrep -f "$SFEX_DAEMON" | grep \
- `cat ${HA_RSCTMP}/pid-of-${OCF_RESOURCE_INSTANCE}` > /dev/null 2>&1; then
- ocf_log debug "sfex_monitor: complete. sfex_daemon is running."
- return $OCF_SUCCESS
- fi
- fi
# The added version matches on the full command line (pgrep -f): the daemon
# path followed later by the resource name surrounded by spaces.
+ # Find a sfex_daemon process using daemon name and resource name.
+ if /usr/bin/pgrep -f "$SFEX_DAEMON .* ${OCF_RESOURCE_INSTANCE} " > /dev/null 2>&1; then
+ ocf_log debug "sfex_monitor: complete. sfex_daemon is running."
+ return $OCF_SUCCESS
+ fi
- ocf_log debug "sfex_monitor: complete. sfex_daemon is not running."
- return $OCF_NOT_RUNNING
+ ocf_log debug "sfex_monitor: complete. sfex_daemon is not running."
+ return $OCF_NOT_RUNNING
}
#
# main process
#
# check arguments
# Exactly one argument (the action name) is expected; anything else prints
# the usage text and exits with OCF_ERR_ARGS.
if [ $# -ne 1 ]; then
- usage
- exit $OCF_ERR_ARGS
+ usage
+ exit $OCF_ERR_ARGS
fi
OP=$1
# These operations do not require instance parameters
# They are answered here and exit before any OCF_RESKEY_* value is read.
case $OP in
- meta-data)
- meta_data
- exit $OCF_SUCCESS
- ;;
- usage)
- usage
- exit $OCF_SUCCESS
- ;;
+ meta-data)
+ meta_data
+ exit $OCF_SUCCESS
+ ;;
+ usage)
+ usage
+ exit $OCF_SUCCESS
+ ;;
esac
# check parameters
# Copy the instance parameters from the OCF_RESKEY_* environment; every
# parameter except the device falls back to the default shown here.
DEVICE=$OCF_RESKEY_device
INDEX=${OCF_RESKEY_index:-1}
COLLISION_TIMEOUT=${OCF_RESKEY_collision_timeout:-1}
LOCK_TIMEOUT=${OCF_RESKEY_lock_timeout:-20}
MONITOR_INTERVAL=${OCF_RESKEY_monitor_interval:-10}
# Validate the device parameter. Exits the script with OCF_ERR_ARGS when
# DEVICE is empty or when the path is not writable; returns normally otherwise.
# In this file it is only invoked for the validate-all action.
sfex_validate () {
if [ -z "$DEVICE" ]; then
- ocf_log err "Please set OCF_RESKEY_device to device for sfex meta-data"
- exit $OCF_ERR_ARGS
+ ocf_log err "Please set OCF_RESKEY_device to device for sfex meta-data"
+ exit $OCF_ERR_ARGS
fi
# NOTE(review): the test is "-w" (path is writable), while the message says
# the device could not be found, and it is logged at "warn" although the
# script exits with an error -- confirm whether that is intended.
if [ ! -w "$DEVICE" ]; then
- ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
- exit $OCF_ERR_ARGS
+ ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
+ exit $OCF_ERR_ARGS
fi
}
# Refuse to run when OCF_RESKEY_CRM_meta_clone is set, i.e. when the
# resource is configured as a clone.
if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then
- ocf_log err "THIS RA DO NOT SUPPORT CLONE MODE!"
- exit $OCF_ERR_CONFIGURED
+ ocf_log err "THIS RA DO NOT SUPPORT CLONE MODE!"
+ exit $OCF_ERR_CONFIGURED
fi
# Dispatch the requested action. The handler's return status becomes the
# script's exit status through the final "exit $?"; unknown actions exit
# with OCF_ERR_UNIMPLEMENTED.
case $OP in
- start)
- sfex_start
- ;;
- stop)
- sfex_stop
- ;;
- monitor)
- sfex_monitor
- ;;
- validate-all)
- sfex_validate
- ;;
- *)
- exit $OCF_ERR_UNIMPLEMENTED
- ;;
+ start)
+ sfex_start
+ ;;
+ stop)
+ sfex_stop
+ ;;
+ monitor)
+ sfex_monitor
+ ;;
+ validate-all)
+ sfex_validate
+ ;;
+ *)
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
esac
exit $?
diff --git a/tools/sfex.h b/tools/sfex.h
index 652df96e3..9ecb528d7 100644
--- a/tools/sfex.h
+++ b/tools/sfex.h
@@ -1,184 +1,183 @@
/*-------------------------------------------------------------------------
*
* Shared Disk File EXclusiveness Control Program(SF-EX)
*
* sfex.h --- Primary include file for SF-EX *.c files.
*
* Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* $Id$
*
*-------------------------------------------------------------------------*/
#ifndef SFEX_H
#define SFEX_H
#include <clplumbing/cl_log.h>
#include <clplumbing/coredumps.h>
#include <clplumbing/realtime.h>
-#include <clplumbing/cl_pidfile.h>
#include <stdint.h>
/* version, revision */
/* These numbers are integer and, max number is 999.
If these numbers change, version numbers in the configure.ac
(AC_INIT, AM_INIT_AUTOMAKE) must change together.
*/
#define SFEX_VERSION 1
#define SFEX_REVISION 3
#if 0
#ifndef TRUE
# define TRUE 1
#endif
#ifndef FALSE
# define FALSE 0
#endif
#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#endif
/* for Linux >= 2.6, the alignment should be 512
for Linux < 2.6, the alignment should be sysconf(_SC_PAGESIZE)
we default to _SC_PAGESIZE
*/
#define SFEX_ODIRECT_ALIGNMENT sysconf(_SC_PAGESIZE)
/*
* sfex_controldata --- control data
*
* This is allocated at the head of the sfex meta-data area.
*
* magic number --- 4 bytes. This is fixed in {0x01, 0x1f, 0x71, 0x7f}.
*
* version number --- 4 bytes. This is printable integer number and
* range is from 0 to 999. This must be left-justify, null(0x00) padding, and
* make a last byte null.
*
* revision number --- 4 bytes. This is printable integer number and
* range is from 0 to 999. This must be left-justify, null(0x00) padding, and
* make a last byte null.
*
* blocksize --- 8bytes. This is printable integer number and range is from
* 512 to 9999999. This must be left-justify, null(0x00) padding, and make a
* last byte null. This is a size of control data and lock data(one lock data
* size when there are plural), and it is shown by number of bytes.
* To avoid partial writes, the block size is usually set to 512 bytes.
* If you use direct I/O (i.e. you specify --enable-directio to the configure
* script), note that this value is used for input and output buffer alignment.
* (In the Linux kernel 2.6, direct I/O does not work unless this value is a
* multiple of 512.)
* number of locks --- 4 bytes. This is printable integer number and range
* is from 1 to 999. This must be left-justify, null(0x00) padding, and make
* a last byte null. This is the number of locks following this control data.
*
* padding --- The size of this member depend on blocksize. It is adjusted so
* that the whole of the control data including this padding area becomes
* blocksize. The contents of padding area are all 0x00.
*/
/* Control data held in native C types (int / size_t). The same fields are
   declared as fixed-width byte arrays in sfex_controldata_ondisk. */
typedef struct sfex_controldata {
char magic[4]; /* magic number */
int version; /* version number */
int revision; /* revision number */
size_t blocksize; /* block size */
int numlocks; /* number of locks */
} sfex_controldata;
/* Byte-array form of the control data. The field widths (4/4/4/8/4 bytes)
   match the sizes given in the layout comment for sfex_controldata.
   NOTE(review): presumably this is the exact on-disk layout -- confirm
   against the read/write helpers. */
typedef struct sfex_controldata_ondisk {
uint8_t magic[4]; /* magic number, 4 bytes */
uint8_t version[4]; /* version number, 4 bytes */
uint8_t revision[4]; /* revision number, 4 bytes */
uint8_t blocksize[8]; /* block size, 8 bytes */
uint8_t numlocks[4]; /* number of locks, 4 bytes */
} sfex_controldata_ondisk;
/*
* sfex_lockdata --- lock data
*
* These data (sfex_controldata.numlocks of them) are allocated after
* sfex_controldata in the sfex meta-data area. The meaning of each member
* and the way it is stored in the meta-data area are as follows:
*
* lock status --- 1 byte. printable character. Content is either one of
* following;
* SFEX_STATUS_UNLOCK: It show the status that no node locks.
* SFEX_STATUS_LOCK: It show the status that nodename node is holding lock.
* (But there is an exception. Refer to explanation of "count" member.)
*
* increment counter --- 4 bytes. This is printable integer number and range
* is from 1 to 999. This must be left-justify, null(0x00) padding, and make
* a last byte null. The node holding a lock increments this counter
* periodically. If this counter does not increment for a certain period of
* time, we consider that the lock is invalid. If it overflow, return to 0.
* Initial value is 0.
*
* node name --- 256bytes. This is printable string. This must be left-justify,
* null(0x00) padding, and make a last byte null. This is node name that update
* lock data last. The node name must be same to get uname(2). Initial values
* are white spaces.
*
* padding --- The size of this member depend on blocksize. It is adjusted so
* that the whole of the control data including this padding area becomes
* blocksize. The contents of padding area are all 0x00.
*/
/* Lock data held in native C types. The same fields are declared as
   fixed-width byte arrays in sfex_lockdata_ondisk. */
typedef struct sfex_lockdata {
char status; /* status of lock */
int count; /* increment counter */
char nodename[256]; /* node name */
} sfex_lockdata;
/* Byte-array form of one lock record. The field widths (1/4/256 bytes)
   match the sizes given in the layout comment for sfex_lockdata.
   NOTE(review): presumably this is the exact on-disk layout -- confirm
   against the read/write helpers. */
typedef struct sfex_lockdata_ondisk {
uint8_t status; /* lock status, 1 byte */
uint8_t count[4]; /* increment counter, 4 bytes */
uint8_t nodename[256]; /* node name, 256 bytes */
} sfex_lockdata_ondisk;
/* character for lock status. This is used in sfex_lockdata.status */
#define SFEX_STATUS_UNLOCK 'u' /* unlock */
#define SFEX_STATUS_LOCK 'l' /* lock */
/* features of each member of control data and lock data */
#define SFEX_MAGIC "SFEX"
#define SFEX_MIN_NUMLOCKS 1
#define SFEX_MAX_NUMLOCKS 999
#define SFEX_MIN_COUNT 0
#define SFEX_MAX_COUNT 999
#define SFEX_MAX_NODENAME (sizeof(((sfex_lockdata *)0)->nodename) - 1)
/* update macro for increment counter:
   yields c + 1, wrapping to 0 once c has reached SFEX_MAX_COUNT.
   The argument is fully parenthesized so compound expressions expand
   correctly; it is still evaluated more than once, so do not pass an
   expression with side effects. */
#define SFEX_NEXT_COUNT(c) ((c) >= SFEX_MAX_COUNT ? (c) - SFEX_MAX_COUNT : (c) + 1)
/* extern variables */
extern const char *progname;
extern char *nodename;
extern unsigned long sector_size;
#define SFEX_LOG_ERR(args...) cl_log(LOG_ERR, args)
#define SFEX_LOG_INFO(args...) cl_log(LOG_INFO, args)
#if 0
#define SFEX_LOG_ERR(args...) do {fprintf(stderr, args);} while (0)
#define SFEX_LOG_INFO(args...) do {fprintf(stderr, args);} while (0)
#endif
#endif /* SFEX_H */
diff --git a/tools/sfex_daemon.c b/tools/sfex_daemon.c
index b37a0893f..d9ad15a30 100644
--- a/tools/sfex_daemon.c
+++ b/tools/sfex_daemon.c
@@ -1,376 +1,360 @@
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <limits.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <syslog.h>
#include "sfex.h"
#include "sfex_lib.h"
static int sysrq_fd;
static int lock_index = 1; /* default 1st lock */
static time_t collision_timeout = 1; /* default 1 sec */
static time_t lock_timeout = 60; /* default 60 sec */
time_t unlock_timeout = 60;
static time_t monitor_interval = 10;
static sfex_controldata cdata;
static sfex_lockdata ldata;
static sfex_lockdata ldata_new;
static const char *device;
const char *progname;
char *nodename;
static const char *rsc_id = "sfex";
-static const char *rscpidfile;
/*
 * Print the command-line synopsis to the given stream.
 * The option list mirrors the getopt() string used in main().
 */
static void usage(FILE *dist) {
	fprintf(dist,
		"usage: %s [-h] [-i <index>] [-c <collision_timeout>] [-t <lock_timeout>] "
		"[-m <monitor_interval>] [-n <nodename>] [-r <resource_id>] <device>\n",
		progname);
}
/*
 * Load the control data through read_controldata() into cdata and verify
 * that lock_index does not exceed the number of stored locks and that the
 * recorded block size equals sector_size.
 *
 * Returns 0 on success, -1 on any failure (the reason is logged).
 */
static int lock_index_check(void)
{
	if (read_controldata(&cdata) == -1) {
		SFEX_LOG_ERR("%s\n", "read_controldata failed in lock_index_check");
		return -1;
	}
#ifdef SFEX_DEBUG
	SFEX_LOG_INFO("version: %d\n", cdata.version);
	SFEX_LOG_INFO("revision: %d\n", cdata.revision);
	/* blocksize is a size_t: cast explicitly so the argument matches the
	   conversion specifier on every platform. */
	SFEX_LOG_INFO("blocksize: %lu\n", (unsigned long)cdata.blocksize);
	SFEX_LOG_INFO("numlocks: %d\n", cdata.numlocks);
#endif
	if (lock_index > cdata.numlocks) {
		SFEX_LOG_ERR("%s: ERROR: index %d is too large. %d locks are stored.\n",
			progname, lock_index, cdata.numlocks);
		return -1;
	}
	if (cdata.blocksize != sector_size) {
		SFEX_LOG_ERR("%s: ERROR: sector_size is not the same as the blocksize.\n", progname);
		return -1;
	}
	return 0;
}
/*
 * Acquire the lock at lock_index for this node, or terminate the process.
 *
 *   1. Read the current lock data.
 *   2. If another node holds the lock, wait lock_timeout seconds and read
 *      again; a changed counter means the holder is still refreshing the
 *      lock, so give up (exit status 2).
 *   3. Write the lock data with status SFEX_STATUS_LOCK and our node name.
 *   4. Wait collision_timeout seconds and read again; if the node name is
 *      no longer ours, another node overwrote the record at the same time
 *      (exit status 2).
 *   5. Advance the counter once more to extend the validity of the lock.
 *
 * Every read or write failure terminates the process with EXIT_FAILURE, so
 * a decision is never taken on stale or unread lock data.
 */
static void acquire_lock(void)
{
	unsigned int t;

	if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
		SFEX_LOG_ERR("%s\n", "read_lockdata failed in acquire_lock");
		exit(EXIT_FAILURE);
	}

	if ((ldata.status == SFEX_STATUS_LOCK) && (strncmp(nodename, (const char*)(ldata.nodename), sizeof(ldata.nodename)))) {
		/* sleep() may return early with the remaining time; keep
		   sleeping until the whole lock_timeout has elapsed. */
		t = lock_timeout;
		while (t > 0)
			t = sleep(t);
		if (read_lockdata(&cdata, &ldata_new, lock_index) == -1) {
			SFEX_LOG_ERR("%s\n", "read_lockdata failed in acquire_lock");
			exit(EXIT_FAILURE);
		}
		if (ldata.count != ldata_new.count) {
			SFEX_LOG_ERR("%s", "can\'t acquire lock: the lock's already hold by some other node.\n");
			exit(2);
		}
	}

	/* The lock acquisition is possible because it was not updated. */
	ldata.status = SFEX_STATUS_LOCK;
	ldata.count = SFEX_NEXT_COUNT(ldata.count);
	strncpy((char*)(ldata.nodename), nodename, sizeof(ldata.nodename));
	if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
		SFEX_LOG_ERR("%s", "write_lockdata failed\n");
		exit(EXIT_FAILURE);
	}

	/* detect the collision of lock */
	/* A collision occurs when two or more nodes reserve the lock at the
	   same time. Wait collision_timeout seconds, then check whether
	   another node has overwritten the lock data; if so, give up. */
	t = collision_timeout;
	while (t > 0)
		t = sleep(t);
	if (read_lockdata(&cdata, &ldata_new, lock_index) == -1) {
		/* Without a fresh read the comparison below would use stale
		   data, so treat this as fatal. */
		SFEX_LOG_ERR("%s", "read_lockdata failed\n");
		exit(EXIT_FAILURE);
	}
	if (strncmp((char*)(ldata.nodename), (const char*)(ldata_new.nodename), sizeof(ldata.nodename))) {
		SFEX_LOG_ERR("%s", "can\'t acquire lock: collision detected in the air.\n");
		exit(2);
	}

	/* extension of lock */
	/* The validity of the lock is extended because collision_timeout
	   seconds were spent on detecting a collision. */
	ldata.count = SFEX_NEXT_COUNT(ldata.count);
	if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
		SFEX_LOG_ERR("%s\n", "write_lockdata failed");
		exit(EXIT_FAILURE);
	}
	/* Success is informational, like "lock released" in release_lock(). */
	SFEX_LOG_INFO("%s", "lock acquired\n");
}
/*
 * Error reaction: spawn "crm_resource -F -r <rsc_id> -H <nodename>" in a
 * child process and terminate the calling process with EXIT_FAILURE.
 *
 * The parent never returns from this function. The child either becomes
 * crm_resource or, if execl() fails, logs the reason and leaves through
 * _exit() so it never continues running the daemon's code.
 */
static void error_todo (void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* The variadic argument list must end with a null *pointer*;
		   a bare NULL may be a plain integer constant. */
		execl("/usr/sbin/crm_resource", "crm_resource", "-F", "-r", rsc_id, "-H", nodename, (char *)NULL);
		/* execl() only returns on failure. */
		SFEX_LOG_ERR("failed to execute crm_resource: %s\n", strerror(errno));
		_exit(EXIT_FAILURE);
	}
	if (pid == -1) {
		SFEX_LOG_ERR("fork failed: %s\n", strerror(errno));
	}
	exit(EXIT_FAILURE);
}
/*
 * Failure reaction used when the lock turns out not to be ours.
 * With SFEX_TESTING defined the process simply exits with EXIT_FAILURE.
 * Otherwise "b\n" is written to sysrq_fd (opened on /proc/sysrq-trigger
 * in main()), the descriptor is closed and the process exits with
 * EXIT_FAILURE. Never returns.
 */
static void failure_todo(void)
{
#ifdef SFEX_TESTING
	exit(EXIT_FAILURE);
#else
	if (write(sysrq_fd, "b\n", 2) == -1) {
		SFEX_LOG_ERR("%s\n", strerror(errno));
	}
	close(sysrq_fd);
	exit(EXIT_FAILURE);
#endif
}
/*
 * Refresh the lock held by this node by advancing its counter.
 * A read or write failure triggers error_todo(); finding that the lock is
 * not held by this node triggers failure_todo(). In every failure case the
 * process exits with EXIT_FAILURE.
 */
static void update_lock(void)
{
	int owned_by_us;

	/* fetch the current lock record */
	if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
		error_todo();
		exit(EXIT_FAILURE);
	}

	/* the update is only allowed while the record is locked under our
	   own node name */
	owned_by_us = (ldata.status == SFEX_STATUS_LOCK)
		&& (strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename)) == 0);
	if (!owned_by_us) {
		SFEX_LOG_ERR("can't update lock.\n");
		failure_todo();
		exit(EXIT_FAILURE);
	}

	/* advance the counter and store the record */
	ldata.count = SFEX_NEXT_COUNT(ldata.count);
	if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
		error_todo();
		exit(EXIT_FAILURE);
	}
}
/*
 * Mark the lock as SFEX_STATUS_UNLOCK if this node still holds it.
 * Exits with EXIT_FAILURE when the lock data cannot be read or written and
 * with status 1 when the lock is not held by this node; returns normally
 * only after the unlocked record has been written.
 */
static void release_lock(void)
{
	int owned_by_us;

	/* fetch the current lock record; on failure just terminate */
	if (read_lockdata(&cdata, &ldata, lock_index) == -1) {
		exit(EXIT_FAILURE);
	}

	/* a record that is not locked under our node name counts as
	   already released */
	owned_by_us = (ldata.status == SFEX_STATUS_LOCK)
		&& (strncmp((const char*)(ldata.nodename), nodename, sizeof(ldata.nodename)) == 0);
	if (!owned_by_us) {
		SFEX_LOG_ERR("lock was already released.\n");
		exit(1);
	}

	/* store the record with the unlocked status */
	ldata.status = SFEX_STATUS_UNLOCK;
	if (write_lockdata(&cdata, &ldata, lock_index) == -1) {
		/* FIXME (kept from the original): we are going to self-stop */
		exit(EXIT_FAILURE);
	}
	SFEX_LOG_INFO("lock released\n");
}
/*
 * SIGTERM handler (installed in main() with SA_SIGINFO): logs, releases
 * the lock and exits with EXIT_SUCCESS. The three parameters are unused.
 *
 * NOTE(review): logging, the lock-data I/O inside release_lock() and
 * exit() are not on the POSIX list of async-signal-safe functions.
 * Setting a flag here and releasing the lock from the main loop would be
 * safer -- confirm.
 */
static void quit_handler(int signo, siginfo_t *info, void *context)
{
SFEX_LOG_INFO("quit_handler\n");
release_lock();
exit(EXIT_SUCCESS);
}
/*
 * Daemon entry point: parse the options, prepare the device, acquire the
 * lock, detach with daemon() and then refresh the lock every
 * monitor_interval seconds until the process is terminated.
 */
int main(int argc, char *argv[])
{
int ret;
progname = get_progname(argv[0]);
nodename = get_nodename();
#if 0
openlog("SFex Daemon", LOG_PID|LOG_CONS|LOG_NDELAY, LOG_USER);
#endif
/* read command line option */
/* opterr = 0 silences getopt()'s own messages; unknown options reach the
   '?' case below. */
opterr = 0;
while (1) {
- int c = getopt(argc, argv, "hi:c:t:m:n:r:d:");
+ int c = getopt(argc, argv, "hi:c:t:m:n:r:");
if (c == -1)
break;
switch (c) {
case 'h': /* help*/
usage(stdout);
exit(0);
/* For the numeric options below, strtoul() is called without an end
   pointer, so non-numeric text converts to 0 and is rejected by the
   lower-bound check. */
case 'i': /* -i <index> */
{
unsigned long l = strtoul(optarg, NULL, 10);
if (l < SFEX_MIN_NUMLOCKS || l > SFEX_MAX_NUMLOCKS) {
SFEX_LOG_ERR(
"%s: ERROR: index %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
progname, optarg,
(unsigned long)SFEX_MIN_NUMLOCKS,
(unsigned long)SFEX_MAX_NUMLOCKS);
exit(4);
}
lock_index = l;
}
break;
case 'c': /* -c <collision_timeout> */
{
unsigned long l = strtoul(optarg, NULL, 10);
if (l < 1 || l > INT_MAX) {
SFEX_LOG_ERR(
"%s: ERROR: collision_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
progname, optarg,
(unsigned long)1,
(unsigned long)INT_MAX);
exit(4);
}
collision_timeout = l;
}
break;
case 'm': /* -m <monitor_interval> */
{
unsigned long l = strtoul(optarg, NULL, 10);
if (l < 1 || l > INT_MAX) {
SFEX_LOG_ERR(
"%s: ERROR: monitor_interval %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
progname, optarg,
(unsigned long)1,
(unsigned long)INT_MAX);
exit(4);
}
monitor_interval = l;
}
break;
case 't': /* -t <lock_timeout> */
{
unsigned long l = strtoul(optarg, NULL, 10);
if (l < 1 || l > INT_MAX) {
SFEX_LOG_ERR(
"%s: ERROR: lock_timeout %s is out of range or invalid. it must be integer value between %lu and %lu.\n",
progname, optarg,
(unsigned long)1,
(unsigned long)INT_MAX);
exit(4);
}
lock_timeout = l;
}
break;
/* -n <nodename>: replaces the name obtained from get_nodename(). */
case 'n':
{
free(nodename);
if (strlen(optarg) > SFEX_MAX_NODENAME) {
SFEX_LOG_ERR("%s: ERROR: nodename %s is too long. must be less than %d byte.\n",
progname, optarg,
(unsigned int)SFEX_MAX_NODENAME);
exit(EXIT_FAILURE);
}
nodename = strdup(optarg);
}
break;
/* -r <resource id>: stored in rsc_id, which error_todo() passes to
   crm_resource. */
case 'r':
{
rsc_id = strdup(optarg);
}
break;
- case 'd':
- {
- rscpidfile = strdup(optarg);
- }
- break;
case '?': /* error */
usage(stderr);
exit(4);
}
}
/* check parameter except the option */
/* Exactly one positional argument, the device, must remain. */
if (optind >= argc) {
SFEX_LOG_ERR("%s: ERROR: no device specified.\n", progname);
usage(stderr);
exit(EXIT_FAILURE);
} else if (optind + 1 < argc) {
SFEX_LOG_ERR("%s: ERROR: too many arguments.\n", progname);
usage(stderr);
exit(EXIT_FAILURE);
}
device = argv[optind];
- if (rscpidfile == NULL) {
- SFEX_LOG_ERR("%s: ERROR: Directory for saving pid file is not specified.\n", progname);
- exit(EXIT_FAILURE);
- }
-
prepare_lock(device);
/* Keep /proc/sysrq-trigger open for failure_todo(), which writes "b\n"
   to it. */
#if !SFEX_TESTING
sysrq_fd = open("/proc/sysrq-trigger", O_WRONLY);
if (sysrq_fd == -1) {
SFEX_LOG_ERR("failed to open /proc/sysrq-trigger due to %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
#endif
ret = lock_index_check();
if (ret == -1)
exit(EXIT_FAILURE);
/* Install quit_handler for SIGTERM so the lock is released when the
   daemon is asked to terminate. */
{
struct sigaction sig_act;
sigemptyset (&sig_act.sa_mask);
sig_act.sa_flags = SA_SIGINFO;
sig_act.sa_sigaction = quit_handler;
ret = sigaction(SIGTERM, &sig_act, NULL);
if (ret == -1) {
SFEX_LOG_ERR("sigaction failed\n");
exit(EXIT_FAILURE);
}
}
SFEX_LOG_INFO("Starting SFeX Daemon...\n");
/* acquire lock first.*/
/* acquire_lock() runs before daemon(), so its exit status is the exit
   status of the process the resource agent started. */
acquire_lock();
/* daemon(0, 1): first argument 0 changes the working directory to "/",
   second argument 1 leaves stdin/stdout/stderr open (see daemon(3)). */
if (daemon(0, 1) != 0) {
cl_perror("%s::%d: daemon() failed.", __FUNCTION__, __LINE__);
release_lock();
exit(EXIT_FAILURE);
}
- if (cl_lock_pidfile(rscpidfile) < 0) {
- SFEX_LOG_ERR("Creating pidfile failed.");
- release_lock();
- exit(EXIT_FAILURE);
- }
cl_make_realtime(-1, -1, 128, 128);
SFEX_LOG_INFO("SFeX Daemon started.\n");
/* Refresh the lock forever; update_lock() terminates the process itself
   on any failure. */
while (1) {
sleep (monitor_interval);
update_lock();
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Feb 24, 8:21 PM (5 h, 52 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1464435
Default Alt Text
(26 KB)
Attached To
Mode
rR Resource Agents
Attached
Detach File
Event Timeline
Log In to Comment