Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/cman/cman_tool/cman_tool.h b/cman/cman_tool/cman_tool.h
index fa55edbd1..7476e1f6a 100644
--- a/cman/cman_tool/cman_tool.h
+++ b/cman/cman_tool/cman_tool.h
@@ -1,99 +1,100 @@
#ifndef __CMAN_TOOL_DOT_H__
#define __CMAN_TOOL_DOT_H__
#include <sys/types.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <netdb.h>
#include <limits.h>
#include <unistd.h>
/* Program name printed as the prefix of every die() message; defined in main.c. */
extern char *prog_name;
/* Boolean constants, only defined when the including file has not done so already. */
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif
/*
 * Print "<prog_name>: <formatted message>\n" on stderr and terminate the
 * process with EXIT_FAILURE. A newline is appended to fmt, so fmt must be a
 * string literal (it is concatenated with "\n"). Uses the GNU named
 * variadic-macro syntax (args...).
 */
#define die(fmt, args...) \
do { \
fprintf(stderr, "%s: ", prog_name); \
fprintf(stderr, fmt "\n", ##args); \
exit(EXIT_FAILURE); \
} while (0)
#define DEFAULT_VOTES 1
/* Capacity of the nodenames[]/interfaces[] arrays in struct commandline. */
#define MAX_INTERFACES 10
/* Number of slots in the parsed -F format array. */
#define MAX_FORMAT_OPTS 10
#define MAX_NODE_NAME_LEN 65
#define MAX_MCAST_NAME_LEN 256
#define MAX_PATH_LEN 256
/* Bit tested in commandline.verbose to enable cman_tool's own startup tracing. */
#define DEBUG_STARTUP_ONLY 32
/*
 * Column selectors for the "nodes -F <format>" output.
 * get_format_opt() maps the keywords "id", "name", "type" and "addr" to
 * these values; anything else becomes FMT_NONE, which prints nothing.
 */
enum format_opt
{
FMT_NONE,
FMT_ID,
FMT_NAME,
FMT_TYPE,
FMT_ADDR,
};
/*
 * Parsed command line of cman_tool.
 *
 * The structure is zeroed in main() and filled in by decode_arguments().
 * The *_opt members are flags recording that the matching option was given
 * on the command line.
 */
struct commandline
{
/* One of the OP_* codes selected by the sub-command word. */
int operation;
/* Number of valid entries in nodenames[] (one per -n option). */
int num_nodenames;
char *multicast_addr;
char *nodenames[MAX_INTERFACES];
char *interfaces[MAX_INTERFACES];
char *override_nodename;
char *key_filename;
char *filename;
/* Raw argument of -F; split on ',' by show_nodes(). */
char *format_opts;
/* Config module name (-C); decode_arguments() presets it to DEFAULT_CONFIG_MODULE. */
char *config_lcrso;
int votes;
int expected_votes;
int two_node;
int port;
char clustername[MAX_CLUSTER_NAME_LEN];
/* Set by the "remove" / "force" words that may follow "leave". */
int remove;
int force;
/* Debug mask from -d; 255 when -d is given without a value. */
int verbose;
int nodeid;
/* Maximum wait in seconds (-t); 0 means no timeout is armed. */
int timeout;
unsigned int config_version;
int config_version_opt;
int votes_opt;
int expected_votes_opt;
int port_opt;
int nodeid_opt;
int clustername_opt;
int wait_opt;
int wait_quorate_opt;
int fence_opt;
int addresses_opt;
int noconfig_opt;
int nosetpri_opt;
+ int noopenais_opt;
};
typedef struct commandline commandline_t;
int join(commandline_t *comline, char *envp[]);
char *cman_error(int err);
#endif /* __CMAN_TOOL_DOT_H__ */
diff --git a/cman/cman_tool/join.c b/cman/cman_tool/join.c
index 98ed2f264..72462d24e 100644
--- a/cman/cman_tool/join.c
+++ b/cman/cman_tool/join.c
@@ -1,251 +1,254 @@
#include <sys/wait.h>
#include <stdint.h>
#include <signal.h>
#include <netinet/in.h>
#include "libcman.h"
#include "cman_tool.h"
/* Capacity of the argument and environment vectors handed to execve(). */
#define MAX_ARGS 128
/* Argument vector built by join() for the corosync daemon. */
static char *argv[MAX_ARGS];
/* Environment vector built by join() for the corosync daemon. */
static char *envp[MAX_ARGS];
/*
 * Detach the calling process from its terminal.
 *
 * stdin and stdout are always redirected to /dev/null; stderr is redirected
 * as well when close_stderr is non-zero. Afterwards the process starts a new
 * session with setsid().
 *
 * Every failure is fatal: the function leaves through exit(3) when /dev/null
 * cannot be opened, or through die() for any close()/dup2() error.
 */
static void be_daemon(int close_stderr)
{
	int devnull = open("/dev/null", O_RDWR);

	if (devnull == -1) {
		perror("Can't open /dev/null");
		exit(3);
	}

	/* Detach ourself from the calling environment */
	if (close(0) || close(1)) {
		die("Error closing terminal FDs");
	}
	if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0) {
		die("Error setting terminal FDs to /dev/null: %m");
	}
	if (close_stderr) {
		if (close(2)) {
			die("Error closing stderr FD");
		}
		/*
		 * The test used to read "!dup2(devnull, 2) < 0", which negates
		 * the return value first and therefore could never be true.
		 */
		if (dup2(devnull, 2) < 0) {
			die("Error setting stderr FD to /dev/null: %m");
		}
	}

	/*
	 * The duplicates keep /dev/null open; drop the original descriptor so
	 * it is not inherited by the program exec'd after this call.
	 */
	if (devnull > 2)
		close(devnull);

	setsid();
}
/*
 * Launch the corosync daemon with a cman specific environment and wait
 * until a cman admin connection can be opened.
 *
 * comline   - parsed command line; the override options are exported to the
 *             daemon as CMAN_* environment variables.
 * main_envp - environment of the calling process; entries starting with
 *             "COROSYNC_" are forwarded to the daemon unchanged.
 *
 * Returns 0 after the connection has been opened and closed again. Every
 * failure path ends in die(), which terminates the process.
 *
 * Lines carrying a leading '+' or '-' are diff markers of the change set
 * this listing was taken from and are reproduced verbatim.
 */
int join(commandline_t *comline, char *main_envp[])
{
	int i, err;
	int envptr = 0;
	int argvptr = 0;
	char scratch[1024];
	cman_handle_t h;
	int status;
	pid_t corosync_pid;
	int p[2];

	/*
	 * If we can talk to cman then we're already joined (or joining);
	 */
	h = cman_admin_init(NULL);
	if (h)
		die("Node is already active");

	/* Set up environment variables for override */
	if (comline->multicast_addr) {
		snprintf(scratch, sizeof(scratch), "CMAN_MCAST_ADDR=%s", comline->multicast_addr);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->votes_opt) {
		snprintf(scratch, sizeof(scratch), "CMAN_VOTES=%d", comline->votes);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->expected_votes_opt) {
		snprintf(scratch, sizeof(scratch), "CMAN_EXPECTEDVOTES=%d", comline->expected_votes);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->port) {
		snprintf(scratch, sizeof(scratch), "CMAN_IP_PORT=%d", comline->port);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->nodeid) {
		snprintf(scratch, sizeof(scratch), "CMAN_NODEID=%d", comline->nodeid);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->clustername_opt) {
		snprintf(scratch, sizeof(scratch), "CMAN_CLUSTER_NAME=%s", comline->clustername);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->nodenames[0]) {
		snprintf(scratch, sizeof(scratch), "CMAN_NODENAME=%s", comline->nodenames[0]);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->key_filename) {
		snprintf(scratch, sizeof(scratch), "CMAN_KEYFILE=%s", comline->key_filename);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->two_node) {
		snprintf(scratch, sizeof(scratch), "CMAN_2NODE=true");
		envp[envptr++] = strdup(scratch);
	}
	/*
	 * NOTE(review): the XOR is non-zero for every value of verbose except
	 * exactly DEBUG_STARTUP_ONLY (including 0), while the later tests in
	 * this function use "& ~DEBUG_STARTUP_ONLY" - confirm which is meant.
	 */
	if (comline->verbose ^ DEBUG_STARTUP_ONLY) {
		snprintf(scratch, sizeof(scratch), "CMAN_DEBUGLOG=%d", comline->verbose);
		envp[envptr++] = strdup(scratch);
	}
	if (comline->noconfig_opt) {
		envp[envptr++] = strdup("CMAN_NOCONFIG=true");
- envp[envptr++] = strdup("COROSYNC_DEFAULT_CONFIG_IFACE=cmanpreconfig");
+ snprintf(scratch, sizeof(scratch), "COROSYNC_DEFAULT_CONFIG_IFACE=cmanpreconfig%s",
+ comline->noopenais_opt?"":":openaisserviceenable");
+ envp[envptr++] = strdup(scratch);
	}
	else {
- snprintf(scratch, sizeof(scratch), "COROSYNC_DEFAULT_CONFIG_IFACE=%s:cmanpreconfig", comline->config_lcrso);
+ snprintf(scratch, sizeof(scratch), "COROSYNC_DEFAULT_CONFIG_IFACE=%s:cmanpreconfig%s", comline->config_lcrso,
+ comline->noopenais_opt?"":":openaisserviceenable");
		envp[envptr++] = strdup(scratch);
	}

	/* Copy any COROSYNC_* env variables to the new daemon */
	i=0;
	while (i < MAX_ARGS && main_envp[i]) {
		if (strncmp(main_envp[i], "COROSYNC_", 9) == 0) {
			/*
			 * Two slots must stay free for CMAN_PIPE and the
			 * terminating NULL that are appended below; without
			 * this bound envp[] could be written past its end.
			 */
			if (envptr >= MAX_ARGS - 2)
				die("too many COROSYNC_* environment variables");
			envp[envptr++] = main_envp[i];
		}
		i++;
	}

	/* Create a pipe to monitor cman startup progress */
	if (pipe(p) < 0)
		die("unable to create pipe: %s", strerror(errno));
	fcntl(p[1], F_SETFD, 0); /* Don't close on exec */
	snprintf(scratch, sizeof(scratch), "CMAN_PIPE=%d", p[1]);
	envp[envptr++] = strdup(scratch);
	envp[envptr++] = NULL;

	argv[0] = "corosync";
	if (comline->verbose & ~DEBUG_STARTUP_ONLY)
		argv[++argvptr] = "-f";
	if (comline->nosetpri_opt)
		argv[++argvptr] = "-p";
	argv[++argvptr] = NULL;

	/* Fork/exec cman */
	switch ( (corosync_pid = fork()) )
	{
	case -1:
		die("fork of corosync daemon failed: %s", strerror(errno));

	case 0: /* child */
		close(p[0]);
		if (comline->verbose & DEBUG_STARTUP_ONLY) {
			fprintf(stderr, "Starting %s", COROSYNCBIN);
			for (i=0; i< argvptr; i++) {
				fprintf(stderr, " %s", argv[i]);
			}
			fprintf(stderr, "\n");
			for (i=0; i<envptr-1; i++) {
				fprintf(stderr, "%s\n", envp[i]);
			}
		}
		be_daemon(!(comline->verbose & ~DEBUG_STARTUP_ONLY));
		execve(COROSYNCBIN, argv, envp);

		/* exec failed - tell the parent process */
		snprintf(scratch, sizeof(scratch), "execve of " COROSYNCBIN " failed: %s", strerror(errno));
		err = write(p[1], scratch, strlen(scratch));
		exit(1);
		break;

	default: /* parent */
		break;
	}

	/* Give the daemon a chance to start up, and monitor the pipe FD for messages */
	i = 0;
	close(p[1]);

	/* Wait for the process to start or die */
	sleep(1);
	do {
		fd_set fds;
		struct timeval tv={1, 0};
		char message[1024];

		FD_ZERO(&fds);
		FD_SET(p[0], &fds);

		status = select(p[0]+1, &fds, NULL, NULL, &tv);

		/* Did we get an error? */
		if (status == 1) {
			int len;

			/*
			 * Keep the comparison out of the assignment: the old
			 * "len = read(...) > 0" stored 0 or 1 in len. One byte
			 * is reserved so the text can be NUL terminated before
			 * it is parsed and printed as a string.
			 */
			len = read(p[0], message, sizeof(message) - 1);
			if (len > 0) {
				message[len] = '\0';

				/* Success! get the new PID of double-forked corosync */
				if (sscanf(message, "SUCCESS: %d", &corosync_pid) == 1) {
					if (comline->verbose & DEBUG_STARTUP_ONLY)
						fprintf(stderr, "corosync running, process ID is %d\n", corosync_pid);
					status = 0;
				}
				else {
					fprintf(stderr, "cman not started: %s\n", message);
				}
				break;
			}
			else if (len < 0 && errno == EINTR) {
				continue;
			}
			else { /* Error or EOF - check the child status */
				int pidstatus = 0;

				status = waitpid(corosync_pid, &pidstatus, WNOHANG);
				if (status == -1 && errno == ECHILD) {
					fprintf(stderr, "cman not started\n");
					break;
				}
				/*
				 * waitpid() only fills in pidstatus when it
				 * returns the child's pid; with WNOHANG a return
				 * of 0 means "no state change" and the status
				 * word must not be inspected.
				 */
				if (status > 0 && pidstatus != 0) {
					if (WIFEXITED(pidstatus))
						fprintf(stderr, "corosync died with status: %d\n", WEXITSTATUS(pidstatus));
					if (WIFSIGNALED(pidstatus))
						fprintf(stderr, "corosync died with signal: %d\n", WTERMSIG(pidstatus));
					status = -1;
					break;
				}
				else {
					status = 0; /* Try to connect */
				}
			}
		}
	} while (status != 0);
	close(p[0]);

	/* If corosync has started, try to connect to cman ... if it's still there */
	if (status == 0) {
		do {
			if (status == 0) {
				if (kill(corosync_pid, 0) < 0) {
					die("corosync died during startup\n");
				}

				h = cman_admin_init(NULL);
				if (!h && comline->verbose & DEBUG_STARTUP_ONLY)
				{
					fprintf(stderr, "waiting for corosync to start\n");
				}
			}
			sleep (1);
		} while (!h && ++i < 100);
	}

	if (!h)
		die("corosync daemon didn't start");

	if ((comline->verbose & DEBUG_STARTUP_ONLY) && !cman_is_active(h))
		fprintf(stderr, "corosync started, but not joined the cluster yet.\n");

	cman_finish(h);
	return 0;
}
diff --git a/cman/cman_tool/main.c b/cman/cman_tool/main.c
index 91d2e4402..4c6d6679b 100644
--- a/cman/cman_tool/main.c
+++ b/cman/cman_tool/main.c
@@ -1,1062 +1,1067 @@
#include <inttypes.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <netinet/in.h>
#include "copyright.cf"
#include "libcman.h"
#include "cman_tool.h"
#define DEFAULT_CONFIG_MODULE "xmlconfig"
-#define OPTION_STRING ("m:n:v:e:2p:c:r:i:N:t:o:k:F:C:VPwfqah?Xd::")
+#define OPTION_STRING ("m:n:v:e:2p:c:r:i:N:t:o:k:F:C:VAPwfqah?Xd::")
/*
 * Operation codes. decode_arguments() stores one of them in
 * commandline.operation according to the sub-command word, and main()
 * dispatches on it. 0 (no code) means no operation was given.
 */
#define OP_JOIN 1
#define OP_LEAVE 2
#define OP_EXPECTED 3
#define OP_VOTES 4
#define OP_KILL 5
#define OP_VERSION 6
#define OP_WAIT 7
#define OP_STATUS 8
#define OP_NODES 9
#define OP_SERVICES 10
#define OP_DEBUG 11
#define OP_DUMP_OBJDB 12
/*
 * Print the usage text on stdout.
 *
 * subcmd - an OP_* code restricting the output to that sub-command's
 *          section, or 0 to print every section. The general header and
 *          the global options are always printed.
 */
static void print_usage(int subcmd)
{
printf("Usage:\n");
printf("\n");
printf("%s <join|leave|kill|expected|votes|version|wait|status|nodes|services|debug> [options]\n",
prog_name);
printf("\n");
printf("Options:\n");
printf(" -h Print this help, then exit\n");
printf(" -V Print program version information, then exit\n");
printf(" -d Enable debug output\n");
printf("\n");
if (!subcmd || subcmd == OP_JOIN) {
printf("join\n");
printf(" Cluster & node information is taken from configuration modules.\n");
printf(" These switches are provided to allow those values to be overridden.\n");
printf(" Use them with extreme care.\n\n");
printf(" -m <addr> Multicast address to use\n");
printf(" -v <votes> Number of votes this node has\n");
printf(" -e <votes> Number of expected votes for the cluster\n");
printf(" -p <port> UDP port number for cman communications\n");
printf(" -n <nodename> The name of this node (defaults to hostname)\n");
printf(" -c <clustername> The name of the cluster to join\n");
printf(" -N <id> Node id\n");
printf(" -C <module> Config file reader (default: " DEFAULT_CONFIG_MODULE ")\n");
printf(" -w Wait until node has joined a cluster\n");
printf(" -q Wait until the cluster is quorate\n");
printf(" -t Maximum time (in seconds) to wait\n");
printf(" -k <file> Private key file for AIS communications\n");
printf(" -P Don't set aisexec to realtime priority\n");
printf(" -X Use internal cman defaults for configuration\n");
+ printf(" -A Don't load openais services\n");
printf("\n");
}
if (!subcmd || subcmd == OP_WAIT) {
printf("wait Wait until the node is a member of a cluster\n");
printf(" -q Wait until the cluster is quorate\n");
printf(" -t Maximum time (in seconds) to wait\n");
printf("\n");
}
if (!subcmd || subcmd == OP_LEAVE) {
printf("leave\n");
printf(" -w If cluster is in transition, wait and keep trying\n");
printf(" -t Maximum time (in seconds) to wait\n");
printf(" remove Tell other nodes to ajust quorum downwards if necessary\n");
printf(" force Leave even if cluster subsystems are active\n");
printf("\n");
}
if (!subcmd || subcmd == OP_KILL) {
printf("kill\n");
printf(" -n <nodename> The name of the node to kill (can specify multiple times)\n");
printf("\n");
}
if (!subcmd || subcmd == OP_EXPECTED) {
printf("expected\n");
printf(" -e <votes> New number of expected votes for the cluster\n");
printf("\n");
}
if (!subcmd || subcmd == OP_VOTES) {
printf("votes\n");
printf(" -v <votes> New number of votes for this node\n");
printf("\n");
}
if (!subcmd || subcmd == OP_STATUS) {
printf("status Show local record of cluster status\n");
printf("\n");
}
if (!subcmd || subcmd == OP_NODES) {
printf("nodes Show local record of cluster nodes\n");
printf(" -f Also show when node was last fenced\n");
printf(" -a Also show node address(es)\n");
printf(" -n <nodename> Only show information for specific node\n");
printf(" -F <format> Specify output format (see man page)\n");
printf("\n");
}
if (!subcmd || subcmd == OP_SERVICES) {
printf("services Show local record of cluster services\n");
printf("\n");
}
if (!subcmd || subcmd == OP_VERSION) {
printf("version\n");
printf(" -r <config> A new config version to set on all members\n");
printf("\n");
}
}
/*
 * SIGALRM handler installed when a wait timeout (-t) is armed: reports the
 * timeout on stderr and ends the process with exit status 2.
 * The signal number is not used.
 */
static void sigalarm_handler(int sig)
{
	fputs("Timed-out waiting for cluster\n", stderr);
	exit(2);
}
/*
 * Open a connection to cman.
 *
 * priv - non-zero requests an admin connection (cman_admin_init), zero a
 *        plain one (cman_init).
 *
 * Returns the handle. When no connection can be opened the process is
 * terminated through die(); EACCES gets its own message.
 */
static cman_handle_t open_cman_handle(int priv)
{
	cman_handle_t handle = priv ? cman_admin_init(NULL) : cman_init(NULL);

	if (handle)
		return handle;

	if (errno == EACCES)
		die("Cannot open connection to cman, permission denied.");
	else
		die("Cannot open connection to cman, is it running ?");

	return handle;
}
/*
 * Print the textual form of a socket address on stdout, without a
 * trailing separator or newline.
 *
 * addr - points at a struct sockaddr_storage. AF_INET6 is formatted as an
 *        IPv6 address, every other family is read as struct sockaddr_in.
 *
 * If inet_ntop() cannot convert the address (for example an unsupported
 * family) nothing is printed; previously the uninitialised buffer was
 * handed to printf() in that case.
 */
static void print_address(char *addr)
{
	char buf[INET6_ADDRSTRLEN];
	struct sockaddr_storage *ss = (struct sockaddr_storage *)addr;
	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
	void *saddr;

	if (ss->ss_family == AF_INET6)
		saddr = &sin6->sin6_addr;
	else
		saddr = &sin->sin_addr;

	/* inet_ntop() returns NULL on failure and leaves buf unspecified */
	if (inet_ntop(ss->ss_family, saddr, buf, sizeof(buf)) == NULL)
		buf[0] = '\0';

	printf("%s", buf);
}
/*
 * Translate a numeric membership state into text.
 *
 * buf        - destination buffer
 * buflen     - size of buf in bytes
 * node_state - 1 "Joining", 2 "Cluster-Member", 3 "Not-in-Cluster",
 *              4 "Leaving"; any other value yields "Unknown: code=<n>"
 *
 * Returns buf. The text is truncated to fit and is always NUL terminated
 * when buflen is positive; the former strncpy()/sprintf() pair could leave
 * the buffer unterminated or write past its end.
 */
static char *membership_state(char *buf, int buflen, int node_state)
{
	const char *text = NULL;

	switch (node_state) {
	case 1:
		text = "Joining";
		break;
	case 2:
		text = "Cluster-Member";
		break;
	case 3:
		text = "Not-in-Cluster";
		break;
	case 4:
		text = "Leaving";
		break;
	default:
		break;
	}

	/* nothing can be stored in a zero-sized buffer */
	if (buflen <= 0)
		return buf;

	if (text)
		snprintf(buf, (size_t)buflen, "%s", text);
	else
		snprintf(buf, (size_t)buflen, "Unknown: code=%d", node_state);

	return buf;
}
/*
 * Implementation of "cman_tool status": query the local cman daemon and
 * print cluster, quorum, port, node and address information on stdout.
 *
 * Any failing query terminates the process through die(). The list used
 * for the disallowed nodes is now sized from a validated node count,
 * checked for allocation failure and released again.
 */
static void show_status(void)
{
	cman_cluster_t info;
	cman_version_t v;
	cman_handle_t h;
	cman_node_t node;
	char info_buf[PIPE_BUF];
	char tmpbuf[1024];
	cman_extra_info_t *einfo = (cman_extra_info_t *)info_buf;
	cman_qdev_info_t qinfo;
	int quorate;
	int i;
	int j;
	int portnum;
	char *addrptr;

	h = open_cman_handle(0);

	if (cman_get_cluster(h, &info) < 0)
		die("Error getting cluster info: %s\n", cman_error(errno));
	if (cman_get_version(h, &v) < 0)
		die("Error getting cluster version: %s\n", cman_error(errno));
	if (cman_get_extra_info(h, einfo, sizeof(info_buf)) < 0)
		die("Error getting extra info: %s\n", cman_error(errno));

	quorate = cman_is_quorate(h);

	printf("Version: %d.%d.%d\n", v.cv_major, v.cv_minor, v.cv_patch);
	printf("Config Version: %d\n", v.cv_config);
	printf("Cluster Name: %s\n", info.ci_name);
	printf("Cluster Id: %d\n", info.ci_number);
	printf("Cluster Member: Yes\n");
	printf("Cluster Generation: %d\n", info.ci_generation);
	printf("Membership state: %s\n", membership_state(tmpbuf, sizeof(tmpbuf),
	       einfo->ei_node_state));
	printf("Nodes: %d\n", einfo->ei_members);
	printf("Expected votes: %d\n", einfo->ei_expected_votes);
	if (cman_get_quorum_device(h, &qinfo) == 0 && qinfo.qi_state == 2)
		printf("Quorum device votes: %d\n", qinfo.qi_votes);
	printf("Total votes: %d\n", einfo->ei_total_votes);
	printf("Node votes: %d\n", einfo->ei_node_votes);
	printf("Quorum: %d %s\n", einfo->ei_quorum, quorate?" ":"Activity blocked");
	printf("Active subsystems: %d\n", cman_get_subsys_count(h));

	printf("Flags:");
	if (einfo->ei_flags & CMAN_EXTRA_FLAG_2NODE)
		printf(" 2node");
	if (einfo->ei_flags & CMAN_EXTRA_FLAG_SHUTDOWN)
		printf(" Shutdown");
	if (einfo->ei_flags & CMAN_EXTRA_FLAG_ERROR)
		printf(" Error");
	if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED)
		printf(" DisallowedNodes");
	if (einfo->ei_flags & CMAN_EXTRA_FLAG_DIRTY)
		printf(" Dirty");
	printf(" \n");

	/* ei_ports is walked as a bitmap: 32 bytes of 8 bits, one bit per port */
	printf("Ports Bound: ");
	portnum = 0;
	for (i=0; i<32; i++) {
		for (j=0; j<8; j++) {
			if ((einfo->ei_ports[i] >> j) & 1)
				printf("%d ", portnum);
			portnum++;
		}
	}
	printf(" \n");

	node.cn_name[0] = 0;
	if (cman_get_node(h, CMAN_NODEID_US, &node) == 0) {
		printf("Node name: %s\n", node.cn_name);
		printf("Node ID: %d\n", node.cn_nodeid);
	}

	/*
	 * ei_addresses is read as ei_num_addresses sockaddr_storage entries
	 * (printed as multicast addresses) followed by the same number of
	 * entries printed as node addresses.
	 */
	printf("Multicast addresses: ");
	addrptr = einfo->ei_addresses;
	for (i=0; i < einfo->ei_num_addresses; i++) {
		print_address(addrptr);
		printf(" ");
		addrptr += sizeof(struct sockaddr_storage);
	}
	printf("\n");

	printf("Node addresses: ");
	for (i=0; i < einfo->ei_num_addresses; i++) {
		print_address(addrptr);
		printf(" ");
		addrptr += sizeof(struct sockaddr_storage);
	}
	printf("\n");

	if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED) {
		int count;
		int numnodes;
		cman_node_t *nodes;

		count = cman_get_node_count(h);
		if (count < 0)
			die("cman_get_node_count failed: %s", cman_error(errno));

		/* one spare element so a count of zero never asks for 0 bytes */
		nodes = calloc((size_t)count + 1, sizeof(cman_node_t));
		if (!nodes)
			die("cannot allocate memory for nodes list\n");

		if (cman_get_disallowed_nodes(h, count, &numnodes, nodes) == 0) {
			printf("Disallowed nodes: ");
			for (i=0; i<numnodes; i++) {
				printf("%s ", nodes[i].cn_name);
			}
			printf("\n");
		}
		free(nodes);
	}
	cman_finish(h);
}
/*
 * qsort() comparator ordering cman_node_t entries by ascending node id.
 *
 * Returns a negative, zero or positive value. The result is derived from
 * two comparisons rather than a subtraction, which could overflow for
 * ids that are far apart.
 */
static int node_compare(const void *va, const void *vb)
{
	const cman_node_t *a = va;
	const cman_node_t *b = vb;

	return (a->cn_nodeid > b->cn_nodeid) - (a->cn_nodeid < b->cn_nodeid);
}
/*
 * Check whether a node name was selected with -n.
 *
 * Returns TRUE when `node` is equal to one of the first num_nodenames
 * entries of comline->nodenames, FALSE otherwise.
 */
static int node_filter(commandline_t *comline, const char *node)
{
	int idx = 0;

	while (idx < comline->num_nodenames) {
		if (!strcmp(comline->nodenames[idx], node))
			return TRUE;
		idx++;
	}

	return FALSE;
}
/*
 * Convert one keyword of the -F format list into its FMT_* code.
 *
 * Returns FMT_ID, FMT_NAME, FMT_TYPE or FMT_ADDR for "id", "name", "type"
 * and "addr"; FMT_NONE for NULL or any other string.
 */
static int get_format_opt(const char *opt)
{
	static const struct {
		const char *keyword;
		int code;
	} known[] = {
		{ "id",   FMT_ID },
		{ "name", FMT_NAME },
		{ "type", FMT_TYPE },
		{ "addr", FMT_ADDR },
	};
	size_t idx;

	if (opt == NULL)
		return FMT_NONE;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
		if (strcmp(opt, known[idx].keyword) == 0)
			return known[idx].code;
	}

	return FMT_NONE;
}
/*
 * Print one entry of the "cman_tool nodes" listing.
 *
 * comline - parsed command line; selects the node filter (-n), fence
 *           details (-f), addresses (-a) and the custom format (-F)
 * h       - open cman handle used for the fence and address queries
 * format  - MAX_FORMAT_OPTS FMT_* codes; only read when -F was given
 * node    - the node to print
 *
 * Nodes rejected by the -n filter print nothing. The address count now
 * starts at zero and is reset when the address query fails, so the
 * FMT_ADDR column no longer iterates over an uninitialised count, and
 * the results of localtime() are checked before they reach strftime().
 */
static void print_node(commandline_t *comline, cman_handle_t h, int *format, struct cman_node *node)
{
	char member_type;
	struct tm *ftime;
	struct tm *jtime;
	char jstring[1024];
	int i,j,k;
	int numaddrs = 0;
	struct cman_node_address addrs[MAX_INTERFACES];

	if (comline->num_nodenames > 0) {
		if (node_filter(comline, node->cn_name) == 0) {
			return;
		}
	}

	switch (node->cn_member) {
	case 0:
		member_type = 'X';
		break;
	case 1:
		member_type = 'M';
		break;
	case 2:
		member_type = 'd';
		break;
	default:
		member_type = '?';
		break;
	}

	jtime = localtime(&node->cn_jointime.tv_sec);
	if (jtime && node->cn_jointime.tv_sec && node->cn_member)
		strftime(jstring, sizeof(jstring), "%F %H:%M:%S", jtime);
	else
		strcpy(jstring, " ");

	if (!comline->format_opts) {
		printf("%4d %c %5d %s %s\n",
		       node->cn_nodeid, member_type,
		       node->cn_incarnation, jstring, node->cn_name);
	}

	if (comline->fence_opt && !comline->format_opts) {
		char agent[255];
		uint64_t fence_time;
		int fenced;

		if (!cman_get_fenceinfo(h, node->cn_nodeid, &fence_time, &fenced, agent)) {
			if (fence_time) {
				time_t fence_time_t = (time_t)fence_time;
				ftime = localtime(&fence_time_t);
				/* skip the line when the time cannot be converted */
				if (ftime) {
					strftime(jstring, sizeof(jstring), "%F %H:%M:%S", ftime);
					printf(" Last fenced: %-15s by %s\n", jstring, agent);
				}
			}
			if (!node->cn_member && node->cn_incarnation && !fenced) {
				printf(" Node has not been fenced since it went down\n");
			}
		}
	}

	if (comline->addresses_opt || comline->format_opts) {
		if (cman_get_node_addrs(h, node->cn_nodeid, MAX_INTERFACES, &numaddrs, addrs)) {
			/* query failed: make sure no stale count is used below */
			numaddrs = 0;
		}
		else if (numaddrs && !comline->format_opts) {
			printf(" Addresses: ");
			for (i = 0; i < numaddrs; i++)
			{
				print_address(addrs[i].cna_address);
				printf(" ");
			}
			printf("\n");
		}
	}

	if (comline->format_opts) {
		for (j = 0; j < MAX_FORMAT_OPTS; j++) {
			switch (format[j]) {
			case FMT_NONE:
				break;
			case FMT_ID:
				printf("%d ", node->cn_nodeid);
				break;
			case FMT_NAME:
				printf("%s ", node->cn_name);
				break;
			case FMT_TYPE:
				printf("%c ", member_type);
				break;
			case FMT_ADDR:
				for (k = 0; k < numaddrs; k++) {
					print_address(addrs[k].cna_address);
					if (k != (numaddrs - 1)) {
						printf(",");
					}
				}
				printf(" ");
				break;
			default:
				break;
			}
		}
		printf("\n");
	}
}
/*
 * Implementation of "cman_tool nodes": fetch the node list, mark the
 * disallowed nodes, sort by node id and print every entry through
 * print_node().
 *
 * comline - parsed command line; format_opts (if set) is split in place
 *           on ',' into at most MAX_FORMAT_OPTS column codes.
 *
 * Failing queries or allocations terminate the process through die().
 * dis_count now starts at zero and is reset when the disallowed-node query
 * fails, so the NOTE banner is no longer driven by an uninitialised value.
 */
static void show_nodes(commandline_t *comline)
{
	cman_handle_t h;
	int count;
	int i;
	int j;
	int numnodes;
	int dis_count = 0;
	int format[MAX_FORMAT_OPTS];
	cman_node_t *dis_nodes;
	cman_node_t *nodes;

	h = open_cman_handle(0);

	count = cman_get_node_count(h);
	if (count < 0)
		die("cman_get_node_count failed: %s", cman_error(errno));

	count += 2; /* Extra space! */

	nodes = malloc(sizeof(cman_node_t) * count);
	if (!nodes)
		die("cannot allocate memory for nodes list\n");

	if (comline->format_opts != NULL) {
		char *format_str = comline->format_opts;
		char *format_tmp;

		/* slots past the last keyword receive FMT_NONE (strtok returns NULL) */
		for (i = 0; i < MAX_FORMAT_OPTS; i++) {
			format_tmp = strtok(format_str, ",");
			format_str = NULL;
			format[i] = get_format_opt(format_tmp);
		}
	}

	if (cman_get_nodes(h, count, &numnodes, nodes) < 0)
		die("cman_get_nodes failed: %s", cman_error(errno));

	/* Get Disallowed nodes, so we can show them as such */
	dis_nodes = malloc(sizeof(cman_node_t) * count);
	if (!dis_nodes)
		die("cannot allocate memory for disallowed nodes list");

	if (cman_get_disallowed_nodes(h, count, &dis_count, dis_nodes) == 0) {
		for (i = 0; i < numnodes; i++) {
			for (j = 0; j < dis_count; j++) {
				if (dis_nodes[j].cn_nodeid == nodes[i].cn_nodeid)
					nodes[i].cn_member = 2;
			}
		}
	}
	else {
		/* the query may have left the counter in an unknown state */
		dis_count = 0;
	}

	/* Sort by nodeid to be friendly */
	qsort(nodes, numnodes, sizeof(cman_node_t), node_compare);

	if (dis_count) {
		printf("NOTE: There are %d disallowed nodes,\n", dis_count);
		printf(" members list may seem inconsistent across the cluster\n");
	}

	if (!comline->format_opts) {
		printf("Node Sts Inc Joined Name\n");
	}

	/* Print nodes */
	for (i = 0; i < numnodes; i++) {
		print_node(comline, h, format, &nodes[i]);
	}

	free(nodes);
	free(dis_nodes);
	cman_finish(h);
}
/*
 * Implementation of "cman_tool services": runs "group_tool ls" through
 * system() and hands its return value back to the caller unchanged.
 */
static int show_services(void)
{
	int rc = system("group_tool ls");

	return rc;
}
/*
 * Map an error number to a message suitable for the user.
 *
 * err - the errno value to describe
 *
 * Returns a cman specific text for ENOTCONN and ENOENT and strerror(err)
 * for everything else. The returned string must not be modified or freed.
 *
 * The function used to ignore its argument and read the global errno
 * instead; it now honours `err`, which gives the same result for callers
 * that pass errno.
 */
char *cman_error(int err)
{
	char *die_error;

	switch (err) {
	case ENOTCONN:
		die_error = "Cluster software not started";
		break;
	case ENOENT:
		die_error = "Node is not yet a cluster member";
		break;
	default:
		die_error = strerror(err);
		break;
	}
	return die_error;
}
/*
 * Implementation of "cman_tool leave": ask cman to shut this node down.
 *
 * The "remove" and "force" words add CMAN_SHUTDOWN_REMOVED and
 * CMAN_SHUTDOWN_ANYWAY to the request. With -w and a timeout the
 * SIGALRM timeout handler is armed first. A failing shutdown request
 * terminates the process through die().
 */
static void leave(commandline_t *comline)
{
	cman_handle_t h = open_cman_handle(1);
	int shutdown_flags = 0;

	/* "cman_tool leave remove" adjusts quorum downward */
	if (comline->remove)
		shutdown_flags |= CMAN_SHUTDOWN_REMOVED;

	if (comline->force)
		shutdown_flags |= CMAN_SHUTDOWN_ANYWAY;

	if (comline->wait_opt && comline->timeout) {
		signal(SIGALRM, sigalarm_handler);
		alarm(comline->timeout);
	}

	if (cman_shutdown(h, shutdown_flags) != 0) {
		die("Error leaving cluster: %s", cman_error(errno));
	}

	cman_finish(h);
}
/*
 * Implementation of "cman_tool expected": send the expected votes value
 * from the command line to cman over an admin connection. A failing
 * request terminates the process through die().
 */
static void set_expected(commandline_t *comline)
{
	cman_handle_t h = open_cman_handle(1);
	int rc = cman_set_expected_votes(h, comline->expected_votes);

	if (rc != 0)
		die("can't set expected votes: %s", cman_error(errno));

	cman_finish(h);
}
/*
 * Implementation of "cman_tool votes": set the vote count of a node.
 *
 * Without -n the request uses node id 0; with -n the first node name is
 * looked up through cman_get_node() to obtain its id. A failing lookup or
 * request terminates the process through die().
 */
static void set_votes(commandline_t *comline)
{
	struct cman_node node;
	int nodeid = 0; /* This node */
	cman_handle_t h = open_cman_handle(1);

	if (comline->num_nodenames) {
		/* Resolve node name into a number */
		node.cn_nodeid = 0;
		strcpy(node.cn_name, comline->nodenames[0]);
		if (cman_get_node(h, node.cn_nodeid, &node))
			die("Can't set votes for node %s : %s\n", node.cn_name, strerror(errno));
		nodeid = node.cn_nodeid;
	}

	if (cman_set_votes(h, comline->votes, nodeid) != 0)
		die("can't set votes: %s", cman_error(errno));

	cman_finish(h);
}
/*
 * Implementation of "cman_tool version".
 *
 * When no (non-zero) config version was given with -r, the version read
 * from cman is printed as "major.minor.patch config N". Otherwise the
 * version record is written back with the new config version. Failing
 * requests terminate the process through die().
 */
static void version(commandline_t *comline)
{
	struct cman_version ver;
	cman_handle_t h = open_cman_handle(1);

	if (cman_get_version(h, &ver) != 0)
		die("can't get version: %s", cman_error(errno));

	if (comline->config_version) {
		ver.cv_config = comline->config_version;
		if (cman_set_version(h, &ver) != 0)
			die("can't set version: %s", cman_error(errno));
	}
	else {
		printf("%d.%d.%d config %d\n", ver.cv_major, ver.cv_minor, ver.cv_patch,
		       ver.cv_config);
	}

	cman_finish(h);
}
/*
 * Block until the cluster is usable, polling once per second.
 *
 * With -q the loop runs until cman_is_quorate() reports a positive value;
 * otherwise it runs until cman_get_node_count() stops returning a negative
 * value. There is no internal timeout - callers arm SIGALRM for that.
 *
 * Always returns 0.
 */
static int cluster_wait(commandline_t *comline)
{
	int ret = 0;
	cman_handle_t h = open_cman_handle(0);

	if (comline->wait_quorate_opt) {
		for (;;) {
			if (cman_is_quorate(h) > 0)
				break;
			sleep(1);
		}
	}
	else {
		for (;;) {
			if (cman_get_node_count(h) >= 0)
				break;
			sleep(1);
		}
	}

	cman_finish(h);
	return ret;
}
/*
 * Implementation of "cman_tool kill": kill every node named with -n.
 *
 * Terminates through die() when no name was given. A name that cannot be
 * resolved, or a failing kill request, is reported on stderr and the
 * remaining names are still processed.
 */
static void kill_node(commandline_t *comline)
{
	cman_handle_t h;
	struct cman_node target;
	int idx;

	if (comline->num_nodenames == 0) {
		die("No node name specified\n");
	}

	h = open_cman_handle(1);

	for (idx = 0; idx < comline->num_nodenames; idx++) {
		/* Resolve node name into a number */
		target.cn_nodeid = 0;
		strcpy(target.cn_name, comline->nodenames[idx]);

		if (cman_get_node(h, target.cn_nodeid, &target) == 0) {
			if (cman_kill_node(h, target.cn_nodeid))
				perror("kill node failed");
		}
		else {
			fprintf(stderr, "Can't kill node %s : %s\n", target.cn_name, strerror(errno));
		}
	}

	cman_finish(h);
}
/*
 * Implementation of "cman_tool debug": pass the -d value to cman as the
 * new debug log setting. A failure is reported with perror() only.
 */
static void set_debuglog(commandline_t *comline)
{
	cman_handle_t h = open_cman_handle(1);

	if (cman_set_debuglog(h, comline->verbose) != 0)
		perror("setting debuglog failed");

	cman_finish(h);
}
#ifdef DEBUG
/*
 * Implementation of "cman_tool dump-db" (DEBUG builds only): ask cman to
 * dump its object database to comline->filename. A failure is reported
 * with perror() only.
 */
static void dump_objdb(commandline_t *comline)
{
	cman_handle_t h = open_cman_handle(1);

	if (cman_dump_objdb(h, comline->filename) != 0)
		perror("dump objdb failed");

	cman_finish(h);
}
#endif
/*
 * Parse the argument of a numeric option.
 *
 * argopt - option letter, used in error messages only
 * arg    - the option's argument text
 *
 * Returns the value as a non-negative int. Text that is not a complete
 * decimal integer, a negative value, or a value that does not fit in an
 * int terminates the process through die(). The range check is new: the
 * long result of strtol() was previously truncated silently.
 */
static int get_int_arg(char argopt, char *arg)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);

	/* reject empty input and trailing garbage */
	if (end == arg || *end != '\0')
		die("argument to %c (%s) is not an integer", argopt, arg);

	if (val < 0)
		die("argument to %c cannot be negative", argopt);

	if (errno == ERANGE || val > INT_MAX)
		die("argument to %c (%s) is out of range", argopt, arg);

	return (int)val;
}
/*
 * Parse the command line into *comline.
 *
 * argc, argv - as passed to main()
 * comline    - zero-initialised structure that receives the result
 *
 * Options are read with getopt() using OPTION_STRING; the remaining words
 * select the operation ("join", "leave", ...) or modify it ("remove",
 * "force"). -V prints the version and exits; -h prints the usage for the
 * selected operation (or everything) and exits. Invalid input terminates
 * the process through die() or exit(EXIT_FAILURE).
 *
 * The cluster name is now checked against the size of
 * comline->clustername (MAX_CLUSTER_NAME_LEN) before it is copied; the
 * previous test compared against MAX_NODE_NAME_LEN.
 *
 * Lines carrying a leading '+' are diff markers of the change set this
 * listing was taken from and are reproduced verbatim.
 */
static void decode_arguments(int argc, char *argv[], commandline_t *comline)
{
	int cont = TRUE;
	int optchar, i;
	int show_help = 0;

	comline->config_lcrso=DEFAULT_CONFIG_MODULE;

	while (cont) {
		optchar = getopt(argc, argv, OPTION_STRING);

		switch (optchar) {

		case 'm':
			comline->multicast_addr = strdup(optarg);
			break;

		case 'f':
			comline->fence_opt = 1;
			break;

		case 'a':
			comline->addresses_opt = 1;
			break;

		case 'n':
			i = comline->num_nodenames;
			if (i >= MAX_INTERFACES)
				die("maximum of %d node names allowed",
				    MAX_INTERFACES);
			if (strlen(optarg) > MAX_NODE_NAME_LEN)
				die("maximum node name length is %d",
				    MAX_NODE_NAME_LEN);
			comline->nodenames[i] = strdup(optarg);
			comline->num_nodenames++;
			break;

		case 'o':
			comline->override_nodename = strdup(optarg);
			break;

		case 'k':
			comline->key_filename = strdup(optarg);
			break;

		case 'C':
			comline->config_lcrso = strdup(optarg);
			break;

		case 'r':
			comline->config_version = get_int_arg(optchar, optarg);
			comline->config_version_opt = TRUE;
			break;

		case 'v':
			comline->votes = get_int_arg(optchar, optarg);
			comline->votes_opt = TRUE;
			break;

		case 'e':
			comline->expected_votes = get_int_arg(optchar, optarg);
			comline->expected_votes_opt = TRUE;
			break;

		case '2':
			comline->two_node = TRUE;
			break;

		case 'p':
			comline->port = get_int_arg(optchar, optarg);
			comline->port_opt = TRUE;
			break;

		case 'N':
			comline->nodeid = get_int_arg(optchar, optarg);
			comline->nodeid_opt = TRUE;
			break;

		case 'c':
			/* the bound must match the destination array, not the node name limit */
			if (strlen(optarg) > MAX_CLUSTER_NAME_LEN-1)
				die("maximum cluster name length is %d",
				    MAX_CLUSTER_NAME_LEN-1);
			strcpy(comline->clustername, optarg);
			comline->clustername_opt = TRUE;
			break;

		case 'F':
			comline->format_opts = strdup(optarg);
			break;

		case 'V':
			printf("cman_tool %s (built %s %s)\n",
			       RELEASE_VERSION, __DATE__, __TIME__);
			printf("%s\n", REDHAT_COPYRIGHT);
			exit(EXIT_SUCCESS);
			break;

		case 'h':
			show_help = 1;
			break;

		case ':':
		case '?':
			fprintf(stderr, "Please use '-h' for usage.\n");
			exit(EXIT_FAILURE);
			break;

		case 'd':
			if (optarg)
				comline->verbose = atoi(optarg);
			else
				comline->verbose = 255;
			break;

		case 'w':
			comline->wait_opt = TRUE;
			break;

		case 'q':
			comline->wait_quorate_opt = TRUE;
			break;

		case 't':
			comline->timeout = get_int_arg(optchar, optarg);
			break;

		case EOF:
			cont = FALSE;
			break;

		case 'X':
			comline->noconfig_opt = TRUE;
			break;

+ case 'A':
+ comline->noopenais_opt = TRUE;
+ break;
+
		case 'P':
			comline->nosetpri_opt = TRUE;
			break;

		default:
			die("unknown option: %c", optchar);
			break;
		};
	}

	/* the remaining words name the operation and its modifiers */
	while (optind < argc) {
		if (strcmp(argv[optind], "join") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_JOIN;
		} else if (strcmp(argv[optind], "leave") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_LEAVE;
		} else if (strcmp(argv[optind], "expected") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_EXPECTED;
		} else if (strcmp(argv[optind], "votes") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_VOTES;
		} else if (strcmp(argv[optind], "kill") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_KILL;
		} else if (strcmp(argv[optind], "version") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_VERSION;
		} else if (strcmp(argv[optind], "wait") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_WAIT;
		} else if (strcmp(argv[optind], "status") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_STATUS;
		} else if (strcmp(argv[optind], "nodes") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_NODES;
		} else if (strcmp(argv[optind], "services") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_SERVICES;
		} else if (strcmp(argv[optind], "debug") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_DEBUG;
#ifdef DEBUG
		} else if (strcmp(argv[optind], "dump-db") == 0) {
			if (comline->operation)
				die("can't specify two operations");
			comline->operation = OP_DUMP_OBJDB;
			if (!argv[optind+1])
				die("no filename given");
			comline->filename = strdup(argv[optind+1]);
			if (comline->filename[0] != '/')
				die("dump filename must be an absolute path");
			optind++;
#endif
		} else if (strcmp(argv[optind], "remove") == 0) {
			comline->remove = TRUE;
		} else if (strcmp(argv[optind], "force") == 0) {
			comline->force = TRUE;
		} else
			die("unknown option %s", argv[optind]);

		optind++;
	}

	if (show_help) {
		print_usage(comline->operation);
		exit(EXIT_SUCCESS);
	}

	if (!comline->operation)
		die("no operation specified");
}
/*
 * Sanity-check the combination of options used for "join".
 * The first violated rule terminates the process through die().
 */
static void check_arguments(commandline_t *comline)
{
	if (comline->two_node && comline->expected_votes != 1) {
		die("expected_votes value (%d) invalid in two node mode",
		    comline->expected_votes);
	}

	if (comline->port_opt &&
	    !(comline->port > 0 && comline->port <= 65535)) {
		die("Port must be a number between 1 and 65535");
	}

	/* This message looks like it contradicts the condition but
	   a nodeid of zero simply means "assign one for me" and is a
	   perfectly reasonable override */
	if (!(comline->nodeid >= 0 && comline->nodeid <= 4096)) {
		die("Node id must be between 1 and 4096");
	}

	if (strlen(comline->clustername) > MAX_CLUSTER_NAME_LEN) {
		die("Cluster name must be < %d characters long",
		    MAX_CLUSTER_NAME_LEN);
	}

	if (comline->timeout && !(comline->wait_opt || comline->wait_quorate_opt)) {
		die("timeout is only appropriate with wait");
	}
}
/*
 * Arm the wait timeout: when `seconds` is non-zero, install
 * sigalarm_handler for SIGALRM and schedule the alarm.
 */
static void arm_wait_timeout(int seconds)
{
	if (!seconds)
		return;

	signal(SIGALRM, sigalarm_handler);
	alarm(seconds);
}

/*
 * Entry point of cman_tool: parse the command line and run the selected
 * operation. Exits with EXIT_SUCCESS unless an operation terminated the
 * process earlier.
 */
int main(int argc, char *argv[], char *envp[])
{
	commandline_t comline;
	int ret;

	prog_name = argv[0];

	memset(&comline, 0, sizeof(commandline_t));

	decode_arguments(argc, argv, &comline);

	switch (comline.operation) {
	case OP_JOIN:
		check_arguments(&comline);
		arm_wait_timeout(comline.timeout);

		join(&comline, envp);
		if (comline.wait_opt || comline.wait_quorate_opt) {
			/* start over whenever the wait reports ENOTCONN */
			for (;;) {
				ret = cluster_wait(&comline);
				if (ret != ENOTCONN)
					break;
				join(&comline, envp);
			}
		}
		break;

	case OP_LEAVE:
		leave(&comline);
		break;

	case OP_EXPECTED:
		set_expected(&comline);
		break;

	case OP_VOTES:
		set_votes(&comline);
		break;

	case OP_KILL:
		kill_node(&comline);
		break;

	case OP_VERSION:
		version(&comline);
		break;

	case OP_WAIT:
		arm_wait_timeout(comline.timeout);
		cluster_wait(&comline);
		break;

	case OP_STATUS:
		show_status();
		break;

	case OP_NODES:
		show_nodes(&comline);
		break;

	case OP_SERVICES:
		if (show_services() < 0) {
			fprintf(stderr, "Unable to invoke group_tool\n");
			exit(EXIT_FAILURE);
		}
		break;

	case OP_DEBUG:
		set_debuglog(&comline);
		break;

#ifdef DEBUG
	case OP_DUMP_OBJDB:
		dump_objdb(&comline);
		break;
#endif
	}

	exit(EXIT_SUCCESS);
}
/* Program name for die() messages; set to argv[0] at the start of main(). */
char *prog_name;
diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index f90e9a70a..4e691f49d 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -1,1034 +1,996 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/utsname.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <netdb.h>
#define SYSLOG_NAMES
#include <sys/syslog.h>
#include <ifaddrs.h>
#include <arpa/inet.h>
/* corosync headers */
#include <corosync/engine/logsys.h>
#include <corosync/lcr/lcr_comp.h>
#include <corosync/engine/objdb.h>
#include <corosync/engine/config.h>
#include "cman.h"
#define OBJDB_API struct objdb_iface_ver0
#include "cnxman-socket.h"
#include "nodelist.h"
#include "logging.h"
#define MAX_PATH_LEN PATH_MAX
/*
 * File-scope state shared by the configuration helpers below.  Most values
 * start out zero/NULL, may be overridden from CMAN_* environment variables
 * in get_env_overrides() and are otherwise filled in from the object
 * database or from built-in defaults.
 */
/* Debug bitmask, taken from CMAN_DEBUGLOG */
static unsigned int debug_mask;
/* Forward declaration: referenced by the interface table below */
static int cmanpre_readconfig(struct objdb_iface_ver0 *objdb, char **error_string);
/* Value of CMAN_NODENAME, or NULL when it is not set */
static char *nodename_env;
/* Expected votes, from CMAN_EXPECTEDVOTES (0 when unset or < 1) */
static int expected_votes;
/* This node's votes, from CMAN_VOTES */
static int votes;
/* Number of totem "interface" objects created so far by add_ifaddr() */
static int num_interfaces;
/* Descriptor number from CMAN_PIPE that write_cman_pipe() writes to; 0 means none */
static int startup_pipe;
/* Cluster ID: read from the config or generated from the cluster name */
static unsigned int cluster_id;
/* Name of the local node as resolved by get_nodename()/set_noccs_defaults() */
static char nodename[MAX_CLUSTER_MEMBER_NAME_LEN];
/* Local node ID, from CMAN_NODEID or the node's "nodeid" key */
static int nodeid;
/* Non-zero when CMAN_2NODE is present in the environment */
static int two_node;
/* Value of the "disable_openais" key of the cman object (default 0) */
static unsigned int disable_openais;
/* Port number, from CMAN_IP_PORT or the "port" key, falling back to DEFAULT_PORT */
static unsigned int portnum;
/* 1 for the primary node name plus one per accepted <altname> */
static int num_nodenames;
/* Security key file name, from CMAN_KEYFILE or the "keyfile" key */
static char *key_filename;
/* Multicast address text, from CMAN_MCAST_ADDR, the config or default_mcast() */
static char *mcast_name;
/* Cluster name, from CMAN_CLUSTER_NAME or the cluster object's "name" key */
static char *cluster_name;
/* Last status/error text; handed back through *error_string by cmanpre_readconfig() */
static char error_reason[1024] = { '\0' };
/* Handle of the top-level "cluster" object looked up in cmanpre_readconfig() */
static unsigned int cluster_parent_handle;
/*
 * Exports the interface for the service
 */
/* Config interface: its only callback is the readconfig entry point of this file */
static struct config_iface_ver0 cmanpreconfig_iface_ver0 = {
.config_readconfig = cmanpre_readconfig
};
/*
 * Interface descriptors for the component.  The array has room for two
 * entries but only the first is initialised (the second is zero-filled) and
 * cmanpre_comp_ver0 below advertises a count of one.  The .interfaces
 * pointer is left NULL here; cmanpre_comp_register() passes this entry to
 * lcr_interfaces_set() together with cmanpreconfig_iface_ver0.
 */
static struct lcr_iface ifaces_ver0[2] = {
{
.name = "cmanpreconfig",
.version = 0,
.versions_replace = 0,
.versions_replace_count = 0,
.dependencies = 0,
.dependency_count = 0,
.constructor = NULL,
.destructor = NULL,
.interfaces = NULL,
}
};
/* Component record handed to lcr_component_register() */
static struct lcr_comp cmanpre_comp_ver0 = {
.iface_count = 1,
.ifaces = ifaces_ver0,
};
/*
 * Constructor: runs when the object is loaded.  Associates the config
 * interface table with the first interface descriptor and then registers
 * the component.
 */
__attribute__ ((constructor)) static void cmanpre_comp_register(void)
{
	struct lcr_iface *primary_iface = &ifaces_ver0[0];

	lcr_interfaces_set(primary_iface, &cmanpreconfig_iface_ver0);
	lcr_component_register(&cmanpre_comp_ver0);
}
/*
 * Map a syslog facility value to its name using the facilitynames[] table
 * from <sys/syslog.h>.  Returns the first matching name, or NULL when the
 * value is not in the table.
 */
static char *facility_name_get (unsigned int facility)
{
	unsigned int idx = 0;

	/* the table ends with an entry whose name is NULL */
	while (facilitynames[idx].c_name != NULL) {
		if (facility == facilitynames[idx].c_val)
			return facilitynames[idx].c_name;
		idx++;
	}

	return NULL;
}
/*
 * Map a syslog priority value to its name using the prioritynames[] table
 * from <sys/syslog.h>.  Returns the first matching name, or NULL when the
 * value is not in the table.
 */
static char *priority_name_get (unsigned int priority)
{
	unsigned int idx = 0;

	/* the table ends with an entry whose name is NULL */
	while (prioritynames[idx].c_name != NULL) {
		if (priority == prioritynames[idx].c_val)
			return prioritynames[idx].c_name;
		idx++;
	}

	return NULL;
}
#define LOCALHOST_IPV4 "127.0.0.1"
#define LOCALHOST_IPV6 "::1"
/*
 * Compare two socket addresses.
 *
 * Returns 0 when the address families differ.  Otherwise the first
 * sizeof(struct sockaddr_in) or sizeof(struct sockaddr_in6) bytes (chosen by
 * family) are compared with memcmp(), so every field of the structure takes
 * part in the comparison, and 1 is returned only when they are all equal.
 * Any family other than AF_INET/AF_INET6 trips the assert().
 */
static int ipaddr_equal(struct sockaddr_storage *addr1, struct sockaddr_storage *addr2)
{
	struct sockaddr *lhs = (struct sockaddr *)addr1;
	struct sockaddr *rhs = (struct sockaddr *)addr2;
	int cmp_len = 0;

	if (lhs->sa_family != rhs->sa_family)
		return 0;

	switch (lhs->sa_family) {
	case AF_INET:
		cmp_len = sizeof(struct sockaddr_in);
		break;
	case AF_INET6:
		cmp_len = sizeof(struct sockaddr_in6);
		break;
	default:
		break;
	}
	assert(cmp_len);

	return (memcmp(lhs, rhs, cmp_len) == 0) ? 1 : 0;
}
/*
 * Fill *localhost with the loopback address of the given family: the
 * LOCALHOST_IPV4 text for AF_INET, the LOCALHOST_IPV6 text for anything else.
 * Returns 0 on success, -1 when getaddrinfo() fails (in which case
 * *localhost is left untouched).
 */
static int get_localhost(int family, struct sockaddr_storage *localhost)
{
	struct addrinfo hints;
	struct addrinfo *result;
	char *loopback_text = (family == AF_INET) ? LOCALHOST_IPV4 : LOCALHOST_IPV6;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_protocol = IPPROTO_UDP;

	/* Turn the loopback text into a socket address */
	if (getaddrinfo(loopback_text, NULL, &hints, &result) != 0)
		return -1;

	/* Zero the whole storage first: the resolved address is usually shorter */
	memset(localhost, 0, sizeof(*localhost));
	memcpy(localhost, result->ai_addr, result->ai_addrlen);

	freeaddrinfo(result);
	return 0;
}
/*
 * Resolve addr and report its address family.
 *
 * On success the first address returned by getaddrinfo() is copied into
 * *ssaddr (zero-padded) and its ai_family is returned.  Returns -1, leaving
 * *ssaddr untouched, when the lookup fails.
 */
static int address_family(char *addr, struct sockaddr_storage *ssaddr)
{
	struct addrinfo hints;
	struct addrinfo *result;
	int resolved_family;

	memset(&hints, 0, sizeof(hints));
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_protocol = IPPROTO_UDP;

	if (getaddrinfo(addr, NULL, &hints, &result) != 0)
		return -1;

	memset(ssaddr, 0, sizeof(*ssaddr));
	memcpy(ssaddr, result->ai_addr, result->ai_addrlen);
	resolved_family = result->ai_family;

	freeaddrinfo(result);
	return resolved_family;
}
/*
 * Find the "logger_subsys" child of object_handle whose "subsys" key equals
 * CMAN_NAME, creating one when none exists.
 *
 * Returns the handle of the matching (or newly created) object.
 * NOTE(review): if object_create() fails the returned handle does not refer
 * to a CMAN logger object (it is the last handle visited, or 0); the caller
 * in this file does not check for that.
 */
static unsigned int find_cman_logger(struct objdb_iface_ver0 *objdb, unsigned int object_handle)
{
	unsigned int subsys_handle = 0;
	unsigned int find_handle;
	char *str;

	objdb->object_find_create(object_handle, "logger_subsys", strlen("logger_subsys"), &find_handle);

	/* Iterate with the find handle just created, not with the parent object handle */
	while (!objdb->object_find_next(find_handle, &subsys_handle)) {
		if (objdb_get_string(objdb, subsys_handle, "subsys", &str)) {
			continue;
		}
		if (strcmp(str, CMAN_NAME) == 0) {
			/* release the iterator on the early exit as well */
			objdb->object_find_destroy(find_handle);
			return subsys_handle;
		}
	}
	objdb->object_find_destroy(find_handle);

	/* We can't find it ... create one */
	if (objdb->object_create(object_handle, &subsys_handle,
				 "logger_subsys", strlen("logger_subsys")) == 0) {
		objdb->object_key_create(subsys_handle, "subsys", strlen("subsys"),
					 CMAN_NAME, strlen(CMAN_NAME)+1);
	}

	return subsys_handle;
}
/*
 * Add one totem "interface" object for the given node address.
 *
 * objdb   - object database interface
 * mcast   - multicast address text (may be NULL, which fails resolution)
 * ifaddr  - node name/address to bind to
 * portnum - multicast port to record
 *
 * Both names are resolved first: they must resolve, belong to the same
 * address family and ifaddr must not be the loopback address.  On success a
 * new interface object is created under "totem" (which is created on demand)
 * with ringnumber set to the current num_interfaces, and num_interfaces is
 * incremented.
 *
 * Returns 0 on success (also when the interface object itself could not be
 * created, as before), -1 with error_reason set when validation fails.
 */
static int add_ifaddr(struct objdb_iface_ver0 *objdb, char *mcast, char *ifaddr, int portnum)
{
	unsigned int totem_object_handle;
	unsigned int find_handle;
	unsigned int interface_object_handle;
	struct sockaddr_storage if_addr, localhost, mcast_addr;
	char tmp[132];
	int mcast_family;
	int if_family;

	mcast_family = address_family(mcast, &mcast_addr);
	if_family = address_family(ifaddr, &if_addr);

	/* A failed lookup leaves the sockaddr storage unset: stop here rather
	   than let two failures (-1 == -1) pass the family comparison below */
	if (mcast_family < 0 || if_family < 0) {
		snprintf(error_reason, sizeof(error_reason),
			 "Cannot resolve node address or multicast address");
		return -1;
	}

	/* Check the families match */
	if (mcast_family != if_family) {
		sprintf(error_reason, "Node address family does not match multicast address family");
		return -1;
	}

	/* Check it's not bound to localhost; the comparison is only meaningful
	   when the loopback address could actually be built */
	if (get_localhost(if_addr.ss_family, &localhost) == 0 &&
	    ipaddr_equal(&localhost, &if_addr)) {
		sprintf(error_reason, "Node address is localhost, please choose a real host address");
		return -1;
	}

	/* Find the "totem" object, creating it if this is the first interface */
	objdb->object_find_create(OBJECT_PARENT_HANDLE, "totem", strlen("totem"), &find_handle);
	if (objdb->object_find_next(find_handle, &totem_object_handle)) {
		objdb->object_create(OBJECT_PARENT_HANDLE, &totem_object_handle,
				     "totem", strlen("totem"));
	}
	objdb->object_find_destroy(find_handle);

	if (objdb->object_create(totem_object_handle, &interface_object_handle,
				 "interface", strlen("interface")) == 0) {

		sprintf(tmp, "%d", num_interfaces);
		objdb->object_key_create(interface_object_handle, "ringnumber", strlen("ringnumber"),
					 tmp, strlen(tmp)+1);

		objdb->object_key_create(interface_object_handle, "bindnetaddr", strlen("bindnetaddr"),
					 ifaddr, strlen(ifaddr)+1);

		objdb->object_key_create(interface_object_handle, "mcastaddr", strlen("mcastaddr"),
					 mcast, strlen(mcast)+1);

		sprintf(tmp, "%d", portnum);
		objdb->object_key_create(interface_object_handle, "mcastport", strlen("mcastport"),
					 tmp, strlen(tmp)+1);

		num_interfaces++;
	}
	return 0;
}
/*
 * Derive a 16-bit cluster ID from the cluster name.
 *
 * For each character the running value is doubled and the character added;
 * the low 16 bits of the result are returned.  The arithmetic is done in
 * unsigned 32-bit so that long names wrap instead of overflowing a signed
 * int; the low 16 bits are the same as the signed computation produced
 * wherever that computation was well defined.
 *
 * Side effect: overwrites error_reason with a message reporting the ID.
 * name must not be NULL.
 */
static uint16_t generate_cluster_id(char *name)
{
	uint32_t value = 0;
	size_t len = strlen(name);
	size_t i;

	for (i = 0; i < len; i++) {
		value <<= 1;
		/* go through int first so a negative char contributes as before */
		value += (uint32_t)(int)name[i];
	}

	snprintf(error_reason, sizeof(error_reason),
		 "Generated cluster id for '%s' is %d\n", name, (int)(value & 0xFFFF));
	return (uint16_t)(value & 0xFFFF);
}
/*
 * Build a default multicast address for the cluster.
 *
 * The node name is resolved only to learn its address family.  For AF_INET
 * the result is "239.192.<hi>.<lo>", for AF_INET6 "ff15::<cluster_id in hex>".
 * Returns a pointer to a static buffer (overwritten by the next call), or
 * NULL when the name cannot be resolved or has another family.  A failed
 * lookup also sets error_reason and writes to the cman pipe.
 *
 * NOTE(review): the last IPv4 octet is cluster_id % 0xFF (modulo 255), not a
 * mask of the low byte.  It is kept exactly as is because changing it would
 * change the address derived for a given cluster ID.
 */
static char *default_mcast(char *nodename, uint16_t cluster_id)
{
	static char addr[132];
	struct addrinfo hints;
	struct addrinfo *resolved;
	int family;

	memset(&hints, 0, sizeof(hints));

	/* Look the node name up and use its IP type to pick the address form */
	if (getaddrinfo(nodename, NULL, &hints, &resolved) != 0) {
		sprintf(error_reason, "Can't determine address family of nodename %s\n", nodename);
		write_cman_pipe("Can't determine address family of nodename");
		return NULL;
	}
	family = resolved->ai_family;
	freeaddrinfo(resolved);

	switch (family) {
	case AF_INET:
		snprintf(addr, sizeof(addr), "239.192.%d.%d", cluster_id >> 8, cluster_id % 0xFF);
		return addr;
	case AF_INET6:
		snprintf(addr, sizeof(addr), "ff15::%x", cluster_id);
		return addr;
	default:
		return NULL;
	}
}
/*
 * Make sure nodename names a node in the configured node list, rewriting it
 * in place to the configured spelling when an equivalent name is found.
 *
 * Tried in order:
 *   1. nodename exactly as given;
 *   2. nodename with everything from the first '.' removed;
 *   3. every configured node name whose part before the first '.' equals
 *      the domain-less nodename;
 *   4. for each local IPv4 interface address: its resolved host name, that
 *      name without its domain, and its numeric form.
 *
 * nodename must point to a buffer of at least MAX_CLUSTER_MEMBER_NAME_LEN
 * bytes (the caller in this file passes exactly that).
 *
 * Returns 0 when a match was found, non-zero otherwise (-1, or the
 * getnameinfo() error code when a lookup fails).
 */
static int verify_nodename(struct objdb_iface_ver0 *objdb, char *nodename)
{
	char nodename2[MAX_CLUSTER_MEMBER_NAME_LEN+1];
	char nodename3[MAX_CLUSTER_MEMBER_NAME_LEN+1];
	char *str, *dot = NULL;
	struct ifaddrs *ifa, *ifa_list;
	struct sockaddr *sa;
	unsigned int nodes_handle;
	unsigned int find_handle = 0;
	int error;

	/* nodename is either from commandline or from uname */
	if (nodelist_byname(objdb, cluster_parent_handle, nodename))
		return 0;

	/* If nodename was from uname, try a domain-less version of it */
	strcpy(nodename2, nodename);
	dot = strchr(nodename2, '.');
	if (dot) {
		*dot = '\0';

		if (nodelist_byname(objdb, cluster_parent_handle, nodename2)) {
			strcpy(nodename, nodename2);
			return 0;
		}
	}

	/* If nodename (from uname) is domain-less, try to match against
	   cluster.conf names which may have domainname specified */
	nodes_handle = nodeslist_init(objdb, cluster_parent_handle, &find_handle);
	do {
		size_t len;

		if (objdb_get_string(objdb, nodes_handle, "name", &str)) {
			sprintf(error_reason, "Cannot get node name");
			break;
		}

		/* Names that would not fit the fixed buffers (nodename3 here and
		   the caller's nodename) are skipped instead of overflowing them */
		if (strlen(str) < MAX_CLUSTER_MEMBER_NAME_LEN) {
			strcpy(nodename3, str);
			dot = strchr(nodename3, '.');
			if (dot)
				len = (size_t)(dot - nodename3);
			else
				len = strlen(nodename3);

			if (strlen(nodename2) == len &&
			    !strncmp(nodename2, nodename3, len)) {
				strcpy(nodename, str);
				/* release the node iterator on this exit path too */
				objdb->object_find_destroy(find_handle);
				return 0;
			}
		}
		nodes_handle = nodeslist_next(objdb, find_handle);
	} while (nodes_handle);
	objdb->object_find_destroy(find_handle);

	/* The cluster.conf names may not be related to uname at all,
	   they may match a hostname on some network interface.
	   NOTE: This is IPv4 only */
	error = getifaddrs(&ifa_list);
	if (error)
		return -1;

	for (ifa = ifa_list; ifa; ifa = ifa->ifa_next) {
		/* Restore this */
		strcpy(nodename2, nodename);
		sa = ifa->ifa_addr;
		if (!sa || sa->sa_family != AF_INET)
			continue;

		error = getnameinfo(sa, sizeof(*sa), nodename2,
				    sizeof(nodename2), NULL, 0, 0);
		if (error)
			goto out;

		/* error is 0 here, so a match below leaves with success */
		if (nodelist_byname(objdb, cluster_parent_handle, nodename2)) {
			strcpy(nodename, nodename2);
			goto out;
		}

		/* truncate this name and try again */
		dot = strchr(nodename2, '.');
		if (!dot)
			continue;
		*dot = '\0';

		if (nodelist_byname(objdb, cluster_parent_handle, nodename2)) {
			strcpy(nodename, nodename2);
			goto out;
		}

		/* See if it's the IP address that's in cluster.conf */
		error = getnameinfo(sa, sizeof(*sa), nodename2,
				    sizeof(nodename2), NULL, 0, NI_NUMERICHOST);
		if (error)
			goto out;

		if (nodelist_byname(objdb, cluster_parent_handle, nodename2)) {
			strcpy(nodename, nodename2);
			goto out;
		}
	}

	/* nothing matched on any interface */
	error = -1;
out:
	freeifaddrs(ifa_list);
	return error;
}
/*
 * Get any environment variable overrides.
 *
 * Reads the CMAN_* variables and stores them in the file-scope settings:
 *   CMAN_CLUSTER_NAME  -> cluster_name (copied)
 *   CMAN_NODENAME      -> nodename_env (NULL when unset)
 *   CMAN_EXPECTEDVOTES -> expected_votes (values below 1 become 0)
 *   CMAN_IP_PORT       -> portnum
 *   CMAN_KEYFILE       -> key_filename (copied)
 *   CMAN_VOTES         -> votes
 *   CMAN_NODEID        -> nodeid
 *   CMAN_MCAST_ADDR    -> mcast_name
 *   CMAN_2NODE         -> two_node = 1, expected_votes = 1, votes = 1
 *   CMAN_DEBUGLOG      -> debug_mask
 *
 * Returns 0 on success, -1 when a string copy cannot be allocated (a message
 * is then written to the cman pipe).
 */
static int get_env_overrides(void)
{
	char *value;

	value = getenv("CMAN_CLUSTER_NAME");
	if (value) {
		cluster_name = strdup(value);
		/* treat allocation failure the same way as for the key file below */
		if (cluster_name == NULL) {
			write_cman_pipe("Cannot allocate memory for cluster name");
			return -1;
		}
	}

	nodename_env = getenv("CMAN_NODENAME");

	expected_votes = 0;
	value = getenv("CMAN_EXPECTEDVOTES");
	if (value) {
		expected_votes = atoi(value);
		if (expected_votes < 1) {
			expected_votes = 0;
		}
	}

	/* optional port */
	value = getenv("CMAN_IP_PORT");
	if (value) {
		portnum = atoi(value);
	}

	/* optional security key filename */
	value = getenv("CMAN_KEYFILE");
	if (value) {
		key_filename = strdup(value);
		if (key_filename == NULL) {
			write_cman_pipe("Cannot allocate memory for key filename");
			return -1;
		}
	}

	/* find our own number of votes */
	value = getenv("CMAN_VOTES");
	if (value) {
		votes = atoi(value);
	}

	/* nodeid */
	value = getenv("CMAN_NODEID");
	if (value) {
		nodeid = atoi(value);
	}

	value = getenv("CMAN_MCAST_ADDR");
	if (value) {
		mcast_name = value;
	}

	/* two-node mode is selected by the variable merely being present,
	   and it overrides any vote counts read above */
	if (getenv("CMAN_2NODE")) {
		two_node = 1;
		expected_votes = 1;
		votes = 1;
	}

	value = getenv("CMAN_DEBUGLOG");
	if (value) {
		debug_mask = atoi(value);
	}

	return 0;
}
/*
 * Work out the local node name and node ID and register the network
 * interfaces cman should use.
 *
 * Unless CMAN_NOCONFIG is set, the node name comes from CMAN_NODENAME (which
 * must be in the node list) or from uname(), and is then passed through
 * verify_nodename().  The node's "nodeid" key becomes nodeid.  When a "cman"
 * object exists under the cluster object, the multicast address is taken
 * from its "multicast" child (or defaulted), "disable_openais" is read and a
 * "nodename" key is added to it.  One totem interface is added for the node
 * name and one for every <altname> child of the node that has a "name".
 *
 * Returns 0 on success, -1 on failure (error_reason and/or the cman pipe
 * carry the reason where one is known).
 */
static int get_nodename(struct objdb_iface_ver0 *objdb)
{
	char *nodeid_str = NULL;
	unsigned int object_handle = 0;
	unsigned int find_handle;
	unsigned int node_object_handle;
	unsigned int alt_object;
	int have_cman_object = 0;

	if (!getenv("CMAN_NOCONFIG")) {
		/* our nodename */
		if (nodename_env != NULL) {
			if (strlen(nodename_env) >= sizeof(nodename)) {
				/* report the overlong override itself; nodename has not
				   been assigned yet at this point */
				snprintf(error_reason, sizeof(error_reason),
					 "Overridden node name %s is too long", nodename_env);
				write_cman_pipe("Overridden node name is too long");
				return -1;
			}

			strcpy(nodename, nodename_env);

			if (!nodelist_byname(objdb, cluster_parent_handle, nodename)) {
				snprintf(error_reason, sizeof(error_reason),
					 "Overridden node name %s is not in CCS", nodename);
				write_cman_pipe("Overridden node name is not in CCS");
				return -1;
			}
		} else {
			struct utsname utsname;

			if (uname(&utsname)) {
				sprintf(error_reason, "cannot get node name, uname failed");
				write_cman_pipe("Can't determine local node name");
				return -1;
			}

			if (strlen(utsname.nodename) >= sizeof(nodename)) {
				sprintf(error_reason, "node name from uname is too long");
				write_cman_pipe("Can't determine local node name");
				return -1;
			}

			strcpy(nodename, utsname.nodename);
		}
		if (verify_nodename(objdb, nodename))
			return -1;
	}

	/* The node entry supplies both the node ID and the altnames below, so
	   it has to exist before going any further */
	node_object_handle = nodelist_byname(objdb, cluster_parent_handle, nodename);
	if (!node_object_handle) {
		snprintf(error_reason, sizeof(error_reason),
			 "Node name %s is not in the node list", nodename);
		write_cman_pipe("This node is not in the node list");
		return -1;
	}
	if (objdb_get_string(objdb, node_object_handle, "nodeid", &nodeid_str)) {
		sprintf(error_reason, "This node has no nodeid in cluster.conf");
		write_cman_pipe("This node has no nodeid in cluster.conf");
		return -1;
	}

	/* Add <cman> bits to pass down to the main module */
	objdb->object_find_create(cluster_parent_handle, "cman", strlen("cman"), &find_handle);
	if (objdb->object_find_next(find_handle, &object_handle) == 0) {
		unsigned int mcast_handle;
		unsigned int find_handle2;

		have_cman_object = 1;

		if (!mcast_name) {
			objdb->object_find_create(object_handle, "multicast", strlen("multicast"), &find_handle2);
			if (objdb->object_find_next(find_handle2, &mcast_handle) == 0) {
				objdb_get_string(objdb, mcast_handle, "addr", &mcast_name);
			}
			objdb->object_find_destroy(find_handle2);
		}

		if (!mcast_name) {
			mcast_name = default_mcast(nodename, cluster_id);
		}

		/* See if the user wants our default set of openais services (default=yes) */
		objdb_get_int(objdb, object_handle, "disable_openais", &disable_openais, 0);

		objdb->object_key_create(object_handle, "nodename", strlen("nodename"),
					 nodename, strlen(nodename)+1);
	}
	objdb->object_find_destroy(find_handle);

	nodeid = atoi(nodeid_str);

	/* optional port: only consult the cman object when one was found */
	if (!portnum) {
		if (have_cman_object)
			objdb_get_int(objdb, object_handle, "port", &portnum, DEFAULT_PORT);
		else
			portnum = DEFAULT_PORT;
	}

	if (add_ifaddr(objdb, mcast_name, nodename, portnum))
		return -1;

	/* Get all alternative node names */
	num_nodenames = 1;
	objdb->object_find_create(node_object_handle, "altname", strlen("altname"), &find_handle);
	while (objdb->object_find_next(find_handle, &alt_object) == 0) {
		unsigned int alt_port;
		char *alt_name;
		char *alt_mcast;

		if (objdb_get_string(objdb, alt_object, "name", &alt_name)) {
			continue;
		}

		/* per-altname port, defaulting to the main port */
		objdb_get_int(objdb, alt_object, "port", &alt_port, portnum);

		if (objdb_get_string(objdb, alt_object, "mcast", &alt_mcast)) {
			alt_mcast = mcast_name;
		}

		/* use the port read for this altname, not the main one */
		if (add_ifaddr(objdb, alt_mcast, alt_name, alt_port)) {
			objdb->object_find_destroy(find_handle);
			return -1;
		}

		num_nodenames++;
	}
	objdb->object_find_destroy(find_handle);

	return 0;
}
/*
 * These are basically cman overrides to the totem config bits.
 *
 * Writes cman's settings into the object database:
 *   - "totem": version, nodeid, vsftype, plus token/join/consensus/rrp_mode/
 *     secauth values that are only created when objdb_get_string() reports
 *     the key is not already there, and the key file or a key derived from
 *     the cluster name;
 *   - "logging": syslog facility, log file and (depending on debug_mask)
 *     stderr/debug settings, including the CMAN logger_subsys object;
 *   - "aisexec": user and group "root";
 *   - "cman" (under the cluster object): cluster_id, two_node, debug_mask;
 *   - a "service" object naming corosync_cman.
 *
 * The return values of the objdb calls are not checked.
 */
static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
{
unsigned int logger_object_handle;
char *logstr;
char *logfacility;
unsigned int object_handle;
unsigned int find_handle;
char tmp[256];
/* "totem" key already exists, because we have added the interfaces by now */
objdb->object_find_create(OBJECT_PARENT_HANDLE,"totem", strlen("totem"), &find_handle);
if (objdb->object_find_next(find_handle, &object_handle) == 0)
{
char *value;
objdb->object_key_create(object_handle, "version", strlen("version"),
"2", 2);
sprintf(tmp, "%d", nodeid);
objdb->object_key_create(object_handle, "nodeid", strlen("nodeid"),
tmp, strlen(tmp)+1);
objdb->object_key_create(object_handle, "vsftype", strlen("vsftype"),
"none", strlen("none")+1);
/* Set the token timeout to 10 seconds, but don't override anything that
might be in cluster.conf */
if (objdb_get_string(objdb, object_handle, "token", &value)) {
objdb->object_key_create(object_handle, "token", strlen("token"),
"10000", strlen("10000")+1);
}
if (objdb_get_string(objdb, object_handle, "token_retransmits_before_loss_const", &value)) {
objdb->object_key_create(object_handle, "token_retransmits_before_loss_const",
strlen("token_retransmits_before_loss_const"),
"20", strlen("20")+1);
}
/* Extend consensus & join timeouts per bz#214290 */
if (objdb_get_string(objdb, object_handle, "join", &value)) {
objdb->object_key_create(object_handle, "join", strlen("join"),
"60", strlen("60")+1);
}
if (objdb_get_string(objdb, object_handle, "consensus", &value)) {
objdb->object_key_create(object_handle, "consensus", strlen("consensus"),
"4800", strlen("4800")+1);
}
/* Set RRP mode appropriately: "active" when more than one interface was
added, "none" otherwise; an existing rrp_mode key is left alone */
if (objdb_get_string(objdb, object_handle, "rrp_mode", &value)) {
if (num_interfaces > 1) {
objdb->object_key_create(object_handle, "rrp_mode", strlen("rrp_mode"),
"active", strlen("active")+1);
}
else {
objdb->object_key_create(object_handle, "rrp_mode", strlen("rrp_mode"),
"none", strlen("none")+1);
}
}
/* secauth defaults to "1" when not configured */
if (objdb_get_string(objdb, object_handle, "secauth", &value)) {
sprintf(tmp, "%d", 1);
objdb->object_key_create(object_handle, "secauth", strlen("secauth"),
tmp, strlen(tmp)+1);
}
/* optional security key filename: a name already set (environment or
cman object) is written to totem, otherwise totem's own key is read */
if (!key_filename) {
objdb_get_string(objdb, object_handle, "keyfile", &key_filename);
}
else {
objdb->object_key_create(object_handle, "keyfile", strlen("keyfile"),
key_filename, strlen(key_filename)+1);
}
if (!key_filename) {
/* Use the cluster name as key,
* This isn't a good isolation strategy but it does make sure that
* clusters on the same port/multicast by mistake don't actually interfere
* and that we have some form of encryption going.
*/
int keylen;
memset(tmp, 0, sizeof(tmp));
/* NOTE(review): unbounded copy into the 256-byte tmp; relies on the
cluster name being shorter than that */
strcpy(tmp, cluster_name);
/* Key length must be a multiple of 4: this rounds up to the next multiple
of 4 above the name length (tmp was zeroed, so the padding is NUL bytes)
and the mask keeps the result below 256 */
keylen = (strlen(cluster_name)+4) & 0xFC;
objdb->object_key_create(object_handle, "key", strlen("key"),
tmp, keylen);
}
}
objdb->object_find_destroy(find_handle);
/* Make sure mainconfig doesn't stomp on our logging options */
objdb->object_find_create(OBJECT_PARENT_HANDLE, "logging", strlen("logging"), &find_handle);
if (objdb->object_find_next(find_handle, &object_handle)) {
objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
"logging", strlen("logging"));
}
objdb->object_find_destroy(find_handle);
/* NOTE(review): facility_name_get() returns NULL for a value missing from
its table, and the strlen(logfacility) below does not guard against that */
logfacility = facility_name_get(SYSLOGFACILITY);
logger_object_handle = find_cman_logger(objdb, object_handle);
if (objdb_get_string(objdb, object_handle, "syslog_facility", &logstr)) {
objdb->object_key_create(object_handle, "syslog_facility", strlen("syslog_facility"),
logfacility, strlen(logfacility)+1);
}
if (objdb_get_string(objdb, object_handle, "to_file", &logstr)) {
objdb->object_key_create(object_handle, "to_file", strlen("to_file"),
"yes", strlen("yes")+1);
}
if (objdb_get_string(objdb, object_handle, "logfile", &logstr)) {
objdb->object_key_create(object_handle, "logfile", strlen("logfile"),
LOGDIR "/cman.log", strlen(LOGDIR "/cman.log")+1);
}
/* With a debug mask set, log to stderr too and switch the CMAN logger to
debug level; otherwise use the compiled-in SYSLOGLEVEL */
if (debug_mask) {
objdb->object_key_create(object_handle, "to_stderr", strlen("to_stderr"),
"yes", strlen("yes")+1);
objdb->object_key_create(logger_object_handle, "debug", strlen("debug"),
"on", strlen("on")+1);
objdb->object_key_create(logger_object_handle, "syslog_level", strlen("syslog_level"),
"debug", strlen("debug")+1);
}
else {
char *loglevel;
/* NOTE(review): priority_name_get() can likewise return NULL */
loglevel = priority_name_get(SYSLOGLEVEL);
objdb->object_key_create(logger_object_handle, "syslog_level", strlen("syslog_level"),
loglevel, strlen(loglevel)+1);
}
/* Don't run under user "ais" */
objdb->object_find_create(OBJECT_PARENT_HANDLE, "aisexec", strlen("aisexec"), &find_handle);
if (objdb->object_find_next(find_handle, &object_handle) != 0) {
objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
"aisexec", strlen("aisexec"));
}
objdb->object_find_destroy(find_handle);
objdb->object_key_create(object_handle, "user", strlen("user"),
"root", strlen("root") + 1);
objdb->object_key_create(object_handle, "group", strlen("group"),
"root", strlen("root") + 1);
/* Pass cluster_id and the optional two_node/debug_mask settings down
through the cluster's "cman" object */
objdb->object_find_create(cluster_parent_handle, "cman", strlen("cman"), &find_handle);
if (objdb->object_find_next(find_handle, &object_handle) == 0)
{
char str[255];
sprintf(str, "%d", cluster_id);
objdb->object_key_create(object_handle, "cluster_id", strlen("cluster_id"),
str, strlen(str) + 1);
if (two_node) {
sprintf(str, "%d", 1);
objdb->object_key_create(object_handle, "two_node", strlen("two_node"),
str, strlen(str) + 1);
}
if (debug_mask) {
sprintf(str, "%d", debug_mask);
objdb->object_key_create(object_handle, "debug_mask", strlen("debug_mask"),
str, strlen(str) + 1);
}
}
objdb->object_find_destroy(find_handle);
/* Make sure we load our alter-ego - the main cman module */
objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
"service", strlen("service"));
objdb->object_key_create(object_handle, "name", strlen("name"),
"corosync_cman", strlen("corosync_cman") + 1);
objdb->object_key_create(object_handle, "ver", strlen("ver"),
"0", 2);
-
- /* Load some other useful openais services too */
- if (!disable_openais) {
- objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
- "service", strlen("service"));
- objdb->object_key_create(object_handle, "name", strlen("name"),
- "openais_ckpt", strlen("openais_ckpt") + 1);
- objdb->object_key_create(object_handle, "ver", strlen("ver"),
- "0", 2);
-
- objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
- "service", strlen("service"));
- objdb->object_key_create(object_handle, "name", strlen("name"),
- "openais_evt", strlen("openais_evt") + 1);
- objdb->object_key_create(object_handle, "ver", strlen("ver"),
- "0", 2);
-
- objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
- "service", strlen("service"));
- objdb->object_key_create(object_handle, "name", strlen("name"),
- "openais_msg", strlen("openais_msg") + 1);
- objdb->object_key_create(object_handle, "ver", strlen("ver"),
- "0", 2);
-
- objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
- "service", strlen("service"));
- objdb->object_key_create(object_handle, "name", strlen("name"),
- "openais_clm", strlen("openais_clm") + 1);
- objdb->object_key_create(object_handle, "ver", strlen("ver"),
- "0", 2);
-
- objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
- "service", strlen("service"));
- objdb->object_key_create(object_handle, "name", strlen("name"),
- "openais_lck", strlen("openais_lck") + 1);
- objdb->object_key_create(object_handle, "ver", strlen("ver"),
- "0", 2);
- }
}
/*
 * If ccs is not available then use some defaults.
 *
 * Fills in every setting that was not supplied through the environment
 * (cluster name and ID, node name, multicast address, votes, port, node ID)
 * and writes a minimal clusternodes/clusternode entry plus the cluster name,
 * cluster_id and expected_votes into the object database so the rest of the
 * code finds what it expects.
 *
 * Returns 0 on success, -1 when the node name cannot be determined or
 * resolved (error_reason and the cman pipe carry the reason).
 */
static int set_noccs_defaults(struct objdb_iface_ver0 *objdb)
{
	char tmp[255];
	unsigned int object_handle;
	unsigned int find_handle;

	/* Enforce key */
	key_filename = NOCCS_KEY_FILENAME;

	if (!cluster_name)
		cluster_name = DEFAULT_CLUSTER_NAME;

	if (!cluster_id)
		cluster_id = generate_cluster_id(cluster_name);

	/* Copy the node name straight into nodename with a length check.  The
	   uname() result lives in a block-local struct, so no pointer to it is
	   kept once the block ends. */
	if (nodename_env) {
		if (strlen(nodename_env) >= sizeof(nodename)) {
			snprintf(error_reason, sizeof(error_reason),
				 "Overridden node name %s is too long", nodename_env);
			write_cman_pipe("Overridden node name is too long");
			return -1;
		}
		strcpy(nodename, nodename_env);
	} else {
		struct utsname utsname;

		if (uname(&utsname)) {
			sprintf(error_reason, "cannot get node name, uname failed");
			write_cman_pipe("Can't determine local node name");
			return -1;
		}
		if (strlen(utsname.nodename) >= sizeof(nodename)) {
			sprintf(error_reason, "node name from uname is too long");
			write_cman_pipe("Can't determine local node name");
			return -1;
		}
		strcpy(nodename, utsname.nodename);
	}
	num_nodenames = 1;

	if (!mcast_name) {
		mcast_name = default_mcast(nodename, cluster_id);
	}

	/* This will increase as nodes join the cluster */
	if (!expected_votes)
		expected_votes = 1;
	if (!votes)
		votes = 1;

	if (!portnum)
		portnum = DEFAULT_PORT;

	/* Invent a node ID from the node's resolved address */
	if (!nodeid) {
		struct addrinfo *ainfo;
		struct addrinfo ahints;
		int ret;

		memset(&ahints, 0, sizeof(ahints));
		ret = getaddrinfo(nodename, NULL, &ahints, &ainfo);
		if (ret) {
			snprintf(error_reason, sizeof(error_reason),
				 "Can't determine address family of nodename %s\n", nodename);
			write_cman_pipe("Can't determine address family of nodename");
			return -1;
		}

		if (ainfo->ai_family == AF_INET) {
			/* the raw 4 address bytes become the node ID */
			struct sockaddr_in *addr = (struct sockaddr_in *)ainfo->ai_addr;
			memcpy(&nodeid, &addr->sin_addr, sizeof(int));
		}
		if (ainfo->ai_family == AF_INET6) {
			/* the last 32 bits of the address become the node ID */
			struct sockaddr_in6 *addr = (struct sockaddr_in6 *)ainfo->ai_addr;
			memcpy(&nodeid, &addr->sin6_addr.s6_addr32[3], sizeof(int));
		}
		freeaddrinfo(ainfo);
	}

	/* Write a local <clusternode> entry to keep the rest of the code happy */
	objdb->object_create(cluster_parent_handle, &object_handle,
			     "clusternodes", strlen("clusternodes"));
	objdb->object_create(object_handle, &object_handle,
			     "clusternode", strlen("clusternode"));
	objdb->object_key_create(object_handle, "name", strlen("name"),
				 nodename, strlen(nodename)+1);

	sprintf(tmp, "%d", votes);
	objdb->object_key_create(object_handle, "votes", strlen("votes"),
				 tmp, strlen(tmp)+1);

	sprintf(tmp, "%d", nodeid);
	objdb->object_key_create(object_handle, "nodeid", strlen("nodeid"),
				 tmp, strlen(tmp)+1);

	/* Write the default cluster name & ID in here too */
	objdb->object_key_create(cluster_parent_handle, "name", strlen("name"),
				 cluster_name, strlen(cluster_name)+1);

	/* Reuse the existing "cman" object and create one only when the lookup
	   fails, the same way cmanpre_readconfig() does it */
	objdb->object_find_create(cluster_parent_handle, "cman", strlen("cman"), &find_handle);
	if (objdb->object_find_next(find_handle, &object_handle) != 0) {
		objdb->object_create(cluster_parent_handle, &object_handle,
				     "cman", strlen("cman"));
	}
	objdb->object_find_destroy(find_handle);

	sprintf(tmp, "%d", cluster_id);
	objdb->object_key_create(object_handle, "cluster_id", strlen("cluster_id"),
				 tmp, strlen(tmp)+1);

	sprintf(tmp, "%d", expected_votes);
	objdb->object_key_create(object_handle, "expected_votes", strlen("expected_votes"),
				 tmp, strlen(tmp)+1);

	return 0;
}
/*
 * Read cluster-wide settings from the object database.
 *
 * The cluster's "name" key is read into cluster_name.  When a "cman" object
 * exists under the cluster object, any of portnum, key_filename and
 * cluster_id that are still unset are read from it; a cluster_id that is
 * still 0 afterwards is generated from the cluster name.
 *
 * Always returns 0.
 */
static int get_cman_globals(struct objdb_iface_ver0 *objdb)
{
	unsigned int cman_handle;
	unsigned int finder;
	int found_cman;

	objdb_get_string(objdb, cluster_parent_handle, "name", &cluster_name);

	/* Get the <cman> bits that override <totem> bits */
	objdb->object_find_create(cluster_parent_handle, "cman", strlen("cman"), &finder);
	found_cman = (objdb->object_find_next(finder, &cman_handle) == 0);

	if (found_cman) {
		if (portnum == 0)
			objdb_get_int(objdb, cman_handle, "port", &portnum, DEFAULT_PORT);

		if (key_filename == NULL)
			objdb_get_string(objdb, cman_handle, "keyfile", &key_filename);

		if (cluster_id == 0) {
			objdb_get_int(objdb, cman_handle, "cluster_id", &cluster_id, 0);

			/* nothing configured either: derive one from the name */
			if (cluster_id == 0)
				cluster_id = generate_cluster_id(cluster_name);
		}
	}

	objdb->object_find_destroy(finder);
	return 0;
}
/*
 * Config-interface entry point: prepare the object database for cman.
 *
 * Picks up the startup pipe descriptor from CMAN_PIPE, locates the "cluster"
 * object, makes sure it has a "cman" child, applies environment overrides
 * and then either the no-config defaults (CMAN_NOCONFIG set) or the values
 * from the configuration.  If that succeeds the node name and interfaces are
 * resolved and the cman overrides are written.
 *
 * objdb        - object database interface
 * error_string - receives a pointer to the static status/error message
 *
 * Returns 0 on success, non-zero on failure.
 *
 * NOTE(review): the result of looking up "cluster" is not checked, so
 * cluster_parent_handle keeps its previous value when no such object exists.
 */
static int cmanpre_readconfig(struct objdb_iface_ver0 *objdb, char **error_string)
{
	int ret;
	unsigned int object_handle;
	unsigned int find_handle;

	if (getenv("CMAN_PIPE"))
		startup_pipe = atoi(getenv("CMAN_PIPE"));

	objdb->object_find_create(OBJECT_PARENT_HANDLE, "cluster", strlen("cluster"), &find_handle);
	objdb->object_find_next(find_handle, &cluster_parent_handle);
	objdb->object_find_destroy(find_handle);

	/* Make sure a "cman" object exists for the later steps to write into */
	objdb->object_find_create(cluster_parent_handle, "cman", strlen("cman"), &find_handle);
	if (objdb->object_find_next(find_handle, &object_handle)) {
		objdb->object_create(cluster_parent_handle, &object_handle,
				     "cman", strlen("cman"));
	}
	objdb->object_find_destroy(find_handle);

	/* A failure here (out of memory) must stop the configuration too */
	ret = get_env_overrides();

	if (!ret) {
		if (getenv("CMAN_NOCONFIG"))
			ret = set_noccs_defaults(objdb);
		else
			ret = get_cman_globals(objdb);
	}

	if (!ret) {
		ret = get_nodename(objdb);
		/* written whether or not get_nodename() succeeded, as before */
		add_cman_overrides(objdb);
	}

	if (!ret) {
		sprintf (error_reason, "%s", "Successfully parsed cman config\n");
	}
	else {
		/* keep a more specific reason if one of the helpers left one */
		if (error_reason[0] == '\0')
			sprintf (error_reason, "%s", "Error parsing cman config\n");
	}
	*error_string = error_reason;

	return ret;
}
/*
 * Send an error message, including its terminating NUL, down the CMAN
 * startup pipe so that cman_tool can display it.
 *
 * Returns the result of write(), or 0 without writing anything when no
 * startup pipe descriptor has been set (startup_pipe is 0).
 */
int write_cman_pipe(char *message)
{
	if (!startup_pipe)
		return 0;

	return write(startup_pipe, message, strlen(message)+1);
}
diff --git a/cman/man/cman_tool.8 b/cman/man/cman_tool.8
index 06d264ae3..f0cb90d32 100644
--- a/cman/man/cman_tool.8
+++ b/cman/man/cman_tool.8
@@ -1,396 +1,401 @@
.TH CMAN_TOOL 8 "Nov 8 2007" "Cluster utilities"
.SH NAME
cman_tool \- Cluster Management Tool
.SH SYNOPSIS
.B cman_tool join | leave | kill | expected | votes | version | wait | status | nodes | services | debug [options]
.br
.SH DESCRIPTION
.PP
.B cman_tool
is a program that manages the cluster management subsystem CMAN. cman_tool
can be used to join the node to a cluster, leave the cluster, kill another
cluster node or change the value of expected votes of a cluster.
.br
Be careful that you understand the consequences of the commands issued via cman_tool
as they can affect all nodes in your cluster. Most of the time the cman_tool
will only be invoked from your startup and shutdown scripts.
.br
.SH SUBCOMMANDS
.TP
.I join
This is the main use of cman_tool. It instructs the cluster manager to attempt
to join an existing cluster or (if no existing cluster exists) then to form
a new one on its own.
.br
If no options are given to this command then it will take the cluster
configuration information from cluster.conf. However, it is possible to provide
all the information on the command-line or to override cluster.conf values by using
the command line.
.TP
.I leave
Tells CMAN to leave the cluster. You cannot do this if there are subsystems
(eg DLM, GFS) active. You should dismount all GFS filesystems,
shutdown CLVM, fenced and anything else using the cluster manager before
using
.B cman_tool leave.
Look at 'cman_tool status' and group_tool to see how many (and which)
subsystems are active.
.br
When a node leaves the cluster, the remaining nodes recalculate quorum and this
may block cluster activity if the required number of votes is not present.
If this node is to be down for an extended period of time and you need to
keep the cluster running, add the
.B remove
option, and the remaining nodes will recalculate quorum such that activity
can continue.
.TP
.I kill
Tells CMAN to kill another node in the cluster. This will cause the local
node to send a "KILL" message to that node and it will shut down. Recovery
will occur for the killed node as if it had failed. This is a sort of remote
version of "leave force" so only use it if you really know what you are doing.
.TP
.I expected
Tells CMAN a new value of expected votes and instructs it to recalculate
quorum based on this value.
.br
Use this option if your cluster has lost quorum due to nodes failing and
you need to get it running again in a hurry.
.TP
.I version
Used alone this will report the major, minor, patch and config versions
used by CMAN (also displayed in 'cman_tool status'). It can also be used
with -r to set a new config version on all cluster members.
.TP
.I wait
Waits until the node is a member of the cluster and then returns.
.TP
.I status
Displays the local view of the cluster status.
.TP
.I nodes
Displays the local view of the cluster nodes.
.TP
.I services
Displays the local view of subsystems using cman (deprecated, group_tool
should be used instead).
.TP
.I debug
Sets the debug level of the running cman daemon. Debug output will be
sent to syslog level LOG_DEBUG. The
.B -d
switch specifies the new logging level. This is the same bitmask used
for cman_tool join -d
.br
.SH "LEAVE" OPTIONS
.TP
.I -w
Normally, "cman_tool leave" will fail if the cluster is in transition (ie
another node is joining or leaving the cluster). By adding the -w flag,
cman_tool will wait and retry the leave operation repeatedly until it succeeds
or a more serious error occurs.
.TP
.I -t <seconds>
If -w is also specified then -t dictates the maximum amount of time cman_tool
is prepared to wait. If the operation times out then a status of 2 is returned.
.TP
.I force
Shuts down the cluster manager without first telling any of the subsystems
to close down. Use this option with extreme care as it could easily cause data
loss.
.TP
.I remove
Tells the rest of the cluster to recalculate quorum such that activity can
continue without this node.
.SH "EXPECTED" OPTIONS
.TP
.I -e <expected-votes>
The new value of expected votes to use. This will usually be enough
to bring the cluster back to life. Values that would cause incorrect
quorum will be rejected.
.SH "KILL" OPTIONS
.TP
.I -n <nodename>
The node name of the node to be killed. This should be the unqualified node
name as it appears in 'cman_tool nodes'.
.SH "VERSION" OPTIONS
.TP
.I -r <config_version>
The new config version. You don't need to use this when adding a new node,
the new cman node will tell the rest of the cluster to get their latest
version of the config file automatically.
.SH "WAIT" OPTIONS
.TP
.I -q
Waits until the cluster is quorate before returning.
.TP
.I -t <seconds>
Dictates the maximum amount of time cman_tool is prepared to wait.
If the operation times out then a status of 2 is returned.
.br
.SH "JOIN" OPTIONS
.TP
.I -c <clustername>
Provides a text name for the cluster. You can have several clusters on one
LAN and they are distinguished by this name. Note that the name is hashed to
provide a unique number which is what actually distinguishes the cluster, so
it is possible that two different names can clash. If this happens, the node
will not be allowed into the existing cluster and you will have to pick
another name or use a different port number for cluster communication.
.TP
.I -p <port>
UDP port number used for cluster communication. This defaults to 5405.
.TP
.I -v <votes>
Number of votes this node has in the cluster. Defaults to 1.
.TP
.I -e <expected votes>
Number of expected votes for the whole cluster. If different nodes
provide different values then the highest is used. The cluster will
only operate when quorum is reached - that is, when more than half of the
votes are available to the cluster. The default for
this value is the total number of votes for all nodes in the configuration file.
.TP
.I -2
Sets the cluster up for a special "two node only" mode. Because of the
quorum requirements mentioned above, a two-node cluster cannot be valid.
This option tells the cluster manager that there will only ever be two
nodes in the cluster and relies on fencing to ensure cluster integrity.
If you specify this you cannot add more nodes without taking down the
existing cluster and reconfiguring it. Expected votes should be set to
1 for a two-node cluster.
.TP
.I -n <nodename>
Overrides the node name. By default the unqualified hostname is used. This
option is also used to specify which interface is used for cluster
communication.
.TP
.I -N <nodeid>
Overrides the node ID for this node. Normally, nodes are assigned a
node id in cluster.conf. If you specify an incorrect node ID here, the
node might not be allowed to join the cluster. Setting node IDs in the
configuration is a far better way to do this.
.br
Note that the node's application to join the cluster may be rejected if you
try to set the nodeid to one that has already been used, or if the node
was previously a member of the cluster but with a different nodeid.
.TP
.I -o <nodename>
Override the name this node will have in the cluster. This will
normally be the hostname or the first name specified by -n.
Note how this differs from -n: -n tells cman_tool how to find
the host address and/or the entry in the configuration file. -o simply
changes the name the node will have in the cluster and has no
bearing on the actual name of the machine. Use this option
with extreme caution.
.br
.TP
.I -m <multicast-address>
Specifies a multicast address to use for cluster communication. This
is required for IPv6 operation. You should also specify an ethernet
interface to bind to this multicast address using the -i option.
.TP
.I -w
Join and wait until the node is a cluster member.
.TP
.I -q
Join and wait until the cluster is quorate.
If the cluster join fails and -w (or -q) is specified, then it will be retried. Note that
cman_tool cannot tell whether the cluster join was rejected by another node for a good reason
or that it timed out for some benign reason; so it is strongly recommended that a timeout
is also given with the wait options to join. If you don't want join to retry on failure but
do want to wait, use the
.B cman_tool join
command without -w followed by
.B cman_tool wait.
.TP
.I -k <keyfile>
All traffic sent out by cman/openais is encrypted. By default the security key
used is simply the cluster name. If you need more security you can specify a
key file that contains the key used to encrypt cluster communications.
Of course, the contents of the key file must be the same on all nodes in the
cluster. It is up to you to securely copy the file to the nodes.
.TP
.I -t <seconds>
If -w or -q is also specified then -t dictates the maximum amount of time cman_tool
is prepared to wait. If the operation times out then a status of 2 is returned.
Note that just because cman_tool has given up, does not mean that cman itself
has stopped trying to join a cluster.
.TP
.I -X
Tells cman not to use the configuration file to get cluster information. If you use this option then cman will
apply several defaults to the cluster to get it going. The cluster name will be
"RHCluster", node IDs will default to the IP address of the node and remote node
names will show up as Node<nodeid>. All of these, apart from the node names can
be overridden on the cman_tool command-line if required.
.br
If you have to set up fence devices, services or anything else in cluster.conf then
this option is probably not worthwhile to you - the extra readability of sensible node
names and numbers will make it worth using cluster.conf for the cluster too. But for a simple
failover cluster this might save you some effort.
.br
On each node using this configuration you will need to have the same authorization key
installed. To create this key run
.br
mkdir /etc/ais
.br
ais-keygen
.br
mv /etc/ais/authkey /etc/cluster/cman_authkey
.br
then copy that file to all nodes you want to join the cluster.
.br
.TP
.I -C
Overrides the default configuration module. Usually cman uses ccsd to load its
configuration. If you have your configuration database held elsewhere (eg LDAP) and
have a configuration plugin for it, then you should specify the name of the module
(see the documentation for the module for the name of it - it's not necessarily the
same as the filename) here.
.br
It is possible to chain configuration modules by separating them with colons. So to
add two modules (eg) 'ldapconfig' and 'ldappreproc' to the chain start cman with
-C ldapconfig:ldappreproc
.br
The default value for this is 'ccsconfig'. Note that if the -X is on the command-line
then -C will be ignored.
-
+.TP
+.I -A
+Don't load openais services. Normally cman_tool join will load the configuration
+module 'openaisserviceenable' which will load the services installed by openais.
+If you don't want to use these services or have not installed openais then
+this switch will disable them.
.SH "NODES" OPTIONS
.TP
.I -f
Shows the date/time the node was last fenced (if it has been fenced), and also
the fence system that was used.
.br
.TP
.I -a
Shows the IP address(es) the nodes are communicating on.
.br
.TP
.I -n <nodename>
Shows node information for a specific node. This should be the unqualified node
name as it appears in 'cman_tool nodes'.
.br
.TP
.I -F <format>
Specify the format of the output. The format string may contain one or
more format options, each separated by a comma. Valid format options
include: id, name, type, and addr.
.br
.SH "DEBUG" OPTIONS
.TP
.I -d <value>
The value is a bitmask of
.br
2 Barriers
.br
4 Membership messages
.br
8 Daemon operation, including command-line interaction
.br
16 Interaction with OpenAIS
.br
32 Startup debugging (cman_tool join operations only)
.br
.SH NOTES
.br
The
.B nodes
subcommand shows a list of nodes known to cman. The state is one of the following:
.br
M The node is a member of the cluster
.br
X The node is not a member of the cluster
.br
d The node is known to the cluster but disallowed access to it.
.br
.SH ENVIRONMENT VARIABLES
cman_tool removes most environment variables before forking and running OpenAIS, as well as adding some of its own for setting up
configuration parameters that were overridden on the command-line, the exception to this is that variables with names starting
COROSYNC_ will be passed down intact as they are assumed to be used for configuring the daemon.
.SH DISALLOWED NODES
Occasionally (but very infrequently I hope) you may see nodes marked as "Disallowed" in cman_tool status or "d" in cman_tool nodes. This is a bit of a nasty hack to get around mismatch between what the upper layers expect of the cluster manager and OpenAIS.
.TP
If a node experiences a momentary lack of connectivity, but one that is long enough to trigger the token timeouts, then it will be removed from the cluster. When connectivity is restored OpenAIS will happily let it rejoin the cluster with no fuss. Sadly the upper layers don't like this very much. They may (indeed probably will) have changed their internal state while the other node was away and there is no straightforward way to bring the rejoined node up-to-date with that state. When this happens the node is marked "Disallowed" and is not permitted to take part in cman operations.
.P
If the remainder of the cluster is quorate then the node will be sent a kill message and it will be forced to leave the cluster that way. Note that fencing should kick in to remove the node permanently anyway, but it may take longer than the network outage for this to complete.
If the remainder of the cluster is inquorate then we have a problem. The likelihood is that we will have two (or more) partitioned clusters and we cannot decide which is the "right" one. In this case we need to defer to the system administrator to kill an appropriate selection of nodes to restore the cluster to sensible operation.
The latter scenario should be very rare and may indicate a bug somewhere in the code. If the local network is very flaky or busy it may be necessary to increase some of the protocol timeouts for OpenAIS. We are trying to think of better solutions to this problem.
Recovering from this state can, unfortunately, be complicated. Fortunately, in the majority of cases, fencing will do the job for you, and the disallowed state will only be temporary. If it persists, the recommended approach is to do a cman_tool nodes on all systems in the cluster and determine the largest common subset of nodes that are valid members to each other. Then reboot the others and let them rejoin correctly. In the case of a single-node disconnection this should be straightforward, with a large cluster that has experienced a network partition it could get very complicated!
Example:
In this example we have a five node cluster that has experienced a network partition. Here is the output of cman_tool nodes from all systems:
.nf
Node Sts Inc Joined Name
1 M 2372 2007-11-05 02:58:55 node-01.example.com
2 d 2376 2007-11-05 02:58:56 node-02.example.com
3 d 2376 2007-11-05 02:58:56 node-03.example.com
4 M 2376 2007-11-05 02:58:56 node-04.example.com
5 M 2376 2007-11-05 02:58:56 node-05.example.com
Node Sts Inc Joined Name
1 d 2372 2007-11-05 02:58:55 node-01.example.com
2 M 2376 2007-11-05 02:58:56 node-02.example.com
3 M 2376 2007-11-05 02:58:56 node-03.example.com
4 d 2376 2007-11-05 02:58:56 node-04.example.com
5 d 2376 2007-11-05 02:58:56 node-05.example.com
Node Sts Inc Joined Name
1 d 2372 2007-11-05 02:58:55 node-01.example.com
2 M 2376 2007-11-05 02:58:56 node-02.example.com
3 M 2376 2007-11-05 02:58:56 node-03.example.com
4 d 2376 2007-11-05 02:58:56 node-04.example.com
5 d 2376 2007-11-05 02:58:56 node-05.example.com
Node Sts Inc Joined Name
1 M 2372 2007-11-05 02:58:55 node-01.example.com
2 d 2376 2007-11-05 02:58:56 node-02.example.com
3 d 2376 2007-11-05 02:58:56 node-03.example.com
4 M 2376 2007-11-05 02:58:56 node-04.example.com
5 M 2376 2007-11-05 02:58:56 node-05.example.com
Node Sts Inc Joined Name
1 M 2372 2007-11-05 02:58:55 node-01.example.com
2 d 2376 2007-11-05 02:58:56 node-02.example.com
3 d 2376 2007-11-05 02:58:56 node-03.example.com
4 M 2376 2007-11-05 02:58:56 node-04.example.com
5 M 2376 2007-11-05 02:58:56 node-05.example.com
.fi
In this scenario we should kill nodes node-02 and node-03. Of course, the 3 node cluster of node-01, node-04 & node-05 should remain quorate and be able to fence the two rejoined nodes anyway, but it is possible that the cluster has a qdisk setup that precludes this.
.SH CONFIGURATION SYSTEMS
This section details how the configuration systems work in cman. You might need to know this if you are using the -C option
to cman_tool, or writing your own configuration subsystem.
.br
By default cman uses two configuration plugins to OpenAIS. The first, 'ccsconfig', reads the configuration information
stored in cluster.conf and stores it in an internal database, in the same schema as it finds in cluster.conf.
The second plugin, 'cmanpreconfig', takes the information in that database, adds several cman defaults, determines
the OpenAIS node name and nodeID
and formats the information in a similar manner to openais.conf(5). OpenAIS then reads those keys to start the cluster protocol.
cmanpreconfig also reads several environment variables that might be set by cman_tool which can override information in the
configuration.
.br
In the absence of ccsconfig, ie when 'cman_tool join' is run with -X switch (this removes ccsconfig from the module list),
cmanpreconfig also generates several defaults so that the cluster can be got running without any configuration information - see above
for the details.
.br
Note that cmanpreconfig will not overwrite OpenAIS keys that are explicitly set in the configuration file, allowing you to provide
custom values for token timeouts etc, even though cman has its own defaults for some of those values. The exception to this is the node
name/address and multicast values, which are always taken from the cman configuration keys.

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 11:04 AM (1 d, 18 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018597
Default Alt Text
(82 KB)

Event Timeline