Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/server/checkpoint.c b/server/checkpoint.c
index edf7728a..5134b4fe 100644
--- a/server/checkpoint.c
+++ b/server/checkpoint.c
@@ -1,840 +1,824 @@
/*
Copyright Red Hat, Inc. 2009
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
MA 02139, USA.
*/
/*
* Author: Lon Hohberger <lhh at redhat.com>
*/
#include <config.h>
#include <stdio.h>
#include <simpleconfig.h>
#include <static_map.h>
#include <sys/types.h>
#include <stdint.h>
#include <time.h>
#include <server_plugin.h>
#include <string.h>
#include <malloc.h>
#include <syslog.h>
#include <errno.h>
#include <unistd.h>
#include <libvirt/libvirt.h>
#include <pthread.h>
#ifdef HAVE_OPENAIS_CPG_H
#include <openais/cpg.h>
#else
#ifdef HAVE_COROSYNC_CPG_H
#include <corosync/cpg.h>
#endif
#endif
#include <libcman.h>
#include <debug.h>
#include "virt.h"
#include "xvm.h"
#include "checkpoint.h"
#define NAME "checkpoint"
#define VERSION "0.8"
#define MAGIC 0x1e017afe
/*
 * Private context of the checkpoint backend.  checkpoint_init() allocates
 * one, stamps it with MAGIC and hands it back through its 'c' argument;
 * VALIDATE() checks the stamp on every callback.
 */
struct check_info {
/* Holds MAGIC while the context is live; cleared by checkpoint_shutdown(). */
int magic;
/* Not referenced anywhere in the visible code. */
int pad;
};
/*
 * Guard used at the top of the backend callbacks: makes the *calling*
 * function return -1 with errno = EINVAL unless 'arg' is a non-NULL pointer
 * to a struct check_info whose magic field equals MAGIC.
 *
 * 'arg' is parenthesised at each use so that any pointer expression can be
 * passed safely.  Note that it is evaluated twice; do not pass an
 * expression with side effects.
 */
#define VALIDATE(arg) \
do {\
	if (!(arg) || ((struct check_info *)(arg))->magic != MAGIC) { \
		errno = EINVAL;\
		return -1; \
	} \
} while(0)
static void *checkpoint_handle = NULL;
static virt_list_t *local_vms = NULL;
static char *uri = NULL;
static int use_uuid = 0;
static int
virt_list_update(virConnectPtr vp, virt_list_t **vl, int my_id)
{
virt_list_t *list = NULL;
-
- list = vl_get(vp, my_id);
- if (!list)
- return -1;
-
if (*vl)
vl_free(*vl);
+ list = vl_get(vp, my_id);
*vl = list;
- return 0;
-}
-
-
-static int
-get_cman_ids(cman_handle_t ch, uint32_t *my_id, uint32_t *high_id)
-{
- int max_nodes;
- int actual;
- cman_node_t *nodes = NULL;
- cman_node_t me;
- uint32_t high = 0;
- int ret = -1, x, _local = 0;
-
- if (!my_id && !high_id)
- return 0;
-
- if (!ch) {
- _local = 1;
- ch = cman_init(NULL);
- }
- if (!ch)
+ if (!list)
return -1;
-
- max_nodes = cman_get_node_count(ch);
- if (max_nodes <= 0)
- goto out;
-
- if (my_id) {
- memset(&me, 0, sizeof(me));
- if (cman_get_node(ch, CMAN_NODEID_US, &me) < 0)
- goto out;
- *my_id = me.cn_nodeid;
- }
-
- if (!high_id) {
- ret = 0;
- goto out;
- }
-
- nodes = malloc(sizeof(cman_node_t) * max_nodes);
- if (!nodes)
- goto out;
- memset(nodes, 0, sizeof(cman_node_t) * max_nodes);
-
- if (cman_get_nodes(ch, max_nodes, &actual, nodes) < 0)
- goto out;
-
- for (x = 0; x < actual; x++)
- if (nodes[x].cn_nodeid > high && nodes[x].cn_member)
- high = nodes[x].cn_nodeid;
-
- *high_id = high;
-
- ret = 0;
-out:
- if (nodes)
- free(nodes);
- if (ch && _local)
- cman_finish(ch);
- return ret;
+ return 0;
}
/*
 * Ask cman whether the node with the given id is currently a member.
 *
 * Returns -1 if no cman handle could be obtained, 1 if cman reports the
 * node with cn_member set, and 0 otherwise (including when the node
 * lookup itself fails).
 */
static int
node_operational(uint32_t nodeid)
{
	cman_handle_t handle = cman_init(NULL);
	cman_node_t info;
	int is_member = 0;

	if (!handle)
		return -1;

	memset(&info, 0, sizeof(info));
	if (cman_get_node(handle, nodeid, &info) == 0)
		is_member = !!info.cn_member;

	/* The temporary handle is released on every path. */
	cman_finish(handle);
	return is_member;
}
/*
 * Read the stored state of 'domain' from the checkpoint 'hp' into '*state'.
 *
 * Returns -1 (errno = EINVAL) when any argument is NULL, when the domain
 * name is empty, or when it equals DOMAIN0NAME; otherwise returns whatever
 * ckpt_read() returns.
 */
static int
get_domain_state_ckpt(void *hp, const char *domain, vm_state_t *state)
{
	/* Set up front: every rejection below reports EINVAL. */
	errno = EINVAL;

	if (hp == NULL || domain == NULL || state == NULL)
		return -1;

	if (domain[0] == '\0')
		return -1;

	if (strcmp(DOMAIN0NAME, domain) == 0)
		return -1;

	return ckpt_read(hp, domain, state, sizeof(*state));
}
static inline int
wait_domain(const char *vm_name, virConnectPtr vp, int timeout)
{
int tries = 0;
int response = 1;
int ret;
virDomainPtr vdp;
virDomainInfo vdi;
if (use_uuid) {
vdp = virDomainLookupByUUIDString(vp, (const char *)vm_name);
} else {
vdp = virDomainLookupByName(vp, vm_name);
}
if (!vdp)
return 0;
/* Check domain liveliness. If the domain is still here,
we return failure, and the client must then retry */
/* XXX On the xen 3.0.4 API, we will be able to guarantee
synchronous virDomainDestroy, so this check will not
be necessary */
do {
if (++tries > timeout)
break;
sleep(1);
if (use_uuid) {
vdp = virDomainLookupByUUIDString(vp,
(const char *)vm_name);
} else {
vdp = virDomainLookupByName(vp, vm_name);
}
if (!vdp) {
dbg_printf(2, "Domain no longer exists\n");
response = 0;
break;
}
memset(&vdi, 0, sizeof(vdi));
ret = virDomainGetInfo(vdp, &vdi);
virDomainFree(vdp);
if (ret < 0)
continue;
if (vdi.state == VIR_DOMAIN_SHUTOFF) {
dbg_printf(2, "Domain has been shut off\n");
response = 0;
break;
}
dbg_printf(4, "Domain still exists (state %d) "
"after %d seconds\n",
vdi.state, tries);
} while (1);
return response;
}
/*
Returns: 0 - operational
1 - dead or presumed so
2 - VM not local and I am not the right node to deal with it
3 - VM status unknown; cannot operate on it
*/
static int
cluster_virt_status(const char *vm_name, uint32_t *owner)
{
- vm_state_t chk_state;
+ vm_state_t chk_state, temp_state;
virt_state_t *vs;
uint32_t me, high_id;
int ret = 0;
dbg_printf(80, "%s %s\n", __FUNCTION__, vm_name);
/* if we can't find the high ID, we can't do anything useful */
- /* This should be cpg_get_ids() but it's segfaulting for some
- reason :( */
- if (get_cman_ids(NULL, &me, &high_id) != 0)
+ if (cpg_get_ids(&me, &high_id) != 0)
return 2;
if (use_uuid) {
vs = vl_find_uuid(local_vms, vm_name);
} else {
vs = vl_find_name(local_vms, vm_name);
}
if (!vs) {
ret = 2; /* not found locally */
+ temp_state.s_owner = 0;
+ temp_state.s_state = 0;
+
+ if (get_domain_state_ckpt(checkpoint_handle,
+ vm_name, &chk_state) < 0) {
+ if (me == high_id) {
+ dbg_printf(2, "High ID: Unknown VM\n");
+ ret = 3;
+ goto out;
+ }
+ } else if (me == chk_state.s_owner) {
+ /* <UVT> If a domain has disappeared completely from libvirt (i.e., it was destroyed),
+ we end up with the checkpoint section still containing its last state and last owner.
+ fence_virtd would then freeze at the next status call, because no node is willing to
+ return anything but 2. So the corresponding section should be deleted, but only by
+ the high_id node, because any other node cannot know whether the domain has been started
+ on some other node. If the checkpoint names us as the owner of the domain but we
+ don't have it, we set s_state to a special value (-1) to let high_id know about
+ this situation. </UVT> */
+ dbg_printf(2, "I am an owner of unexisting domain, mangling field\n");
+ temp_state.s_owner = me;
+ temp_state.s_state = -1;
+ if (ckpt_write(checkpoint_handle, vm_name,
+ &temp_state, sizeof(vm_state_t)) < 0)
+ dbg_printf(2, "error storing in %s\n", __FUNCTION__);
+ }
if (me != high_id)
goto out;
- if (get_domain_state_ckpt(checkpoint_handle,
- vm_name, &chk_state)) {
- dbg_printf(2, "High ID: Unknown VM\n");
- ret = 3;
+ if ((chk_state.s_state == -1) || (temp_state.s_state == -1)) {
+ dbg_printf(2, "I am high id and state field is mangled, removing section\n");
+ ckpt_erase (checkpoint_handle, vm_name);
+ ret = 1;
goto out;
}
if (node_operational(chk_state.s_owner)) {
*owner = chk_state.s_owner;
dbg_printf(2, "High ID: Owner is operational\n");
ret = 2;
} else {
dbg_printf(2, "High ID: Owner is dead; returning 'off'\n");
ret = 1;
}
} else if (vs->v_state.s_state == VIR_DOMAIN_SHUTOFF) {
ret = 1; /* local and off */
}
out:
- dbg_printf(80, "%s %s\n", __FUNCTION__, vm_name);
+ dbg_printf(80, "%s %s %d\n", __FUNCTION__, vm_name, ret);
return ret;
}
static void
store_domains_by_name(void *hp, virt_list_t *vl)
{
int x;
if (!vl)
return;
for (x = 0; x < vl->vm_count; x++) {
if (!strcmp(DOMAIN0NAME, vl->vm_states[x].v_name))
continue;
dbg_printf(2, "Storing %s\n", vl->vm_states[x].v_name);
- ckpt_write(hp, vl->vm_states[x].v_name,
+ if (ckpt_write(hp, vl->vm_states[x].v_name,
&vl->vm_states[x].v_state,
- sizeof(vm_state_t));
+ sizeof(vm_state_t)) < 0)
+ dbg_printf(2, "error storing in %s\n", __FUNCTION__);
}
}
static void
store_domains_by_uuid(void *hp, virt_list_t *vl)
{
int x;
if (!vl)
return;
for (x = 0; x < vl->vm_count; x++) {
if (!strcmp(DOMAIN0UUID, vl->vm_states[x].v_uuid))
continue;
dbg_printf(2, "Storing %s\n", vl->vm_states[x].v_uuid);
- ckpt_write(hp, vl->vm_states[x].v_uuid,
+ if (ckpt_write(hp, vl->vm_states[x].v_uuid,
&vl->vm_states[x].v_state,
- sizeof(vm_state_t));
+ sizeof(vm_state_t)) < 0)
+ dbg_printf(2, "error storing in %s\n", __FUNCTION__);
}
}
static void
update_local_vms(void)
{
virConnectPtr vp = NULL;
uint32_t my_id = 0;
cpg_get_ids(&my_id, NULL);
vp = virConnectOpen(uri);
if (!vp) {
syslog(LOG_ERR, "Failed to connect to hypervisor\n");
}
virt_list_update(vp, &local_vms, my_id);
vl_print(local_vms);
if (use_uuid)
store_domains_by_uuid(checkpoint_handle, local_vms);
else
store_domains_by_name(checkpoint_handle, local_vms);
- virConnectClose(vp);
+ if (vp) virConnectClose(vp);
}
+/* <UVT>
+ Functions do_off and do_reboot should return an error only if fencing
+ was actually unsuccessful, i.e., the domain was running and is still
+ running after the fencing attempt. If the domain is not running after fencing
+ (it did not exist before or couldn't be started afterwards), 0 should be returned.
+ </UVT> */
static int
do_off(const char *vm_name)
{
virConnectPtr vp;
virDomainPtr vdp;
virDomainInfo vdi;
int ret = -1;
dbg_printf(5, "%s %s\n", __FUNCTION__, vm_name);
vp = virConnectOpen(uri);
if (!vp)
return 1;
if (use_uuid) {
vdp = virDomainLookupByUUIDString(vp,
(const char *)vm_name);
} else {
vdp = virDomainLookupByName(vp, vm_name);
}
if (!vdp) {
dbg_printf(2, "Nothing to do - domain does not exist\n");
- return 1;
+ return 0;
}
if (((virDomainGetInfo(vdp, &vdi) == 0) &&
(vdi.state == VIR_DOMAIN_SHUTOFF))) {
dbg_printf(2, "Nothing to do - domain is off\n");
virDomainFree(vdp);
return 0;
}
syslog(LOG_NOTICE, "Destroying domain %s\n", vm_name);
dbg_printf(2, "[OFF] Calling virDomainDestroy\n");
ret = virDomainDestroy(vdp);
if (ret < 0) {
syslog(LOG_NOTICE, "Failed to destroy domain: %d\n", ret);
printf("virDomainDestroy() failed: %d\n", ret);
ret = 1;
goto out;
}
if (ret) {
syslog(LOG_NOTICE,
"Domain %s still exists; fencing failed\n",
vm_name);
printf("Domain %s still exists; fencing failed\n", vm_name);
ret = 1;
goto out;
}
ret = 0;
out:
virConnectClose(vp);
return ret;
}
static int
do_reboot(const char *vm_name)
{
virConnectPtr vp;
virDomainPtr vdp, nvdp;
virDomainInfo vdi;
char *domain_desc;
int ret;
//uuid_unparse(vm_uuid, uu_string);
dbg_printf(5, "%s %s\n", __FUNCTION__, vm_name);
vp = virConnectOpen(uri);
if (!vp)
return 1;
if (use_uuid) {
vdp = virDomainLookupByUUIDString(vp,
(const char *)vm_name);
} else {
vdp = virDomainLookupByName(vp, vm_name);
}
if (!vdp) {
dbg_printf(2, "[libvirt:REBOOT] Nothing to "
"do - domain does not exist\n");
- return 1;
+ return 0;
}
if (((virDomainGetInfo(vdp, &vdi) == 0) &&
(vdi.state == VIR_DOMAIN_SHUTOFF))) {
dbg_printf(2, "[libvirt:REBOOT] Nothing to "
"do - domain is off\n");
virDomainFree(vdp);
return 0;
}
syslog(LOG_NOTICE, "Rebooting domain %s\n", vm_name);
printf("Rebooting domain %s...\n", vm_name);
domain_desc = virDomainGetXMLDesc(vdp, 0);
if (!domain_desc) {
printf("Failed getting domain description from "
"libvirt\n");
}
dbg_printf(2, "[REBOOT] Calling virDomainDestroy(%p)\n", vdp);
ret = virDomainDestroy(vdp);
if (ret < 0) {
printf("virDomainDestroy() failed: %d/%d\n", ret, errno);
free(domain_desc);
virDomainFree(vdp);
ret = 1;
goto out;
}
ret = wait_domain(vm_name, vp, 15);
if (ret) {
syslog(LOG_NOTICE, "Domain %s still exists; fencing failed\n",
vm_name);
printf("Domain %s still exists; fencing failed\n", vm_name);
if (domain_desc)
free(domain_desc);
ret = 1;
goto out;
}
if (!domain_desc) {
ret = 0;
goto out;
}
/* 'on' is not a failure */
ret = 0;
dbg_printf(3, "[[ XML Domain Info ]]\n");
dbg_printf(3, "%s\n[[ XML END ]]\n", domain_desc);
dbg_printf(2, "Calling virDomainCreateLinux()...\n");
nvdp = virDomainCreateLinux(vp, domain_desc, 0);
if (nvdp == NULL) {
/* More recent versions of libvirt or perhaps the
* KVM back-end do not let you create a domain from
* XML if there is already a defined domain description
* with the same name that it knows about. You must
* then call virDomainCreate() */
dbg_printf(2, "Failed; Trying virDomainCreate()...\n");
if (virDomainCreate(vdp) < 0) {
syslog(LOG_NOTICE,
"Could not restart %s\n",
vm_name);
dbg_printf(1, "Failed to recreate guest"
" %s!\n", vm_name);
}
}
free(domain_desc);
out:
virConnectClose(vp);
return ret;
}
+/*<UVT> This function must send a reply from at least one node; otherwise
+ the requesting fence_virtd would block forever in cpg_wait_reply. </UVT> */
static void
do_real_work(void *data, size_t len, uint32_t nodeid, uint32_t seqno)
{
struct ckpt_fence_req *req = data;
struct ckpt_fence_req reply;
uint32_t owner;
int ret = 1;
memcpy(&reply, req, sizeof(reply));
update_local_vms();
switch(req->request) {
case FENCE_STATUS:
ret = cluster_virt_status(req->vm_name, &owner);
if (ret == 3) {
ret = RESP_OFF;
break;
}
if (ret == 2) {
return;
}
if (ret == 1) {
ret = RESP_OFF;
}
break;
case FENCE_OFF:
ret = cluster_virt_status(req->vm_name, &owner);
if (ret == 3) {
/* No record of this VM in the checkpoint. */
ret = 0;
break;
}
- if (ret != 0) {
+ if (ret == 2) {
return;
}
+ if (ret == 1) {
+ ret = 0;
+ break;
+ }
/* Must be running locally to perform 'off' */
ret = do_off(req->vm_name);
break;
case FENCE_REBOOT:
ret = cluster_virt_status(req->vm_name, &owner);
- if (ret != 0) {
+ if (ret == 3) {
+ ret = 0;
+ break;
+ }
+ if (ret == 2) {
return;
}
+ if (ret == 1) {
+ ret = 0;
+ break;
+ }
/* Must be running locally to perform 'reboot' */
ret = do_reboot(req->vm_name);
break;
}
reply.response = ret;
cpg_send_reply(&reply, sizeof(reply), nodeid, seqno);
}
static int
do_request(const char *vm_name, int request, uint32_t seqno)
{
struct ckpt_fence_req freq, *frp;
size_t retlen;
uint32_t seq;
int ret;
memset(&freq, 0, sizeof(freq));
snprintf(freq.vm_name, sizeof(freq.vm_name), vm_name);
freq.request = request;
freq.seqno = seqno;
if (cpg_send_req(&freq, sizeof(freq), &seq) != 0) {
printf("Failed to send\n");
return 1;
}
if (cpg_wait_reply((void *)&frp, &retlen, seq) != 0) {
printf("Failed to receive\n");
return 1;
}
ret = frp->response;
free(frp);
return ret;
}
/*
 * Backend 'null' callback: checks the backend context, logs the request on
 * stdout and returns 1 without doing anything to the named VM.
 */
static int
checkpoint_null(const char *vm_name, void *priv)
{
/* Returns -1 (errno = EINVAL) from here if priv is not a valid context. */
VALIDATE(priv);
printf("[CKPT] Null operation on %s\n", vm_name);
return 1;
}
/*
 * Backend 'off' callback: checks the backend context, logs the request and
 * forwards a FENCE_OFF request (with the caller's seqno) to do_request(),
 * whose result is returned.  'src' is not used.
 */
static int
checkpoint_off(const char *vm_name, const char *src,
uint32_t seqno, void *priv)
{
/* Returns -1 (errno = EINVAL) from here if priv is not a valid context. */
VALIDATE(priv);
printf("[CKPT] OFF operation on %s seq %d\n", vm_name, seqno);
return do_request(vm_name, FENCE_OFF, seqno);
}
/*
 * Backend 'on' callback: checks the backend context and logs the request;
 * no request is sent and 1 is always returned.  'src' is not used.
 */
static int
checkpoint_on(const char *vm_name, const char *src,
uint32_t seqno, void *priv)
{
/* Returns -1 (errno = EINVAL) from here if priv is not a valid context. */
VALIDATE(priv);
printf("[CKPT] ON operation on %s seq %d\n", vm_name, seqno);
return 1;
}
/*
 * Backend 'devstatus' callback: logs the call, then returns 0 when priv is
 * a valid backend context.
 */
static int
checkpoint_devstatus(void *priv)
{
/* Unlike the other callbacks, the log line is printed before validation. */
printf("[CKPT] Device status\n");
/* Returns -1 (errno = EINVAL) from here if priv is not a valid context. */
VALIDATE(priv);
return 0;
}
/*
 * Backend 'status' callback: checks the backend context, logs the request
 * and returns the result of a FENCE_STATUS request sent via do_request().
 */
static int
checkpoint_status(const char *vm_name, void *priv)
{
/* Returns -1 (errno = EINVAL) from here if priv is not a valid context. */
VALIDATE(priv);
printf("[CKPT] STATUS operation on %s\n", vm_name);
/* This callback receives no sequence number, so 0 is passed. */
return do_request(vm_name, FENCE_STATUS, 0);
}
/*
 * Backend 'reboot' callback: checks the backend context, logs the request
 * and returns the result of a FENCE_REBOOT request sent via do_request().
 * 'src' is not used.
 *
 * The caller's seqno is forwarded with the request, matching what
 * checkpoint_off() does and what the log line reports; it was previously
 * replaced by 0.
 */
static int
checkpoint_reboot(const char *vm_name, const char *src,
		  uint32_t seqno, void *priv)
{
	/* Returns -1 (errno = EINVAL) from here if priv is not a valid context. */
	VALIDATE(priv);

	printf("[CKPT] REBOOT operation on %s seq %d\n", vm_name, seqno);
	return do_request(vm_name, FENCE_REBOOT, seqno);
}
/*
 * Backend 'hostlist' callback: checks the backend context and logs the
 * request.  The callback and its argument are never invoked; 1 is always
 * returned.
 */
static int
checkpoint_hostlist(hostlist_callback callback, void *arg, void *priv)
{
/* Returns -1 (errno = EINVAL) from here if priv is not a valid context. */
VALIDATE(priv);
printf("[CKPT] HOSTLIST operation\n");
return 1;
}
static int
checkpoint_init(backend_context_t *c, config_object_t *config)
{
char value[1024];
struct check_info *info = NULL;
int x;
#ifdef _MODULE
if (sc_get(config, "fence_virtd/@debug", value, sizeof(value))==0)
dset(atoi(value));
#endif
if (sc_get(config, "backends/libvirt/@uri",
value, sizeof(value)) == 0) {
uri = strdup(value);
if (!uri) {
free(info);
return -1;
}
dbg_printf(1, "Using %s\n", uri);
}
if (sc_get(config, "backends/checkpoint/@uri",
value, sizeof(value)) == 0) {
if (uri)
free(uri);
uri = strdup(value);
if (!uri) {
free(info);
return -1;
}
dbg_printf(1, "Using %s\n", uri);
}
/* Naming scheme is no longer a top-level config option.
* However, we retain it here for configuration compatibility with
* versions 0.1.3 and previous.
*/
if (sc_get(config, "fence_virtd/@name_mode",
value, sizeof(value)-1) == 0) {
dbg_printf(1, "Got %s for name_mode\n", value);
if (!strcasecmp(value, "uuid")) {
use_uuid = 1;
} else if (!strcasecmp(value, "name")) {
use_uuid = 0;
} else {
dbg_printf(1, "Unsupported name_mode: %s\n", value);
}
}
if (sc_get(config, "backends/checkpoint/@name_mode",
value, sizeof(value)-1) == 0) {
dbg_printf(1, "Got %s for name_mode\n", value);
if (!strcasecmp(value, "uuid")) {
use_uuid = 1;
} else if (!strcasecmp(value, "name")) {
use_uuid = 0;
} else {
dbg_printf(1, "Unsupported name_mode: %s\n", value);
}
}
if (cpg_start(PACKAGE_NAME, do_real_work) < 0) {
return -1;
}
info = malloc(sizeof(*info));
if (!info)
return -1;
memset(info, 0, sizeof(*info));
info->magic = MAGIC;
x = 0;
while ((checkpoint_handle = ckpt_init(
"vm_states", 262144, 4096, 64, 10
)) == NULL) {
if (!x) {
dbg_printf(1, "Could not initialize "
"saCkPt; retrying...\n");
x = 1;
}
sleep(3);
}
if (x)
dbg_printf(1, "Checkpoint initialized\n");
update_local_vms();
*c = (void *)info;
return 0;
}
/*
 * Backend cleanup: invalidates and frees the context created by
 * checkpoint_init(), then stops the CPG layer.  Returns 0, or -1
 * (errno = EINVAL) when 'c' is not a valid context.
 */
static int
checkpoint_shutdown(backend_context_t c)
{
struct check_info *info = (struct check_info *)c;
VALIDATE(info);
/* Clear the stamp so a stale pointer no longer passes VALIDATE(). */
info->magic = 0;
free(info);
cpg_stop();
return 0;
}
/* Operation table of the checkpoint backend, referenced by checkpoint_plugin. */
static fence_callbacks_t checkpoint_callbacks = {
.null = checkpoint_null,
.off = checkpoint_off,
.on = checkpoint_on,
.reboot = checkpoint_reboot,
.status = checkpoint_status,
.devstatus = checkpoint_devstatus,
.hostlist = checkpoint_hostlist
};
/*
 * Plugin descriptor for this backend: its name and version strings, the
 * operation table, and the init/cleanup entry points.
 */
static backend_plugin_t checkpoint_plugin = {
.name = NAME,
.version = VERSION,
.callbacks = &checkpoint_callbacks,
.init = checkpoint_init,
.cleanup = checkpoint_shutdown,
};
#ifdef _MODULE
/*
 * Module build: export two functions whose symbol names are supplied by the
 * BACKEND_VER_SYM / BACKEND_INFO_SYM macros (presumably resolved by the
 * plugin loader -- confirm against the loader code).
 */
/* Reports the backend plugin API version this file was built against. */
double
BACKEND_VER_SYM(void)
{
return PLUGIN_VERSION_BACKEND;
}
/* Hands out the descriptor of this backend. */
const backend_plugin_t *
BACKEND_INFO_SYM(void)
{
return &checkpoint_plugin;
}
#else
/*
 * Non-module build: register the backend from a constructor function, i.e.
 * without any explicit call from the rest of the program.
 */
static void __attribute__((constructor))
checkpoint_register_plugin(void)
{
plugin_reg_backend(&checkpoint_plugin);
}
#endif
diff --git a/server/cpg.c b/server/cpg.c
index 3fc687cc..d6bba1ee 100644
--- a/server/cpg.c
+++ b/server/cpg.c
@@ -1,424 +1,450 @@
#include <config.h>
#include <stdio.h>
#include <sys/types.h>
#include <stdint.h>
#include <malloc.h>
#include <signal.h>
#include <unistd.h>
#include <sys/select.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/uio.h>
#include <list.h>
#include <pthread.h>
#ifdef HAVE_OPENAIS_CPG_H
#include <openais/cpg.h>
#else
#ifdef HAVE_COROSYNC_CPG_H
#include <corosync/cpg.h>
#endif
#endif
#include "checkpoint.h"
#define NODE_ID_NONE ((uint32_t)-1)
/*
 * One outstanding request on the 'pending' list.  Created by cpg_send_req(),
 * filled in by cpg_deliver_func() when the matching reply arrives, and
 * consumed by cpg_wait_reply().
 */
struct msg_queue_node {
/* List linkage; expands from the list_head() macro in list.h. */
list_head();
/* Sequence number assigned to the request when it was sent. */
uint32_t seqno;
#define STATE_CLEAR 0
#define STATE_MESSAGE 1
/* STATE_CLEAR until a reply has been copied in, then STATE_MESSAGE. */
uint32_t state;
/* Heap copy of the reply payload (NULL until a reply arrives) and its size. */
void *msg;
size_t msglen;
};
/* Header placed in front of every payload multicast to the CPG group. */
struct wire_msg {
#define TYPE_REQUEST 0
#define TYPE_REPLY 1
/* TYPE_REQUEST or TYPE_REPLY. */
uint32_t type;
/* Sequence number of the request; replies echo the request's value. */
uint32_t seqno;
/* NODE_ID_NONE for requests; for replies, the node id the reply is meant for. */
uint32_t target;
/* Not assigned by cpg_send_req() or cpg_send_reply(). */
uint32_t pad;
/* Payload bytes follow the header directly. */
char data[0];
};
static uint32_t seqnum = 0, my_node_id = NODE_ID_NONE;
+static uint32_t high_id_from_callback = NODE_ID_NONE;
static struct msg_queue_node *pending= NULL;
static cpg_handle_t cpg_handle;
static struct cpg_name gname;
static pthread_mutex_t cpg_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cpg_cond = PTHREAD_COND_INITIALIZER;
static pthread_t cpg_thread = 0;
static request_callback_fn req_callback_fn;
-
+/* <UVT> The function cpg_membership_get is (probably) buggy: it returns the correct
+count only before cpg_mcast_joined is called; subsequent calls set count to 0. </UVT> */
+#if 0
int
cpg_get_ids(uint32_t *my_id, uint32_t *high_id)
{
-#if 0
/* This is segfaulting for some reason */
struct cpg_address cpg_nodes[CPG_MEMBERS_MAX];
uint32_t high = my_node_id;
int count = CPG_MEMBERS_MAX, x;
if (!my_id && !high_id)
return 0;
if (my_id)
*my_id = my_node_id;
if (!high_id)
return 0;
memset(&cpg_nodes, 0, sizeof(cpg_nodes));
if (cpg_membership_get(cpg_handle, &gname,
cpg_nodes, &count) != CPG_OK)
return -1;
for (x = 0; x < count; x++) {
if (cpg_nodes[x].nodeid > high) {
high = cpg_nodes[x].nodeid;
}
}
*high_id = high;
return 0;
-#endif
- return -ENOSYS;
}
+#endif
+int
+cpg_get_ids(uint32_t *my_id, uint32_t *high_id)
+{
+ if (!my_id && !high_id)
+ return 0;
+ if (my_id)
+ *my_id = my_node_id;
+ if (!high_id)
+ return 0;
+
+ *high_id = high_id_from_callback;
+
+ return 0;
+}
/*
 * CPG delivery callback, invoked for every message multicast to the group.
 *
 * A TYPE_REPLY addressed to this node whose seqno matches an entry on the
 * 'pending' list has its payload copied into that entry, and all threads
 * waiting on cpg_cond are woken.  A TYPE_REQUEST is passed to the
 * registered request callback together with the sender's node id.
 * Messages too short to hold a struct wire_msg are dropped.
 */
void
#ifdef HAVE_OPENAIS_CPG_H
cpg_deliver_func(cpg_handle_t h,
		 struct cpg_name *group_name,
		 uint32_t nodeid,
		 uint32_t pid,
		 void *msg,
		 int msglen)
#else
cpg_deliver_func(cpg_handle_t h,
		 const struct cpg_name *group_name,
		 uint32_t nodeid,
		 uint32_t pid,
		 void *msg,
		 size_t msglen)
#endif
{
	struct msg_queue_node *n;
	struct wire_msg *m = msg;
	size_t paylen;
	void *copy;
	int x, found;

	/*
	 * A frame shorter than the header would make the payload length
	 * computed below wrap around to a huge value.
	 */
	if (!m || (size_t)msglen < sizeof(*m))
		return;
	paylen = (size_t)msglen - sizeof(*m);

	pthread_mutex_lock(&cpg_mutex);
	if (m->type == TYPE_REPLY) {
		/* Reply to a request we sent */
		found = 0;
		list_for(&pending, n, x) {
			if (m->seqno != n->seqno)
				continue;
			if (m->target != my_node_id)
				continue;
			found = 1;
			break;
		}
		if (!found)
			goto out_unlock;

		/*
		 * Copy the payload into a private buffer.  Ask for at
		 * least one byte so that an empty reply is not mistaken
		 * for an allocation failure where malloc(0) returns NULL.
		 */
		copy = malloc(paylen ? paylen : 1);
		if (!copy)
			goto out_unlock;
		memcpy(copy, (char *)msg + sizeof(*m), paylen);

		/*
		 * A previous reply for this request may still be waiting
		 * to be picked up; the newest one wins, so release the
		 * old buffer instead of leaking it.
		 */
		free(n->msg);
		n->msg = copy;
		n->msglen = paylen;
		n->state = STATE_MESSAGE;

		list_remove(&pending, n);
		list_insert(&pending, n);

		pthread_cond_broadcast(&cpg_cond);
		goto out_unlock;
	}
	pthread_mutex_unlock(&cpg_mutex);

	/* Requests are handled without holding cpg_mutex. */
	if (m->type == TYPE_REQUEST) {
		req_callback_fn(&m->data, paylen,
				nodeid, m->seqno);
	}
	return;

out_unlock:
	pthread_mutex_unlock(&cpg_mutex);
}
+
void
#ifdef HAVE_OPENAIS_CPG_H
cpg_config_change(cpg_handle_t h,
struct cpg_name *group_name,
struct cpg_address *members, int memberlen,
struct cpg_address *left, int leftlen,
struct cpg_address *join, int joinlen)
#else
cpg_config_change(cpg_handle_t h,
const struct cpg_name *group_name,
const struct cpg_address *members, size_t memberlen,
const struct cpg_address *left, size_t leftlen,
const struct cpg_address *join, size_t joinlen)
#endif
{
- /* Don't care */
+ int x;
+ int high = my_node_id;
+
+ for (x = 0; x < memberlen; x++) {
+ if (members[x].nodeid > high) {
+ high = members[x].nodeid;
+ }
+ }
+
+ high_id_from_callback = high;
+
return;
}
static cpg_callbacks_t my_callbacks = {
.cpg_deliver_fn = cpg_deliver_func,
.cpg_confchg_fn = cpg_config_change
};
/*
 * Multicast a request to the CPG group and queue a 'pending' entry so that
 * the reply can later be collected with cpg_wait_reply().
 *
 * data, len  request payload
 * seqno      receives the sequence number assigned to the request
 *
 * Returns 0 on success.  Returns -1 on allocation failure or when the
 * multicast fails; in both cases nothing is left on the pending list.
 */
int
cpg_send_req(void *data, size_t len, uint32_t *seqno)
{
	struct iovec iov;
	struct msg_queue_node *n;
	struct wire_msg *m;
	size_t msgsz = sizeof(*m) + len;
	int ret;

	n = malloc(sizeof(*n));
	if (!n)
		return -1;

	m = malloc(msgsz);
	if (!m) {
		free(n);	/* do not leak the queue node */
		return -1;
	}

	n->state = STATE_CLEAR;
	n->msg = NULL;
	n->msglen = 0;

	/* The sequence counter is only incremented on send, under the lock. */
	pthread_mutex_lock(&cpg_mutex);
	list_insert(&pending, n);
	n->seqno = ++seqnum;
	m->seqno = seqnum;
	*seqno = seqnum;
	pthread_mutex_unlock(&cpg_mutex);

	m->type = TYPE_REQUEST; /* XXX swab? */
	m->target = NODE_ID_NONE;
	m->pad = 0;		/* do not put uninitialised heap on the wire */
	memcpy(&m->data, data, len);

	iov.iov_base = m;
	iov.iov_len = msgsz;
	ret = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, &iov, 1);
	free(m);

	if (ret == CPG_OK)
		return 0;

	/*
	 * The caller will not wait for a reply to a request that failed to
	 * go out, so take the entry back off the list rather than leaving
	 * it there forever.
	 */
	pthread_mutex_lock(&cpg_mutex);
	list_remove(&pending, n);
	pthread_mutex_unlock(&cpg_mutex);
	free(n->msg);
	free(n);
	return -1;
}
/*
 * Multicast a reply for request 'seqno' to the CPG group, addressed to node
 * 'nodeid'.  The payload is 'len' bytes starting at 'data'.
 *
 * Returns 0 when the multicast succeeded, -1 on allocation or send failure.
 */
int
cpg_send_reply(void *data, size_t len, uint32_t nodeid,
	       uint32_t seqno)
{
	const size_t total = sizeof(struct wire_msg) + len;
	struct wire_msg *frame = malloc(total);
	struct iovec vec;
	int rc;

	if (frame == NULL)
		return -1;

	/* Header first, payload directly behind it. */
	frame->type = TYPE_REPLY; /* XXX swab? */
	frame->seqno = seqno;
	frame->target = nodeid;
	memcpy(&frame->data, data, len);

	vec.iov_base = frame;
	vec.iov_len = total;
	rc = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, &vec, 1);

	free(frame);
	return (rc == CPG_OK) ? 0 : -1;
}
/*
 * Block until a reply for request 'seqno' has been delivered, then hand the
 * reply over to the caller.
 *
 * data  receives the heap-allocated reply payload; the caller must free() it
 * len   receives the payload size in bytes
 *
 * Always returns 0.
 *
 * The pending list is examined *before* sleeping and cpg_mutex is held
 * continuously from the check to the wait, so a reply that arrives before
 * this function is called is not missed.  The entry is also unlinked while
 * the mutex is still held, and the mutex is released exactly once.
 */
int
cpg_wait_reply(void **data, size_t *len, uint32_t seqno)
{
	struct msg_queue_node *n;
	int x, found;

	pthread_mutex_lock(&cpg_mutex);
	for (;;) {
		found = 0;
		list_for(&pending, n, x) {
			if (n->seqno != seqno)
				continue;
			if (n->state != STATE_MESSAGE)
				continue;
			found = 1;
			break;
		}
		if (found)
			break;

		/* Releases cpg_mutex while asleep, re-acquires on wakeup. */
		pthread_cond_wait(&cpg_cond, &cpg_mutex);
	}

	list_remove(&pending, n);
	pthread_mutex_unlock(&cpg_mutex);

	/* The node is private to this thread from here on. */
	*data = n->msg;
	*len = n->msglen;
	free(n);

	return 0;
}
/*
 * Thread body started by cpg_start(): runs the CPG dispatcher in blocking
 * mode on the file-level cpg_handle.  'arg' is unused; returns NULL once
 * cpg_dispatch() returns.
 */
static void *
cpg_dispatch_thread(void *arg)
{
cpg_dispatch(cpg_handle, CPG_DISPATCH_BLOCKING);
return NULL;
}
/*
 * Join the CPG group called 'name' and start the dispatch thread that
 * delivers group messages; incoming requests are handed to 'func'.
 *
 * Returns 0 on success.  Returns -1 with errno set to EINVAL (NULL or empty
 * name), ENAMETOOLONG (name does not fit in a cpg_name), or the
 * pthread_create() error code; also returns -1 when cpg_initialize() or
 * cpg_join() fails.  On failure no CPG handle is left open.
 */
int
cpg_start(const char *name, request_callback_fn func)
{
	cpg_handle_t h;
	int n, rc;

	errno = EINVAL;

	if (!name)
		return -1;

	/* The group name is data, never a format string. */
	n = snprintf(gname.value, sizeof(gname.value), "%s", name);
	if (n <= 0) {
		errno = EINVAL;
		return -1;
	}
	if ((size_t)n >= sizeof(gname.value)) {
		errno = ENAMETOOLONG;
		return -1;
	}
	gname.length = n;

	memset(&h, 0, sizeof(h));
	if (cpg_initialize(&h, &my_callbacks) != CPG_OK) {
		perror("cpg_initialize");
		return -1;
	}

	if (cpg_join(h, &gname) != CPG_OK) {
		perror("cpg_join");
		cpg_finalize(h);
		return -1;
	}

	/*
	 * Publish everything the dispatch thread reads (handle, node id,
	 * request callback) before that thread exists.
	 */
	pthread_mutex_lock(&cpg_mutex);
	cpg_local_get(h, &my_node_id);
	memcpy(&cpg_handle, &h, sizeof(h));
	req_callback_fn = func;
	pthread_mutex_unlock(&cpg_mutex);

	rc = pthread_create(&cpg_thread, NULL, cpg_dispatch_thread, NULL);
	if (rc != 0) {
		cpg_leave(h, &gname);
		cpg_finalize(h);
		errno = rc;
		return -1;
	}

	return 0;
}
/*
 * Undo cpg_start(): cancel and join the dispatch thread, then leave the
 * group and release the CPG handle.  Always returns 0.
 */
int
cpg_stop(void)
{
/* Stop the dispatcher first so that nothing uses the handle while it is torn down. */
pthread_cancel(cpg_thread);
pthread_join(cpg_thread, NULL);
cpg_leave(cpg_handle, &gname);
cpg_finalize(cpg_handle);
return 0;
}
#ifdef STANDALONE
int please_quit = 0;
/* SIGINT handler of the standalone test program: raises the please_quit flag. */
void
go_away(int sig)
{
please_quit = 1;
}
/*
 * Request handler of the standalone test program: prints the received
 * payload and answers every request with the string "fail." (terminating
 * NUL included).
 */
void
request_callback(void *data, size_t len, uint32_t nodeid, uint32_t seqno)
{
	char reply[] = "fail.";

	/* The payload carries no terminator of its own; bound the print. */
	printf("msg = %.*s\n", (int)len, (const char *)data);

	/* Send exactly the bytes of the literal, not more. */
	cpg_send_reply(reply, sizeof(reply), nodeid, seqno);
}
/*
 * Standalone smoke test: joins group "lhh1", multicasts the request "hi",
 * waits for one reply, prints it and shuts down.
 *
 * Returns 0 on success, 1 when CPG cannot be started or the request cannot
 * be sent.
 */
int
main(int argc, char **argv)
{
	uint32_t seqno = 0;
	void *data = NULL;	/* cpg_wait_reply() expects a void ** */
	size_t len = 0;

	(void)argc;
	(void)argv;

	signal(SIGINT, go_away);

	if (cpg_start("lhh1", request_callback) < 0) {
		perror("cpg_start");
		return 1;
	}

	/* Waiting for a reply to a request that never went out would hang. */
	if (cpg_send_req("hi", 2, &seqno) != 0) {
		fprintf(stderr, "cpg_send_req failed\n");
		cpg_stop();
		return 1;
	}

	cpg_wait_reply(&data, &len, seqno);
	printf("%.*s\n", (int)len, (const char *)data);
	free(data);

	printf("going bye\n");
	cpg_stop();
	return 0;
}
#endif
diff --git a/server/virt-serial.c b/server/virt-serial.c
index fff38ef0..25281530 100644
--- a/server/virt-serial.c
+++ b/server/virt-serial.c
@@ -1,572 +1,440 @@
// #include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <pthread.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/poll.h>
#include <libvirt/libvirt.h>
#include <libxml/xmlreader.h>
#include <simpleconfig.h>
#include "debug.h"
#define DEBUG0(fmt) dbg_printf(5,"%s:%d :: " fmt "\n", \
__func__, __LINE__)
#define DEBUG1(fmt, ...) dbg_printf(5, "%s:%d: " fmt "\n", \
__func__, __LINE__, __VA_ARGS__)
#include "serial.h"
#define STREQ(a,b) (strcmp((a),(b)) == 0)
-/* handle globals */
-static int h_fd = -1;
-static virEventHandleType h_event = 0;
-static virEventHandleCallback h_cb = NULL;
-static virFreeCallback h_ff = NULL;
-static void *h_opaque = NULL;
-
-/* timeout globals */
-#define TIMEOUT_MS 1000
-static int t_active = 0;
-static int t_timeout = -1;
-static virEventTimeoutCallback t_cb = NULL;
-static virFreeCallback t_ff = NULL;
-static void *t_opaque = NULL;
static pthread_t event_tid = 0;
static int run = 0;
/* Prototypes */
const char *eventToString(int event);
int myDomainEventCallback1(virConnectPtr conn, virDomainPtr dom,
int event, int detail, void *opaque);
-int myEventAddHandleFunc(int fd, int event,
- virEventHandleCallback cb,
- void *opaque, virFreeCallback ff);
-void myEventUpdateHandleFunc(int watch, int event);
-int myEventRemoveHandleFunc(int watch);
-
-int myEventAddTimeoutFunc(int timeout,
- virEventTimeoutCallback cb,
- void *opaque, virFreeCallback ff);
-void myEventUpdateTimeoutFunc(int timer, int timout);
-int myEventRemoveTimeoutFunc(int timer);
-
-int myEventHandleTypeToPollEvent(virEventHandleType events);
-virEventHandleType myPollEventToEventHandleType(int events);
void usage(const char *pname);
/* Pairs a libvirt domain handle with a domain event type. */
struct domain_info {
virDomainPtr dom;
virDomainEventType event;
};
-
-/* EventImpl Functions */
-int
-myEventHandleTypeToPollEvent(virEventHandleType events)
-{
- int ret = 0;
- if (events & VIR_EVENT_HANDLE_READABLE)
- ret |= POLLIN;
- if (events & VIR_EVENT_HANDLE_WRITABLE)
- ret |= POLLOUT;
- if (events & VIR_EVENT_HANDLE_ERROR)
- ret |= POLLERR;
- if (events & VIR_EVENT_HANDLE_HANGUP)
- ret |= POLLHUP;
- return ret;
-}
-
-virEventHandleType
-myPollEventToEventHandleType(int events)
-{
- virEventHandleType ret = 0;
- if (events & POLLIN)
- ret |= VIR_EVENT_HANDLE_READABLE;
- if (events & POLLOUT)
- ret |= VIR_EVENT_HANDLE_WRITABLE;
- if (events & POLLERR)
- ret |= VIR_EVENT_HANDLE_ERROR;
- if (events & POLLHUP)
- ret |= VIR_EVENT_HANDLE_HANGUP;
-
- return ret;
-}
-
-int
-myEventAddHandleFunc(int fd, int event,
- virEventHandleCallback cb,
- void *opaque, virFreeCallback ff)
-{
- DEBUG1("Add handle %d %d %p %p %p", fd, event, cb, opaque, ff);
- h_fd = fd;
- h_event = myEventHandleTypeToPollEvent(event);
- h_cb = cb;
- h_opaque = opaque;
- h_ff = ff;
- return 0;
-}
-
-void
-myEventUpdateHandleFunc(int fd, int event)
-{
- DEBUG1("Updated Handle %d %d", fd, event);
- h_event = myEventHandleTypeToPollEvent(event);
- return;
-}
-
-int
-myEventRemoveHandleFunc(int fd)
-{
- DEBUG1("Removed Handle %d", fd);
- h_fd = 0;
- if (h_ff)
- (h_ff) (h_opaque);
- return 0;
-}
-
-int
-myEventAddTimeoutFunc(int timeout,
- virEventTimeoutCallback cb,
- void *opaque, virFreeCallback ff)
-{
- DEBUG1("Adding Timeout %d %p %p", timeout, cb, opaque);
- t_active = 1;
- t_timeout = timeout;
- t_cb = cb;
- t_ff = ff;
- t_opaque = opaque;
- return 0;
-}
-
-void
-myEventUpdateTimeoutFunc(int timer, int timeout)
-{
- /*DEBUG1("Timeout updated %d %d", timer, timeout); */
- t_timeout = timeout;
-}
-
-int
-myEventRemoveTimeoutFunc(int timer)
-{
- DEBUG1("Timeout removed %d", timer);
- t_active = 0;
- if (t_ff)
- (t_ff) (t_opaque);
- return 0;
-}
-
-
/*
 * Decide whether 'pathspec' names an entry located directly inside 'dir'.
 *
 * Leading slashes are ignored on both arguments, trailing slashes are
 * ignored on 'dir', and a run of slashes in front of the last path
 * component counts as a single separator.  Only the immediate parent is
 * accepted: "/a/b/c" is in "/a/b" but not in "/a".
 *
 * Returns 1 on a match, 0 otherwise (including NULL or empty arguments and
 * a 'dir' that consists only of slashes).
 */
static int
is_in_directory(const char *dir, const char *pathspec)
{
	const char *last_slash;
	size_t dirlen, pathlen, parentlen;

	if (!dir || !pathspec)
		return 0;

	dirlen = strlen(dir);
	pathlen = strlen(pathspec);

	/* chop off trailing slashes */
	while (dirlen && dir[dirlen - 1] == '/')
		--dirlen;

	/* chop off leading slashes */
	while (dirlen && dir[0] == '/') {
		++dir;
		--dirlen;
	}
	while (pathlen && pathspec[0] == '/') {
		++pathspec;
		--pathlen;
	}

	if (!dirlen || !pathlen)
		return 0;

	/* The path must be longer than its own parent directory. */
	if (pathlen <= dirlen)
		return 0;

	last_slash = strrchr(pathspec, '/');
	if (!last_slash)
		return 0;

	/*
	 * Step back over the separator(s).  pathspec[0] is not a slash at
	 * this point, so the walk always ends on the last character of the
	 * parent component.  That may be pathspec[0] itself when the parent
	 * is a single character, which is a valid parent and must not be
	 * rejected.
	 */
	while (*last_slash == '/' && last_slash > pathspec)
		--last_slash;

	parentlen = (size_t)(last_slash - pathspec) + 1;
	if (parentlen != dirlen)
		return 0;

	/* todo - intelligently skip multiple slashes mid-path */
	return !strncmp(dir, pathspec, dirlen);
}
static int
domainStarted(virDomainPtr mojaDomain, const char *path, int mode)
{
char dom_uuid[42];
char *xml;
xmlDocPtr doc;
xmlNodePtr cur, devices, child, serial;
xmlAttrPtr attr, attr_mode, attr_path;
if (!mojaDomain)
return -1;
virDomainGetUUIDString(mojaDomain, dom_uuid);
xml = virDomainGetXMLDesc(mojaDomain, 0);
// printf("%s\n", xml);
// @todo: free mojaDomain
// parseXML output
doc = xmlParseMemory(xml, strlen(xml));
xmlFree(xml);
cur = xmlDocGetRootElement(doc);
if (cur == NULL) {
fprintf(stderr, "Empty doc\n");
xmlFreeDoc(doc);
return -1;
}
if (xmlStrcmp(cur->name, (const xmlChar *) "domain")) {
fprintf(stderr, "no domain?\n");
xmlFreeDoc(doc);
return -1;
}
devices = cur->xmlChildrenNode;
for (devices = cur->xmlChildrenNode; devices != NULL;
devices = devices->next) {
if (xmlStrcmp(devices->name, (const xmlChar *) "devices")) {
continue;
}
for (child = devices->xmlChildrenNode; child != NULL;
child = child->next) {
if ((!mode && xmlStrcmp(child->name, (const xmlChar *) "serial")) ||
(mode && xmlStrcmp(child->name, (const xmlChar *) "channel"))) {
continue;
}
attr = xmlHasProp(child, (const xmlChar *)"type");
if (attr == NULL)
continue;
if (xmlStrcmp(attr->children->content,
(const xmlChar *) "unix")) {
continue;
}
for (serial = child->xmlChildrenNode; serial != NULL;
serial = serial->next) {
if (xmlStrcmp(serial->name,
(const xmlChar *) "source")) {
continue;
}
attr_mode = xmlHasProp(serial, (const xmlChar *)"mode");
attr_path = xmlHasProp(serial, (const xmlChar *)"path");
if (!attr_path || !attr_mode)
continue;
if (xmlStrcmp(attr_mode->children->content,
(const xmlChar *) "bind"))
continue;
if (path && !is_in_directory(path, (const char *)
attr_path->children->content))
continue;
domain_sock_setup(dom_uuid, (const char *)
attr_path->children->content);
}
}
}
xmlFreeDoc(doc);
return 0;
}
/*
 * Run domainStarted() for every domain that already exists on the
 * connection and is neither shut off nor crashed.
 *
 * vp:   open libvirt connection; NULL yields -1 with errno = EINVAL.
 * path: passed through to domainStarted().
 * mode: passed through to domainStarted().
 *
 * Returns 0 on success (including "no domains"), -1 if the domain list
 * cannot be obtained or a listed domain cannot be looked up or queried.
 * Earlier versions returned 0 on those failures as well.
 */
static int
registerExisting(virConnectPtr vp, const char *path, int mode)
{
	int *d_ids = NULL;
	int d_count, x;
	int ret = -1;
	virDomainPtr dom;
	virDomainInfo d_info;

	errno = EINVAL;
	if (!vp)
		return -1;

	d_count = virConnectNumOfDomains(vp);
	if (d_count == 0) {
		/* Successful, but no domains running */
		errno = 0;
		return 0;
	}
	if (d_count < 0)
		return -1;

	d_ids = malloc(sizeof(*d_ids) * d_count);
	if (!d_ids)
		return -1;

	/*
	 * Use the number of ids actually stored: domains may have gone away
	 * since virConnectNumOfDomains(), and the remaining array slots are
	 * uninitialised.
	 */
	d_count = virConnectListDomains(vp, d_ids, d_count);
	if (d_count < 0)
		goto out;

	/* Ok, we have the domain IDs - let's get their names and states */
	for (x = 0; x < d_count; x++) {
		dom = virDomainLookupByID(vp, d_ids[x]);
		if (!dom)
			goto out;

		if (virDomainGetInfo(dom, &d_info) < 0) {
			virDomainFree(dom);
			goto out;
		}

		if (d_info.state != VIR_DOMAIN_SHUTOFF &&
		    d_info.state != VIR_DOMAIN_CRASHED)
			domainStarted(dom, path, mode);

		virDomainFree(dom);
	}

	ret = 0;
	errno = 0;
out:
	free(d_ids);
	return ret;
}
/*
 * Close the socket registered for a domain that has stopped.
 *
 * Returns 0 after calling domain_sock_close() with the domain's UUID
 * string, -1 if the domain is NULL or its UUID cannot be obtained.
 */
static int
domainStopped(virDomainPtr mojaDomain)
{
	char dom_uuid[42];

	if (!mojaDomain)
		return -1;

	/* On failure dom_uuid is not filled in; do not pass it on */
	if (virDomainGetUUIDString(mojaDomain, dom_uuid) < 0)
		return -1;

	domain_sock_close(dom_uuid);
	return 0;
}
/*
 * Arguments handed to event_thread() by start_event_listener().
 * The structure and its two strings are heap-allocated by
 * start_event_listener() and freed by event_thread() when it exits.
 */
struct event_args {
/* libvirt URI passed to virConnectOpen(); may be NULL */
char *uri;
/* directory filter passed to domainStarted(); may be NULL */
char *path;
/* 0 = serial devices, non-zero = VMChannel devices */
int mode;
/* write end of the wake pipe; one byte is written per started domain */
int wake_fd;
};
/*
 * Connection-close callback; event_thread() registers it with
 * virConnectRegisterCloseCallback(). Logs why the libvirt connection was
 * closed (debug level 2) and clears `run`, which ends the `while (run)`
 * loop in event_thread().
 */
+void
+connectClose(virConnectPtr conn ATTRIBUTE_UNUSED,
+ int reason,
+ void *opaque ATTRIBUTE_UNUSED)
+{
+ switch (reason) {
+ case VIR_CONNECT_CLOSE_REASON_ERROR:
+ dbg_printf(2, "Connection closed due to I/O error\n");
+ break;
+ case VIR_CONNECT_CLOSE_REASON_EOF:
+ dbg_printf(2, "Connection closed due to end of file\n");
+ break;
+ case VIR_CONNECT_CLOSE_REASON_KEEPALIVE:
+ dbg_printf(2, "Connection closed due to keepalive timeout\n");
+ break;
+ case VIR_CONNECT_CLOSE_REASON_CLIENT:
+ dbg_printf(2, "Connection closed due to client request\n");
+ break;
+ default:
+ dbg_printf(2, "Connection closed due to unknown reason\n");
+ break;
+ };
/* Stop the event loop regardless of the reason */
+ run = 0;
+}
/*
 * Domain lifecycle callback registered with
 * virConnectDomainEventRegister().
 *
 * STARTED: sets up the domain's sockets via domainStarted() and writes one
 *          byte to the wake pipe.
 * STOPPED: closes the domain's socket via domainStopped().
 * Any other event is ignored.
 *
 * opaque is the struct event_args given at registration time.
 * Always returns 0.
 */
int
myDomainEventCallback1(virConnectPtr conn,
		       virDomainPtr dom, int event, int detail, void *opaque)
{
	struct event_args *ctx = opaque;

	switch (event) {
	case VIR_DOMAIN_EVENT_STARTED:
		/* Hold a reference while the domain is being inspected */
		virDomainRef(dom);
		domainStarted(dom, ctx->path, ctx->mode);
		virDomainFree(dom);
		write(ctx->wake_fd, "x", 1);
		break;

	case VIR_DOMAIN_EVENT_STOPPED:
		virDomainRef(dom);
		domainStopped(dom);
		virDomainFree(dom);
		break;

	default:
		break;
	}

	return 0;
}
/*
 * Thread entry point for the libvirt event listener (started by
 * start_event_listener()). This block is shown as a diff: lines starting
 * with '-' are removed by the patch, lines starting with '+' are added.
 *
 * arg is a heap-allocated struct event_args. The thread logs its
 * configuration, opens a libvirt connection to args->uri, registers the
 * already existing domains via registerExisting(), registers
 * myDomainEventCallback1 and then runs the event loop while `run` is
 * non-zero. On exit it deregisters the callback, closes the connection
 * and frees args->uri, args->path and args. Always returns NULL.
 *
 * NOTE(review): args->wake_fd is not closed in this function.
 */
static void *
event_thread(void *arg)
{
struct event_args *args = (struct event_args *)arg;
virConnectPtr dconn = NULL;
int callback1ret = -1;
- int sts;
dbg_printf(3, "Libvirt event listener starting\n");
if (args->uri)
dbg_printf(3," * URI: %s\n", args->uri);
if (args->path)
dbg_printf(3," * Socket path: %s\n", args->path);
dbg_printf(3," * Mode: %s\n", args->mode ? "VMChannel" : "Serial");
-top:
- virEventRegisterImpl(myEventAddHandleFunc,
- myEventUpdateHandleFunc,
- myEventRemoveHandleFunc,
- myEventAddTimeoutFunc,
- myEventUpdateTimeoutFunc,
- myEventRemoveTimeoutFunc);
+ if (virEventRegisterDefaultImpl() < 0) {
+ dbg_printf(1, "Failed to register default event impl\n");
+ goto out;
+ }
dconn = virConnectOpen(args->uri);
if (!dconn) {
dbg_printf(1, "Error connecting to libvirt\n");
goto out;
}
+ virConnectRegisterCloseCallback(dconn, connectClose, NULL, NULL);
+
DEBUG0("Registering domain event cbs");
registerExisting(dconn, args->path, args->mode);
callback1ret =
virConnectDomainEventRegister(dconn, myDomainEventCallback1, arg, NULL);
- if (callback1ret == 0) {
+ if (callback1ret != -1) {
+ if (virConnectSetKeepAlive(dconn, 5, 5) < 0) {
+ dbg_printf(1, "Failed to start keepalive protocol\n");
+ run = 0;
+ }
while (run) {
- struct pollfd pfd = {
- .fd = h_fd,
- .events = h_event,
- .revents = 0
- };
-
- sts = poll(&pfd, 1, TIMEOUT_MS);
- /* We are assuming timeout of 0 here - so execute every time */
- if (t_cb && t_active) {
- t_cb(t_timeout, t_opaque);
- }
-
- if (sts == 0) {
- /* DEBUG0("Poll timeout"); */
- continue;
- }
-
- if (sts < 0) {
- DEBUG0("Poll failed");
- continue;
- }
-
- if (pfd.revents & POLLHUP) {
- DEBUG0("Reset by peer");
- virConnectDomainEventDeregister(dconn, myDomainEventCallback1);
- if (dconn && virConnectClose(dconn) < 0)
- dbg_printf(1, "error closing libvirt connection\n");
- DEBUG0("Attempting to reinitialize libvirt connection");
- goto top;
- }
-
- if (h_cb) {
- h_cb(0, h_fd,
- myPollEventToEventHandleType(pfd.revents & h_event),
- h_opaque);
+ if (virEventRunDefaultImpl() < 0) {
+ dbg_printf(1, "RunDefaultImpl Failed\n");
}
}
DEBUG0("Deregistering event handlers");
virConnectDomainEventDeregister(dconn, myDomainEventCallback1);
}
DEBUG0("Closing connection");
if (dconn && virConnectClose(dconn) < 0) {
dbg_printf(1, "error closing libvirt connection\n");
}
out:
free(args->uri);
free(args->path);
free(args);
return NULL;
}
/*
 * Start the libvirt event listener thread.
 *
 * uri:     libvirt URI for the thread to connect to; may be NULL.
 * path:    socket directory filter handed to the thread; may be NULL.
 * mode:    0 for serial devices, non-zero for VMChannel devices.
 * wake_fd: on success receives the read end of a pipe; the listener
 *          writes one byte to it whenever a domain starts.
 *
 * Returns 0 on success. On any failure returns -1 after releasing
 * everything allocated here (argument block, strings and both pipe
 * descriptors); *wake_fd is left untouched. If thread creation fails,
 * errno holds the pthread_create() error code.
 */
int
start_event_listener(const char *uri, const char *path, int mode, int *wake_fd)
{
	struct event_args *args = NULL;
	int wake_pipe[2] = { -1, -1 };
	int err;

	virInitialize();

	args = calloc(1, sizeof(*args));
	if (!args)
		return -1;

	if (pipe2(wake_pipe, O_CLOEXEC) < 0)
		goto out_fail;

	if (uri) {
		args->uri = strdup(uri);
		if (args->uri == NULL)
			goto out_fail;
	}

	if (path) {
		args->path = strdup(path);
		if (args->path == NULL)
			goto out_fail;
	}

	args->mode = mode;
	args->wake_fd = wake_pipe[1];

	run = 1;
	err = pthread_create(&event_tid, NULL, event_thread, args);
	if (err != 0) {
		/* pthread_create() reports its error as the return value */
		run = 0;
		errno = err;
		goto out_fail;
	}

	/* The thread now owns args; only the read end goes to the caller */
	*wake_fd = wake_pipe[0];
	return 0;

out_fail:
	err = errno;	/* close()/free() may overwrite errno */
	if (wake_pipe[0] >= 0)
		close(wake_pipe[0]);
	if (wake_pipe[1] >= 0)
		close(wake_pipe[1]);
	free(args->uri);
	free(args->path);
	free(args);
	errno = err;
	return -1;
}
/*
 * Stop the libvirt event listener: clear `run` so the loop in
 * event_thread() ends, wait for the thread to finish, then reset the
 * stored thread id. Always returns 0.
 */
int
stop_event_listener(void)
{
	run = 0;

	/* Block until the listener thread has returned */
	(void)pthread_join(event_tid, NULL);
	event_tid = 0;

	return 0;
}
diff --git a/server/virt.h b/server/virt.h
index 7ebea7d2..19f38231 100644
--- a/server/virt.h
+++ b/server/virt.h
@@ -1,80 +1,81 @@
/*
Copyright Red Hat, Inc. 2006
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
MA 02139, USA.
*/
#ifndef _VIRT_H
#define _VIRT_H
#include <libvirt/libvirt.h>
#include <stdint.h>
#include <netinet/in.h>
#include "xvm.h"
/*
Owner 0 = no owner.
checkpoint "xen-vm-states" {
section "vm-name0" {
owner_nodeid;
vm_state;
}
section "vm-name1" {
owner_nodeid;
vm_state;
}
...
}
*/
/*
 * Per-VM state record.
 * NOTE(review): presumably s_owner / s_state correspond to the
 * owner_nodeid / vm_state entries sketched in the layout comment above,
 * with owner 0 meaning "no owner" - confirm against the writers.
 */
typedef struct {
uint32_t s_owner;
int32_t s_state;
} vm_state_t;
/*
 * One virtual machine: name and UUID buffers (each
 * MAX_DOMAINNAME_LENGTH bytes) plus its vm_state_t.
 */
typedef struct {
char v_name[MAX_DOMAINNAME_LENGTH];
char v_uuid[MAX_DOMAINNAME_LENGTH];
vm_state_t v_state;
} virt_state_t;
/**
This is stored in our private checkpoint section.

A count followed by a trailing zero-length array of virt_state_t.
NOTE(review): presumably vm_states holds vm_count entries allocated
together with the header - confirm in vl_get().
*/
typedef struct _virt_list {
uint32_t vm_count;
virt_state_t vm_states[0];
} virt_list_t;
/* VM list helpers; their definitions are not shown alongside this header. */
virt_list_t *vl_get(virConnectPtr vp, int my_id);
int vl_cmp(virt_list_t *left, virt_list_t *right);
void vl_print(virt_list_t *vl);
void vl_free(virt_list_t *old);
virt_state_t * vl_find_uuid(virt_list_t *vl, const char *name);
virt_state_t * vl_find_name(virt_list_t *vl, const char *name);
/*
 * Checkpoint API (vm_states.c). Callers hold the handle as an opaque
 * pointer: the functions below take and return it as void *.
 */
typedef void ckpt_handle;
/* Read section secid into buf; returns the size read, or -1 with errno set. */
int ckpt_read(void *hp, const char *secid, void *buf, size_t maxlen);
/* Close the checkpoint and finalize the library handle; 0 or -1. */
int ckpt_finish(void *hp);
/* Write buf to section secid, creating the section if it does not exist. */
int ckpt_write(void *hp, const char *secid, void *buf, size_t maxlen);
/* Delete section secid; returns 0, or -1 with errno set. */
+int ckpt_erase(void *hp, const char *secid);
/* Initialize the library and open or create the checkpoint; NULL on failure. */
void *ckpt_init(const char *ckpt_name, int maxlen, int maxsec, int maxseclen,
int timeout);
#endif
diff --git a/server/vm_states.c b/server/vm_states.c
index 6b95bfa4..60076b50 100644
--- a/server/vm_states.c
+++ b/server/vm_states.c
@@ -1,433 +1,458 @@
/*
Copyright Red Hat, Inc. 2006
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
MA 02139, USA.
*/
/** @file
* Distributed VM states using saCkpt interface
*/
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <sys/time.h>
#include <pthread.h>
#include <openais/saAis.h>
#include <openais/saCkpt.h>
#include <unistd.h>
#include <stdio.h>
#include <assert.h>
+#include "xvm.h"
/*
 * Private state behind the void * handle returned by ckpt_init().
 */
typedef struct {
/* READY_MAGIC once saCkptInitialize() succeeded; cleared by ckpt_finish() */
uint32_t ck_ready;
/* timeout stored by ckpt_open(); passed to saCkptCheckpointSynchronize() */
int ck_timeout;
/* checkpoint handle filled in by saCkptCheckpointOpen() */
SaCkptCheckpointHandleT ck_checkpoint;
/* library handle filled in by saCkptInitialize() */
SaCkptHandleT ck_handle;
/* strdup() of the checkpoint name; freed by ckpt_finish() */
char *ck_name;
} ckpt_handle;
/* Value of ck_ready in a handle that has been initialized */
#define READY_MAGIC 0x13fd237c
/*
 * Reject a NULL or uninitialized checkpoint handle: sets errno to EINVAL
 * and returns -1 from the enclosing function. The argument is
 * parenthesized so any pointer expression may be passed; note that it is
 * evaluated twice.
 */
#define VALIDATE(h) \
do { \
	if (!(h) || (h)->ck_ready != READY_MAGIC) { \
		errno = EINVAL; \
		return -1; \
	} \
} while(0)
int ais_to_posix(SaAisErrorT err);

/*
 * Translate an SA Forum AIS error code into an errno value.
 * SA_AIS_OK maps to 0; any code without an entry in the table below maps
 * to EINVAL. Entries marked XXX are approximate matches carried over
 * unchanged.
 */
int
ais_to_posix(SaAisErrorT err)
{
	static const struct {
		SaAisErrorT ais;
		int posix;
	} errmap[] = {
		{ SA_AIS_OK,				0 },
		{ SA_AIS_ERR_LIBRARY,			ELIBBAD },
		{ SA_AIS_ERR_VERSION,			EPROTONOSUPPORT }, /* XXX */
		{ SA_AIS_ERR_INIT,			EFAULT },	/* XXX */
		{ SA_AIS_ERR_TIMEOUT,			ETIMEDOUT },
		{ SA_AIS_ERR_TRY_AGAIN,			EAGAIN },
		{ SA_AIS_ERR_INVALID_PARAM,		EINVAL },
		{ SA_AIS_ERR_NO_MEMORY,			ENOMEM },
		{ SA_AIS_ERR_BAD_HANDLE,		EBADF },
		{ SA_AIS_ERR_BUSY,			EBUSY },
		{ SA_AIS_ERR_ACCESS,			EACCES },
		{ SA_AIS_ERR_NOT_EXIST,			ENOENT },
		{ SA_AIS_ERR_NAME_TOO_LONG,		ENAMETOOLONG },
		{ SA_AIS_ERR_EXIST,			EEXIST },
		{ SA_AIS_ERR_NO_SPACE,			ENOSPC },
		{ SA_AIS_ERR_INTERRUPT,			EINTR },
		{ SA_AIS_ERR_NAME_NOT_FOUND,		ENOENT },
		{ SA_AIS_ERR_NO_RESOURCES,		ENOMEM },	/* XXX */
		{ SA_AIS_ERR_NOT_SUPPORTED,		ENOSYS },
		{ SA_AIS_ERR_BAD_OPERATION,		EINVAL },	/* XXX */
		{ SA_AIS_ERR_FAILED_OPERATION,		EIO },		/* XXX */
		{ SA_AIS_ERR_MESSAGE_ERROR,		EIO },		/* XXX */
		{ SA_AIS_ERR_QUEUE_FULL,		ENOBUFS },
		{ SA_AIS_ERR_QUEUE_NOT_AVAILABLE,	ENOENT },
		{ SA_AIS_ERR_BAD_FLAGS,			EINVAL },
		{ SA_AIS_ERR_TOO_BIG,			E2BIG },
		{ SA_AIS_ERR_NO_SECTIONS,		ENOENT },	/* XXX */
		/* SA_AIS_ERR_SECURITY (EPERM) is intentionally left out */
	};
	size_t idx;

	for (idx = 0; idx < sizeof(errmap) / sizeof(errmap[0]); idx++) {
		if (errmap[idx].ais == err)
			return errmap[idx].posix;
	}

	return EINVAL; /* XXX */
}
/*
 * Open the named checkpoint on an initialized handle, creating it when a
 * plain open fails. This block is shown as a diff: lines starting with
 * '-' are removed by the patch, lines starting with '+' are added.
 *
 * First tries saCkptCheckpointOpen() for read/write without creation
 * attributes; if that fails, retries with SA_CKPT_CHECKPOINT_CREATE using
 * maxsize, maxsec and maxsecsize. On success the name is duplicated into
 * h->ck_name. timeout is stored in h->ck_timeout and passed to both open
 * calls.
 *
 * Returns 0 on success, -1 with errno set (via ais_to_posix()) otherwise.
 *
 * NOTE(review): cpname.length is set to strlen(ckpt_name) even though the
 * copy into cpname.value is bounded by SA_MAX_NAME_LENGTH-1, and the
 * strdup() result is not checked.
 */
static int
ckpt_open(ckpt_handle *h, const char *ckpt_name, int maxsize,
int maxsec, int maxsecsize, int timeout)
{
SaCkptCheckpointCreationAttributesT attrs;
SaCkptCheckpointOpenFlagsT flags;
SaNameT cpname;
#if 0
SaCkptCheckpointDescriptorT status;
#endif
SaAisErrorT err = SA_AIS_OK;
VALIDATE(h);
flags = SA_CKPT_CHECKPOINT_READ |
SA_CKPT_CHECKPOINT_WRITE;
snprintf((char *)cpname.value, SA_MAX_NAME_LENGTH-1,
"%s", ckpt_name);
cpname.length = strlen(ckpt_name);
h->ck_timeout = timeout;
err = saCkptCheckpointOpen(h->ck_handle,
&cpname,
NULL,
flags,
timeout,
&h->ck_checkpoint);
if (err == SA_AIS_OK) {
#if 0
saCkptCheckpointStatusGet(h->ck_handle,
&status);
printf("Checkpoint Size = %d bytes\n", (int)
status.checkpointCreationAttributes.checkpointSize);
printf("Flags = ");
if (status.checkpointCreationAttributes.creationFlags &
SA_CKPT_WR_ALL_REPLICAS) {
printf("%s ", "SA_CKPT_WR_ALL_REPLICAS");
}
if (status.checkpointCreationAttributes.creationFlags &
SA_CKPT_WR_ACTIVE_REPLICA) {
printf("%s ", "SA_CKPT_WR_ACTIVE_REPLICA");
}
if (status.checkpointCreationAttributes.creationFlags &
SA_CKPT_WR_ACTIVE_REPLICA_WEAK) {
printf("%s ", "SA_CKPT_WR_ACTIVE_REPLICA_WEAK");
}
if (status.checkpointCreationAttributes.creationFlags &
SA_CKPT_CHECKPOINT_COLLOCATED) {
printf("%s ", "SA_CKPT_CHECKPOINT_COLLOCATED");
}
printf("\nMax sections = %d\n",
(int)status.checkpointCreationAttributes.maxSections);
printf("Max section size = %d\n",
(int)status.checkpointCreationAttributes.maxSectionSize);
printf("Max section ID size = %d\n",
(int)status.checkpointCreationAttributes.maxSectionIdSize);
printf("Section count = %d\n", status.numberOfSections);
printf("\n");
#endif
goto good;
}
attrs.creationFlags = SA_CKPT_WR_ALL_REPLICAS;
attrs.checkpointSize = (SaSizeT)maxsize;
- attrs.retentionDuration = SA_TIME_ONE_HOUR;
+ attrs.retentionDuration = SA_TIME_ONE_MINUTE;
attrs.maxSections = maxsec;
attrs.maxSectionSize = (SaSizeT)maxsecsize;
- attrs.maxSectionIdSize = (SaSizeT)32;
+ attrs.maxSectionIdSize = (SaSizeT)MAX_DOMAINNAME_LENGTH;
flags = SA_CKPT_CHECKPOINT_READ |
SA_CKPT_CHECKPOINT_WRITE |
SA_CKPT_CHECKPOINT_CREATE;
err = saCkptCheckpointOpen(h->ck_handle,
&cpname,
&attrs,
flags,
timeout,
&h->ck_checkpoint);
if (err == SA_AIS_OK)
goto good;
/* No checkpoint */
errno = ais_to_posix(err);
return (errno == 0 ? 0 : -1);
good:
printf("Opened ckpt %s\n", ckpt_name);
h->ck_name = strdup(ckpt_name);
errno = ais_to_posix(err);
return (errno == 0 ? 0 : -1);
}
/*
 * Initialize the checkpoint library and open (or create) a checkpoint.
 *
 * ckpt_name: checkpoint name; NULL or empty yields NULL with
 *            errno = EINVAL.
 * maxlen:    checkpoint size used when the checkpoint has to be created.
 * maxsec:    maximum number of sections on creation.
 * maxseclen: maximum section size on creation.
 * timeout:   timeout passed to the open call and stored for later
 *            synchronize calls.
 *
 * Returns an opaque handle for the other ckpt_* functions, or NULL with
 * errno set. When the checkpoint cannot be opened, the library handle
 * obtained from saCkptInitialize() is finalized again so it does not leak.
 */
void *
ckpt_init(const char *ckpt_name, int maxlen, int maxsec,
	  int maxseclen, int timeout)
{
	ckpt_handle *h;
	SaAisErrorT err;
	SaVersionT ver;
	int saved_errno;

	if (!ckpt_name || !strlen(ckpt_name)) {
		errno = EINVAL;
		return NULL;
	}

	h = malloc(sizeof(*h));
	if (!h)
		return NULL;
	memset(h, 0, sizeof(*h));

	ver.releaseCode = 'B';
	ver.majorVersion = 1;
	ver.minorVersion = 1;

	err = saCkptInitialize(&h->ck_handle, NULL, &ver);
	if (err != SA_AIS_OK) {
		free(h);
		errno = ais_to_posix(err);
		return NULL;
	}
	h->ck_ready = READY_MAGIC;

	if (ckpt_open(h, ckpt_name, maxlen, maxsec, maxseclen,
		      timeout) < 0) {
		/*
		 * ckpt_open() fails only when no checkpoint was opened, so
		 * there is nothing to close; release the library handle.
		 */
		saved_errno = errno;
		saCkptFinalize(h->ck_handle);
		free(h->ck_name);
		free(h);
		errno = saved_errno;
		return NULL;
	}

	return (void *)h;
}
/*
 * Write a buffer to a checkpoint section, creating the section when the
 * write reports that it does not exist.
 *
 * hp:     handle from ckpt_init().
 * secid:  NUL-terminated section identifier.
 * buf:    data to store.
 * maxlen: number of bytes to store.
 *
 * After a successful write or create the checkpoint is synchronized; the
 * result of the synchronize call is not examined.
 * Returns maxlen on success, -1 with errno set on failure.
 */
int
ckpt_write(void *hp, const char *secid, void *buf, size_t maxlen)
{
	ckpt_handle *ckpt = hp;
	SaCkptIOVectorElementT vec;
	SaCkptSectionCreationAttributesT create;
	SaAisErrorT rc;

	VALIDATE(ckpt);

	/* Offset and read size stay zero */
	memset(&vec, 0, sizeof(vec));
	vec.sectionId.id = (uint8_t *)secid;
	vec.sectionId.idLen = strlen(secid);
	vec.dataBuffer = buf;
	vec.dataSize = (SaSizeT)maxlen;

	rc = saCkptCheckpointWrite(ckpt->ck_checkpoint, &vec, 1, NULL);
	if (rc == SA_AIS_ERR_NOT_EXIST) {
		/* First write to this section: create it with the data */
		create.sectionId = &vec.sectionId;
		create.expirationTime = SA_TIME_END;
		rc = saCkptSectionCreate(ckpt->ck_checkpoint, &create,
					 buf, maxlen);
	}

	if (rc == SA_AIS_OK) {
		saCkptCheckpointSynchronize(ckpt->ck_checkpoint,
					    ckpt->ck_timeout);
	}

	errno = ais_to_posix(rc);
	return errno ? -1 : (int)maxlen; /* XXX */
}
/*
 * Read a checkpoint section into a caller-supplied buffer.
 *
 * hp:     handle from ckpt_init().
 * secid:  NUL-terminated section identifier.
 * buf:    destination buffer.
 * maxlen: size of buf in bytes.
 *
 * Returns the read size reported by the checkpoint service, or -1 with
 * errno set on failure.
 */
int
ckpt_read(void *hp, const char *secid, void *buf, size_t maxlen)
{
	ckpt_handle *ckpt = hp;
	SaCkptIOVectorElementT vec;
	SaAisErrorT rc;

	VALIDATE(ckpt);

	/* Offset and read size start at zero */
	memset(&vec, 0, sizeof(vec));
	vec.sectionId.id = (uint8_t *)secid;
	vec.sectionId.idLen = strlen(secid);
	vec.dataBuffer = buf;
	vec.dataSize = (SaSizeT)maxlen;

	rc = saCkptCheckpointRead(ckpt->ck_checkpoint, &vec, 1, NULL);

	errno = ais_to_posix(rc);
	if (errno != 0)
		return -1;

	return vec.readSize; /* XXX */
}
/*
 * Delete the section named secid from the checkpoint (function added by
 * this patch; every line carries the '+' diff marker).
 *
 * hp is a handle from ckpt_init(); an invalid handle yields -1 with
 * errno = EINVAL through VALIDATE(). After a successful delete the
 * checkpoint is synchronized; the result of that call is not examined.
 * Returns 0 on success, -1 with errno set from the AIS error otherwise.
 */
+int
+ckpt_erase(void *hp, const char *secid)
+{
+ ckpt_handle *h = (ckpt_handle *)hp;
+ SaAisErrorT err;
+ SaCkptSectionIdT sectionId;
+ VALIDATE(h);
+
+ sectionId.id = (uint8_t *)secid;
+ sectionId.idLen = strlen(secid);
+
+ err = saCkptSectionDelete(h->ck_checkpoint, &sectionId);
+
+ if (err == SA_AIS_OK)
+ saCkptCheckpointSynchronize(h->ck_checkpoint,
+ h->ck_timeout);
+
+ errno = ais_to_posix(err);
+ if (errno)
+ return -1;
+ return 0;
+}
+
+
/*
 * Close the checkpoint and finalize the library handle.
 *
 * hp: handle from ckpt_init(). A NULL or already finished handle is
 *     rejected with -1 / errno = EINVAL, like the other ckpt_* calls,
 *     instead of being dereferenced or torn down a second time.
 *
 * The stored checkpoint name is freed and cleared. Returns 0 on success;
 * if saCkptFinalize() fails, returns -1 with errno set and the handle
 * stays marked as ready.
 *
 * NOTE(review): the handle structure itself is not freed here - confirm
 * who owns it.
 */
int
ckpt_finish(void *hp)
{
	ckpt_handle *h = (ckpt_handle *)hp;
	SaAisErrorT err;

	VALIDATE(h);

	saCkptCheckpointClose(h->ck_checkpoint);
	err = saCkptFinalize(h->ck_handle);

	/* Clear the pointer so a later call cannot free it again */
	free(h->ck_name);
	h->ck_name = NULL;

	if (err != SA_AIS_OK) {
		errno = ais_to_posix(err);
		return -1;
	}

	h->ck_ready = 0;
	return 0;
}
#ifdef STANDALONE
/*
 * Print the command-line synopsis of the standalone test tool and exit
 * with the given status.
 */
void
usage(int ret)
{
	fputs("usage: ckpt [-c ckpt_name] <-r key|-w key -d data>\n", stdout);
	exit(ret);
}
/*
 * Standalone test driver: reads (-r key) or writes (-w key -d data) one
 * section of the checkpoint named by -c (default "ckpt_test").
 *
 * Exits through usage() on bad arguments, including -w without -d.
 * Returns 0 on success, 1 if the read or write fails, -1 if the
 * checkpoint cannot be initialized.
 */
int
main(int argc, char **argv)
{
	char *ckptname = "ckpt_test";
	char *sec = "default";
	char *val = NULL;
	void *h;
	char buf[64];
	int ret;
	int op = 0;

	while((ret = getopt(argc, argv, "c:w:r:d:j?")) != EOF) {
		switch(ret) {
		case 'c':
			ckptname = optarg;
			break;
		case 'w':
			op = 'w';
			sec = optarg;
			break;
		case 'r':
			op = 'r';
			sec = optarg;
			break;
		case 'd':
			val = optarg;
			break;
		case '?':
		case 'h':
			usage(0);
		default:
			usage(1);
		}
	}

	if (!op) {
		usage(1);
	}

	if (!sec) {
		usage(1);
	}

	/* A write needs data; val would otherwise be used unset */
	if (op == 'w' && !val) {
		usage(1);
	}

	h = ckpt_init(ckptname, 262144, 4096, 64, 10);
	if (!h) {
		perror("ckpt_init");
		return -1;
	}

	if (op == 'w') {
		if (ckpt_write(h, sec, val, strlen(val)+1) < 0) {
			perror("ckpt_write");
			ckpt_finish(h);
			return 1;
		}
	} else if (op == 'r') {
		ret = ckpt_read(h, sec, buf, sizeof(buf));
		if (ret < 0) {
			perror("ckpt_read");
			ckpt_finish(h);
			return 1;
		}
		/*
		 * The stored data need not be NUL-terminated; never print
		 * more than what was read.
		 */
		printf("%d bytes\nDATA for '%s':\n%.*s\n", ret, sec,
		       ret, buf);
	}

	ckpt_finish(h);
	return 0;
}
#endif

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 7:16 AM (22 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018385
Default Alt Text
(53 KB)

Event Timeline