Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4525596
container.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
50 KB
Referenced Files
None
Subscribers
None
container.c
View Options
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <ctype.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <unpack.h>
#include <crm/msg_xml.h>
#define VARIANT_CONTAINER 1
#include "./variant.h"
void tuple_free(container_grouping_t *tuple);
static char *
next_ip(const char *last_ip)
{
unsigned int oct1 = 0;
unsigned int oct2 = 0;
unsigned int oct3 = 0;
unsigned int oct4 = 0;
int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4);
if (rc != 4) {
/*@ TODO check for IPv6 */
return NULL;
} else if (oct3 > 253) {
return NULL;
} else if (oct4 > 253) {
++oct3;
oct4 = 1;
} else {
++oct4;
}
return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4);
}
static int
allocate_ip(container_variant_data_t *data, container_grouping_t *tuple, char *buffer, int max)
{
if(data->ip_range_start == NULL) {
return 0;
} else if(data->ip_last) {
tuple->ipaddr = next_ip(data->ip_last);
} else {
tuple->ipaddr = strdup(data->ip_range_start);
}
data->ip_last = tuple->ipaddr;
#if 0
return snprintf(buffer, max, " --add-host=%s-%d:%s --link %s-docker-%d:%s-link-%d",
data->prefix, tuple->offset, tuple->ipaddr,
data->prefix, tuple->offset, data->prefix, tuple->offset);
#else
if (data->type == PE_CONTAINER_TYPE_DOCKER) {
return snprintf(buffer, max, " --add-host=%s-%d:%s",
data->prefix, tuple->offset, tuple->ipaddr);
} else if (data->type == PE_CONTAINER_TYPE_RKT) {
return snprintf(buffer, max, " --hosts-entry=%s=%s-%d",
tuple->ipaddr, data->prefix, tuple->offset);
} else {
return 0;
}
#endif
}
static xmlNode *
create_resource(const char *name, const char *provider, const char *kind)
{
xmlNode *rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, name);
crm_xml_add(rsc, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF);
crm_xml_add(rsc, XML_AGENT_ATTR_PROVIDER, provider);
crm_xml_add(rsc, XML_ATTR_TYPE, kind);
return rsc;
}
/*!
* \internal
* \brief Check whether cluster can manage resource inside container
*
* \param[in] data Container variant data
*
* \return TRUE if networking configuration is acceptable, FALSE otherwise
*
* \note The resource is manageable if an IP range or control port has been
* specified. If a control port is used without an IP range, replicas per
* host must be 1.
*/
static bool
valid_network(container_variant_data_t *data)
{
if(data->ip_range_start) {
return TRUE;
}
if(data->control_port) {
if(data->replicas_per_host > 1) {
pe_err("Specifying the 'control-port' for %s requires 'replicas-per-host=1'", data->prefix);
data->replicas_per_host = 1;
/* @TODO to be sure: clear_bit(rsc->flags, pe_rsc_unique); */
}
return TRUE;
}
return FALSE;
}
static bool
create_ip_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
if(data->ip_range_start) {
char *id = NULL;
xmlNode *xml_ip = NULL;
xmlNode *xml_obj = NULL;
id = crm_strdup_printf("%s-ip-%s", data->prefix, tuple->ipaddr);
crm_xml_sanitize_id(id);
xml_ip = create_resource(id, "heartbeat", "IPaddr2");
free(id);
xml_obj = create_xml_node(xml_ip, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset);
crm_create_nvpair_xml(xml_obj, NULL, "ip", tuple->ipaddr);
if(data->host_network) {
crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network);
}
if(data->host_netmask) {
crm_create_nvpair_xml(xml_obj, NULL,
"cidr_netmask", data->host_netmask);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32");
}
xml_obj = create_xml_node(xml_ip, "operations");
crm_create_op_xml(xml_obj, ID(xml_ip), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
crm_log_xml_trace(xml_ip, "Container-ip");
if (common_unpack(xml_ip, &tuple->ip, parent, data_set) == false) {
return FALSE;
}
parent->children = g_list_append(parent->children, tuple->ip);
}
return TRUE;
}
static bool
create_docker_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
int offset = 0, max = 4096;
char *buffer = calloc(1, max+1);
int doffset = 0, dmax = 1024;
char *dbuffer = calloc(1, dmax+1);
char *id = NULL;
xmlNode *xml_docker = NULL;
xmlNode *xml_obj = NULL;
id = crm_strdup_printf("%s-docker-%d", data->prefix, tuple->offset);
crm_xml_sanitize_id(id);
xml_docker = create_resource(id, "heartbeat", "docker");
free(id);
xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset);
crm_create_nvpair_xml(xml_obj, NULL, "image", data->image);
crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE);
crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE);
crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE);
offset += snprintf(buffer+offset, max-offset, " --restart=no");
/* Set a container hostname only if we have an IP to map it to.
* The user can set -h or --uts=host themselves if they want a nicer
* name for logs, but this makes applications happy who need their
* hostname to match the IP they bind to.
*/
if (data->ip_range_start != NULL) {
offset += snprintf(buffer+offset, max-offset, " -h %s-%d",
data->prefix, tuple->offset);
}
offset += snprintf(buffer+offset, max-offset, " -e PCMK_stderr=1");
if(data->docker_network) {
// offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr);
offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network);
}
if(data->control_port) {
offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%s", data->control_port);
} else {
offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%d", DEFAULT_REMOTE_PORT);
}
for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) {
container_mount_t *mount = pIter->data;
if(mount->flags) {
char *source = crm_strdup_printf(
"%s/%s-%d", mount->source, data->prefix, tuple->offset);
if(doffset > 0) {
doffset += snprintf(dbuffer+doffset, dmax-doffset, ",");
}
doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source);
offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target);
free(source);
} else {
offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target);
}
if(mount->options) {
offset += snprintf(buffer+offset, max-offset, ":%s", mount->options);
}
}
for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) {
container_port_t *port = pIter->data;
if(tuple->ipaddr) {
offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s",
tuple->ipaddr, port->source, port->target);
} else if(safe_str_neq(data->docker_network, "host")) {
// No need to do port mapping if net=host
offset += snprintf(buffer+offset, max-offset, " -p %s:%s", port->source, port->target);
}
}
if(data->docker_run_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options);
}
if(data->docker_host_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options);
}
crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer);
free(buffer);
crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer);
free(dbuffer);
if(tuple->child) {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL,
"run_cmd", data->docker_run_command);
} else {
crm_create_nvpair_xml(xml_obj, NULL,
"run_cmd", SBIN_DIR "/pacemaker_remoted");
}
/* TODO: Allow users to specify their own?
*
* We just want to know if the container is alive, we'll
* monitor the child independently
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
/* } else if(child && data->untrusted) {
* Support this use-case?
*
* The ability to have resources started/stopped by us, but
* unable to set attributes, etc.
*
* Arguably better to control API access this with ACLs like
* "normal" remote nodes
*
* crm_create_nvpair_xml(xml_obj, NULL,
* "run_cmd", "/usr/libexec/pacemaker/lrmd");
* crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd",
* "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke");
*/
} else {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL,
"run_cmd", data->docker_run_command);
}
/* TODO: Allow users to specify their own?
*
* We don't know what's in the container, so we just want
* to know if it is alive
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
}
xml_obj = create_xml_node(xml_docker, "operations");
crm_create_op_xml(xml_obj, ID(xml_docker), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
crm_log_xml_trace(xml_docker, "Container-docker");
if (common_unpack(xml_docker, &tuple->docker, parent, data_set) == FALSE) {
return FALSE;
}
parent->children = g_list_append(parent->children, tuple->docker);
return TRUE;
}
static bool
create_rkt_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
int offset = 0, max = 4096;
char *buffer = calloc(1, max+1);
int doffset = 0, dmax = 1024;
char *dbuffer = calloc(1, dmax+1);
char *id = NULL;
xmlNode *xml_docker = NULL;
xmlNode *xml_obj = NULL;
int volid = 0;
id = crm_strdup_printf("%s-rkt-%d", data->prefix, tuple->offset);
crm_xml_sanitize_id(id);
xml_docker = create_resource(id, "heartbeat", "rkt");
free(id);
xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset);
crm_create_nvpair_xml(xml_obj, NULL, "image", data->image);
crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", "true");
crm_create_nvpair_xml(xml_obj, NULL, "force_kill", "false");
crm_create_nvpair_xml(xml_obj, NULL, "reuse", "false");
/* Set a container hostname only if we have an IP to map it to.
* The user can set -h or --uts=host themselves if they want a nicer
* name for logs, but this makes applications happy who need their
* hostname to match the IP they bind to.
*/
if (data->ip_range_start != NULL) {
offset += snprintf(buffer+offset, max-offset, " --hostname=%s-%d",
data->prefix, tuple->offset);
}
offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_stderr=1");
if(data->docker_network) {
// offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr);
offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network);
}
if(data->control_port) {
offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%s", data->control_port);
} else {
offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%d", DEFAULT_REMOTE_PORT);
}
for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) {
container_mount_t *mount = pIter->data;
if(mount->flags) {
char *source = crm_strdup_printf(
"%s/%s-%d", mount->source, data->prefix, tuple->offset);
if(doffset > 0) {
doffset += snprintf(dbuffer+doffset, dmax-doffset, ",");
}
doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source);
offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, source);
if(mount->options) {
offset += snprintf(buffer+offset, max-offset, ",%s", mount->options);
}
offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target);
free(source);
} else {
offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, mount->source);
if(mount->options) {
offset += snprintf(buffer+offset, max-offset, ",%s", mount->options);
}
offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target);
}
volid++;
}
for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) {
container_port_t *port = pIter->data;
if(tuple->ipaddr) {
offset += snprintf(buffer+offset, max-offset, " --port=%s:%s:%s",
port->target, tuple->ipaddr, port->source);
} else {
offset += snprintf(buffer+offset, max-offset, " --port=%s:%s", port->target, port->source);
}
}
if(data->docker_run_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options);
}
if(data->docker_host_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options);
}
crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer);
free(buffer);
crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer);
free(dbuffer);
if(tuple->child) {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR"/pacemaker_remoted");
}
/* TODO: Allow users to specify their own?
*
* We just want to know if the container is alive, we'll
* monitor the child independently
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
/* } else if(child && data->untrusted) {
* Support this use-case?
*
* The ability to have resources started/stopped by us, but
* unable to set attributes, etc.
*
* Arguably better to control API access this with ACLs like
* "normal" remote nodes
*
* crm_create_nvpair_xml(xml_obj, NULL,
* "run_cmd", "/usr/libexec/pacemaker/lrmd");
* crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd",
* "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke");
*/
} else {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->docker_run_command);
}
/* TODO: Allow users to specify their own?
*
* We don't know what's in the container, so we just want
* to know if it is alive
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
}
xml_obj = create_xml_node(xml_docker, "operations");
crm_create_op_xml(xml_obj, ID(xml_docker), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
crm_log_xml_trace(xml_docker, "Container-rkt");
if (common_unpack(xml_docker, &tuple->docker, parent, data_set) == FALSE) {
return FALSE;
}
parent->children = g_list_append(parent->children, tuple->docker);
return TRUE;
}
/*!
* \brief Ban a node from a resource's (and its children's) allowed nodes list
*
* \param[in,out] rsc Resource to modify
* \param[in] uname Name of node to ban
*/
static void
disallow_node(resource_t *rsc, const char *uname)
{
gpointer match = g_hash_table_lookup(rsc->allowed_nodes, uname);
if (match) {
((pe_node_t *) match)->weight = -INFINITY;
((pe_node_t *) match)->rsc_discover_mode = pe_discover_never;
}
if (rsc->children) {
GListPtr child;
for (child = rsc->children; child != NULL; child = child->next) {
disallow_node((resource_t *) (child->data), uname);
}
}
}
static bool
create_remote_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
if (tuple->child && valid_network(data)) {
GHashTableIter gIter;
GListPtr rsc_iter = NULL;
node_t *node = NULL;
xmlNode *xml_remote = NULL;
char *id = crm_strdup_printf("%s-%d", data->prefix, tuple->offset);
char *port_s = NULL;
const char *uname = NULL;
const char *connect_name = NULL;
if (remote_id_conflict(id, data_set)) {
free(id);
// The biggest hammer we have
id = crm_strdup_printf("pcmk-internal-%s-remote-%d", tuple->child->id, tuple->offset);
CRM_ASSERT(remote_id_conflict(id, data_set) == FALSE);
}
/* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the
* connection does not have its own IP is a magic string that we use to
* support nested remotes (i.e. a bundle running on a remote node).
*/
connect_name = (tuple->ipaddr? tuple->ipaddr : "#uname");
if (data->control_port == NULL) {
port_s = crm_itoa(DEFAULT_REMOTE_PORT);
}
/* This sets tuple->docker as tuple->remote's container, which is
* similar to what happens with guest nodes. This is how the PE knows
* that the bundle node is fenced by recovering docker, and that
* remote should be ordered relative to docker.
*/
xml_remote = pe_create_remote_xml(NULL, id, tuple->docker->id,
XML_BOOLEAN_FALSE, NULL, "60s", NULL,
NULL, connect_name,
(data->control_port?
data->control_port : port_s));
free(port_s);
/* Abandon our created ID, and pull the copy from the XML, because we
* need something that will get freed during data set cleanup to use as
* the node ID and uname.
*/
free(id);
id = NULL;
uname = ID(xml_remote);
/* Ensure a node has been created for the guest (it may have already
* been, if it has a permanent node attribute), and ensure its weight is
* -INFINITY so no other resources can run on it.
*/
node = pe_find_node(data_set->nodes, uname);
if (node == NULL) {
node = pe_create_node(uname, uname, "remote", "-INFINITY",
data_set);
} else {
node->weight = -INFINITY;
}
node->rsc_discover_mode = pe_discover_never;
/* unpack_remote_nodes() ensures that each remote node and guest node
* has a pe_node_t entry. Ideally, it would do the same for bundle nodes.
* Unfortunately, a bundle has to be mostly unpacked before it's obvious
* what nodes will be needed, so we do it just above.
*
* Worse, that means that the node may have been utilized while
* unpacking other resources, without our weight correction. The most
* likely place for this to happen is when common_unpack() calls
* resource_location() to set a default score in symmetric clusters.
* This adds a node *copy* to each resource's allowed nodes, and these
* copies will have the wrong weight.
*
* As a hacky workaround, fix those copies here.
*
* @TODO Possible alternative: ensure bundles are unpacked before other
* resources, so the weight is correct before any copies are made.
*/
for (rsc_iter = data_set->resources; rsc_iter; rsc_iter = rsc_iter->next) {
disallow_node((resource_t *) (rsc_iter->data), uname);
}
tuple->node = node_copy(node);
tuple->node->weight = 500;
tuple->node->rsc_discover_mode = pe_discover_exclusive;
/* Ensure the node shows up as allowed and with the correct discovery set */
g_hash_table_destroy(tuple->child->allowed_nodes);
tuple->child->allowed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str);
g_hash_table_insert(tuple->child->allowed_nodes, (gpointer) tuple->node->details->id, node_copy(tuple->node));
{
node_t *copy = node_copy(tuple->node);
copy->weight = -INFINITY;
g_hash_table_insert(tuple->child->parent->allowed_nodes, (gpointer) tuple->node->details->id, copy);
}
crm_log_xml_trace(xml_remote, "Container-remote");
if (common_unpack(xml_remote, &tuple->remote, parent, data_set) == FALSE) {
return FALSE;
}
g_hash_table_iter_init(&gIter, tuple->remote->allowed_nodes);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) {
if(is_remote_node(node)) {
/* Remote resources can only run on 'normal' cluster node */
node->weight = -INFINITY;
}
}
tuple->node->details->remote_rsc = tuple->remote;
tuple->remote->container = tuple->docker; // Ensures is_container_remote_node() functions correctly immediately
/* A bundle's #kind is closer to "container" (guest node) than the
* "remote" set by pe_create_node().
*/
g_hash_table_insert(tuple->node->details->attrs,
strdup(CRM_ATTR_KIND), strdup("container"));
/* One effect of this is that setup_container() will add
* tuple->remote to tuple->docker's fillers, which will make
* rsc_contains_remote_node() true for tuple->docker.
*
* tuple->child does NOT get added to tuple->docker's fillers.
* The only noticeable effect if it did would be for its fail count to
* be taken into account when checking tuple->docker's migration
* threshold.
*/
parent->children = g_list_append(parent->children, tuple->remote);
}
return TRUE;
}
static bool
create_container(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
if (data->type == PE_CONTAINER_TYPE_DOCKER &&
create_docker_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if (data->type == PE_CONTAINER_TYPE_RKT &&
create_rkt_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if(create_ip_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if(create_remote_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if(tuple->child && tuple->ipaddr) {
add_hash_param(tuple->child->meta, "external-ip", tuple->ipaddr);
}
if(tuple->remote) {
/*
* Allow the remote connection resource to be allocated to a
* different node than the one on which the docker container
* is active.
*
* Makes it possible to have remote nodes, running docker
* containers with pacemaker_remoted inside in order to start
* services inside those containers.
*/
set_bit(tuple->remote->flags, pe_rsc_allow_remote_remotes);
}
return TRUE;
}
static void
mount_add(container_variant_data_t *container_data, const char *source,
const char *target, const char *options, int flags)
{
container_mount_t *mount = calloc(1, sizeof(container_mount_t));
mount->source = strdup(source);
mount->target = strdup(target);
if (options) {
mount->options = strdup(options);
}
mount->flags = flags;
container_data->mounts = g_list_append(container_data->mounts, mount);
}
static void mount_free(container_mount_t *mount)
{
free(mount->source);
free(mount->target);
free(mount->options);
free(mount);
}
static void port_free(container_port_t *port)
{
free(port->source);
free(port->target);
free(port);
}
static container_grouping_t *
tuple_for_remote(resource_t *remote)
{
resource_t *top = remote;
container_variant_data_t *container_data = NULL;
if (top == NULL) {
return NULL;
}
while (top->parent != NULL) {
top = top->parent;
}
get_container_variant_data(container_data, top);
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
if(tuple->remote == remote) {
return tuple;
}
}
CRM_LOG_ASSERT(FALSE);
return NULL;
}
bool
container_fix_remote_addr(resource_t *rsc)
{
const char *name;
const char *value;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
const char *value_list[] = {
"remote",
PCMK_RESOURCE_CLASS_OCF,
"pacemaker"
};
if(rsc == NULL) {
return FALSE;
}
name = "addr";
value = g_hash_table_lookup(rsc->parameters, name);
if (safe_str_eq(value, "#uname") == FALSE) {
return FALSE;
}
for (int lpc = 0; lpc < DIMOF(attr_list); lpc++) {
name = attr_list[lpc];
value = crm_element_value(rsc->xml, attr_list[lpc]);
if (safe_str_eq(value, value_list[lpc]) == FALSE) {
return FALSE;
}
}
return TRUE;
}
const char *
container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field)
{
// REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
pe_node_t *node = NULL;
container_grouping_t *tuple = NULL;
if(container_fix_remote_addr(rsc) == FALSE) {
return NULL;
}
tuple = tuple_for_remote(rsc);
if(tuple == NULL) {
return NULL;
}
node = tuple->docker->allocated_to;
if(node == NULL && tuple->docker->running_on) {
/* If it won't be running anywhere after the
* transition, go with where it's running now.
*/
node = tuple->docker->running_on->data;
}
if(node == NULL) {
crm_trace("Cannot fix address for %s", tuple->remote->id);
return NULL;
}
crm_trace("Fixing addr for %s on %s", rsc->id, node->details->uname);
if(xml != NULL && field != NULL) {
crm_xml_add(xml, field, node->details->uname);
}
return node->details->uname;
}
gboolean
container_unpack(resource_t * rsc, pe_working_set_t * data_set)
{
const char *value = NULL;
xmlNode *xml_obj = NULL;
xmlNode *xml_resource = NULL;
container_variant_data_t *container_data = NULL;
CRM_ASSERT(rsc != NULL);
pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
container_data = calloc(1, sizeof(container_variant_data_t));
rsc->variant_opaque = container_data;
container_data->prefix = strdup(rsc->id);
xml_obj = first_named_child(rsc->xml, "docker");
if (xml_obj != NULL) {
container_data->type = PE_CONTAINER_TYPE_DOCKER;
} else {
xml_obj = first_named_child(rsc->xml, "rkt");
if (xml_obj != NULL) {
container_data->type = PE_CONTAINER_TYPE_RKT;
} else {
return FALSE;
}
}
value = crm_element_value(xml_obj, "masters");
container_data->masters = crm_parse_int(value, "0");
if (container_data->masters < 0) {
pe_err("'masters' for %s must be nonnegative integer, using 0",
rsc->id);
container_data->masters = 0;
}
value = crm_element_value(xml_obj, "replicas");
if ((value == NULL) && (container_data->masters > 0)) {
container_data->replicas = container_data->masters;
} else {
container_data->replicas = crm_parse_int(value, "1");
}
if (container_data->replicas < 1) {
pe_err("'replicas' for %s must be positive integer, using 1", rsc->id);
container_data->replicas = 1;
}
/*
* Communication between containers on the same host via the
* floating IPs only works if docker is started with:
* --userland-proxy=false --ip-masq=false
*/
value = crm_element_value(xml_obj, "replicas-per-host");
container_data->replicas_per_host = crm_parse_int(value, "1");
if (container_data->replicas_per_host < 1) {
pe_err("'replicas-per-host' for %s must be positive integer, using 1",
rsc->id);
container_data->replicas_per_host = 1;
}
if (container_data->replicas_per_host == 1) {
clear_bit(rsc->flags, pe_rsc_unique);
}
container_data->docker_run_command = crm_element_value_copy(xml_obj, "run-command");
container_data->docker_run_options = crm_element_value_copy(xml_obj, "options");
container_data->image = crm_element_value_copy(xml_obj, "image");
container_data->docker_network = crm_element_value_copy(xml_obj, "network");
xml_obj = first_named_child(rsc->xml, "network");
if(xml_obj) {
container_data->ip_range_start = crm_element_value_copy(xml_obj, "ip-range-start");
container_data->host_netmask = crm_element_value_copy(xml_obj, "host-netmask");
container_data->host_network = crm_element_value_copy(xml_obj, "host-interface");
container_data->control_port = crm_element_value_copy(xml_obj, "control-port");
for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL;
xml_child = __xml_next_element(xml_child)) {
container_port_t *port = calloc(1, sizeof(container_port_t));
port->source = crm_element_value_copy(xml_child, "port");
if(port->source == NULL) {
port->source = crm_element_value_copy(xml_child, "range");
} else {
port->target = crm_element_value_copy(xml_child, "internal-port");
}
if(port->source != NULL && strlen(port->source) > 0) {
if(port->target == NULL) {
port->target = strdup(port->source);
}
container_data->ports = g_list_append(container_data->ports, port);
} else {
pe_err("Invalid port directive %s", ID(xml_child));
port_free(port);
}
}
}
xml_obj = first_named_child(rsc->xml, "storage");
for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL;
xml_child = __xml_next_element(xml_child)) {
const char *source = crm_element_value(xml_child, "source-dir");
const char *target = crm_element_value(xml_child, "target-dir");
const char *options = crm_element_value(xml_child, "options");
int flags = 0;
if (source == NULL) {
source = crm_element_value(xml_child, "source-dir-root");
flags = 1;
}
if (source && target) {
mount_add(container_data, source, target, options, flags);
} else {
pe_err("Invalid mount directive %s", ID(xml_child));
}
}
xml_obj = first_named_child(rsc->xml, "primitive");
if (xml_obj && valid_network(container_data)) {
char *value = NULL;
xmlNode *xml_set = NULL;
if(container_data->masters > 0) {
xml_resource = create_xml_node(NULL, XML_CIB_TAG_MASTER);
} else {
xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION);
}
crm_xml_set_id(xml_resource, "%s-%s", container_data->prefix, xml_resource->name);
xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS);
crm_xml_set_id(xml_set, "%s-%s-meta", container_data->prefix, xml_resource->name);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_ORDERED, XML_BOOLEAN_TRUE);
value = crm_itoa(container_data->replicas);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_INCARNATION_MAX, value);
free(value);
value = crm_itoa(container_data->replicas_per_host);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_INCARNATION_NODEMAX, value);
free(value);
crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_UNIQUE,
(container_data->replicas_per_host > 1)?
XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE);
if(container_data->masters) {
value = crm_itoa(container_data->masters);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_MASTER_MAX, value);
free(value);
}
//crm_xml_add(xml_obj, XML_ATTR_ID, container_data->prefix);
add_node_copy(xml_resource, xml_obj);
} else if(xml_obj) {
pe_err("Cannot control %s inside %s without either ip-range-start or control-port",
rsc->id, ID(xml_obj));
return FALSE;
}
if(xml_resource) {
int lpc = 0;
GListPtr childIter = NULL;
resource_t *new_rsc = NULL;
container_port_t *port = NULL;
const char *key_loc = NULL;
int offset = 0, max = 1024;
char *buffer = NULL;
if (common_unpack(xml_resource, &new_rsc, rsc, data_set) == FALSE) {
pe_err("Failed unpacking resource %s", ID(rsc->xml));
if (new_rsc != NULL && new_rsc->fns != NULL) {
new_rsc->fns->free(new_rsc);
}
return FALSE;
}
container_data->child = new_rsc;
/* We map the remote authentication key (likely) used on the DC to the
* default key location inside the container. This is only the likely
* location because an actual connection will do some validity checking
* on the file before using it.
*
* Mapping to the default location inside the container avoids having to
* pass another environment variable to the container.
*
* This makes several assumptions:
* - if PCMK_authkey_location is set, it has the same value on all nodes
* - the container technology does not propagate host environment
* variables to the container
* - the user does not set this environment variable via their container
* image
*
* @TODO A convoluted but possible way around the first limitation would
* be to allow a resource parameter to include environment
* variable references in its value, and resolve them on the
* executing node's crmd before sending the command to the lrmd.
*/
key_loc = getenv("PCMK_authkey_location");
if (key_loc == NULL) {
key_loc = DEFAULT_REMOTE_KEY_LOCATION;
}
mount_add(container_data, key_loc, DEFAULT_REMOTE_KEY_LOCATION, NULL,
0);
mount_add(container_data, CRM_LOG_DIR "/bundles", "/var/log", NULL, 1);
port = calloc(1, sizeof(container_port_t));
if(container_data->control_port) {
port->source = strdup(container_data->control_port);
} else {
/* If we wanted to respect PCMK_remote_port, we could use
* crm_default_remote_port() here and elsewhere in this file instead
* of DEFAULT_REMOTE_PORT.
*
* However, it gains nothing, since we control both the container
* environment and the connection resource parameters, and the user
* can use a different port if desired by setting control-port.
*/
port->source = crm_itoa(DEFAULT_REMOTE_PORT);
}
port->target = strdup(port->source);
container_data->ports = g_list_append(container_data->ports, port);
buffer = calloc(1, max+1);
for(childIter = container_data->child->children; childIter != NULL; childIter = childIter->next) {
container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t));
tuple->child = childIter->data;
tuple->child->exclusive_discover = TRUE;
tuple->offset = lpc++;
// Ensure the child's notify gets set based on the underlying primitive's value
if(is_set(tuple->child->flags, pe_rsc_notify)) {
set_bit(container_data->child->flags, pe_rsc_notify);
}
offset += allocate_ip(container_data, tuple, buffer+offset, max-offset);
container_data->tuples = g_list_append(container_data->tuples, tuple);
container_data->attribute_target = g_hash_table_lookup(tuple->child->meta, XML_RSC_ATTR_TARGET);
}
container_data->docker_host_options = buffer;
if(container_data->attribute_target) {
g_hash_table_replace(rsc->meta, strdup(XML_RSC_ATTR_TARGET), strdup(container_data->attribute_target));
g_hash_table_replace(container_data->child->meta, strdup(XML_RSC_ATTR_TARGET), strdup(container_data->attribute_target));
}
} else {
// Just a naked container, no pacemaker-remote
int offset = 0, max = 1024;
char *buffer = calloc(1, max+1);
for(int lpc = 0; lpc < container_data->replicas; lpc++) {
container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t));
tuple->offset = lpc;
offset += allocate_ip(container_data, tuple, buffer+offset, max-offset);
container_data->tuples = g_list_append(container_data->tuples, tuple);
}
container_data->docker_host_options = buffer;
}
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
if (create_container(rsc, container_data, tuple, data_set) == FALSE) {
pe_err("Failed unpacking resource %s", rsc->id);
rsc->fns->free(rsc);
return FALSE;
}
}
if(container_data->child) {
rsc->children = g_list_append(rsc->children, container_data->child);
}
return TRUE;
}
static int
tuple_rsc_active(resource_t *rsc, gboolean all)
{
if (rsc) {
gboolean child_active = rsc->fns->active(rsc, all);
if (child_active && !all) {
return TRUE;
} else if (!child_active && all) {
return FALSE;
}
}
return -1;
}
gboolean
container_active(resource_t * rsc, gboolean all)
{
container_variant_data_t *container_data = NULL;
GListPtr iter = NULL;
get_container_variant_data(container_data, rsc);
for (iter = container_data->tuples; iter != NULL; iter = iter->next) {
container_grouping_t *tuple = (container_grouping_t *)(iter->data);
int rsc_active;
rsc_active = tuple_rsc_active(tuple->ip, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = tuple_rsc_active(tuple->child, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = tuple_rsc_active(tuple->docker, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = tuple_rsc_active(tuple->remote, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
}
/* If "all" is TRUE, we've already checked that no resources were inactive,
* so return TRUE; if "all" is FALSE, we didn't find any active resources,
* so return FALSE.
*/
return all;
}
resource_t *
find_container_child(const char *stem, resource_t * rsc, node_t *node)
{
container_variant_data_t *container_data = NULL;
resource_t *parent = uber_parent(rsc);
CRM_ASSERT(parent->parent);
parent = parent->parent;
get_container_variant_data(container_data, parent);
if (is_not_set(rsc->flags, pe_rsc_unique)) {
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
CRM_ASSERT(tuple);
if(tuple->node->details == node->details) {
rsc = tuple->child;
break;
}
}
}
if (rsc && safe_str_neq(stem, rsc->id)) {
free(rsc->clone_name);
rsc->clone_name = strdup(stem);
}
return rsc;
}
static void
print_rsc_in_list(resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
if (rsc != NULL) {
if (options & pe_print_html) {
status_print("<li>");
}
rsc->fns->print(rsc, pre_text, options, print_data);
if (options & pe_print_html) {
status_print("</li>\n");
}
}
}
static const char*
container_type_as_string(enum container_type t)
{
if (t == PE_CONTAINER_TYPE_DOCKER) {
return PE_CONTAINER_TYPE_DOCKER_S;
} else if (t == PE_CONTAINER_TYPE_RKT) {
return PE_CONTAINER_TYPE_RKT_S;
} else {
return PE_CONTAINER_TYPE_UNKNOWN_S;
}
}
static void
container_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data)
{
container_variant_data_t *container_data = NULL;
char *child_text = NULL;
CRM_CHECK(rsc != NULL, return);
if (pre_text == NULL) {
pre_text = "";
}
child_text = crm_concat(pre_text, " ", ' ');
get_container_variant_data(container_data, rsc);
status_print("%s<bundle ", pre_text);
status_print("id=\"%s\" ", rsc->id);
// Always lowercase the container technology type for use as XML value
status_print("type=\"");
for (const char *c = container_type_as_string(container_data->type);
*c; ++c) {
status_print("%c", tolower(*c));
}
status_print("\" ");
status_print("image=\"%s\" ", container_data->image);
status_print("unique=\"%s\" ", is_set(rsc->flags, pe_rsc_unique)? "true" : "false");
status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false");
status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false");
status_print(">\n");
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
CRM_ASSERT(tuple);
status_print("%s <replica id=\"%d\">\n", pre_text, tuple->offset);
print_rsc_in_list(tuple->ip, child_text, options, print_data);
print_rsc_in_list(tuple->child, child_text, options, print_data);
print_rsc_in_list(tuple->docker, child_text, options, print_data);
print_rsc_in_list(tuple->remote, child_text, options, print_data);
status_print("%s </replica>\n", pre_text);
}
status_print("%s</bundle>\n", pre_text);
free(child_text);
}
static void
tuple_print(container_grouping_t * tuple, const char *pre_text, long options, void *print_data)
{
node_t *node = NULL;
resource_t *rsc = tuple->child;
int offset = 0;
char buffer[LINE_MAX];
if(rsc == NULL) {
rsc = tuple->docker;
}
if(tuple->remote) {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->remote));
} else {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->docker));
}
if(tuple->ipaddr) {
offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", tuple->ipaddr);
}
if (tuple->docker->running_on) {
node = tuple->docker->running_on->data;
}
common_print(rsc, pre_text, buffer, node, options, print_data);
}
void
container_print(resource_t * rsc, const char *pre_text, long options, void *print_data)
{
container_variant_data_t *container_data = NULL;
char *child_text = NULL;
CRM_CHECK(rsc != NULL, return);
if (options & pe_print_xml) {
container_print_xml(rsc, pre_text, options, print_data);
return;
}
get_container_variant_data(container_data, rsc);
if (pre_text == NULL) {
pre_text = " ";
}
status_print("%s%s container%s: %s [%s]%s%s\n",
pre_text, container_type_as_string(container_data->type),
container_data->replicas>1?" set":"", rsc->id, container_data->image,
is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "",
is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)");
if (options & pe_print_html) {
status_print("<br />\n<ul>\n");
}
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
CRM_ASSERT(tuple);
if (options & pe_print_html) {
status_print("<li>");
}
if (is_set(options, pe_print_implicit)) {
child_text = crm_strdup_printf(" %s", pre_text);
if(g_list_length(container_data->tuples) > 1) {
status_print(" %sReplica[%d]\n", pre_text, tuple->offset);
}
if (options & pe_print_html) {
status_print("<br />\n<ul>\n");
}
print_rsc_in_list(tuple->ip, child_text, options, print_data);
print_rsc_in_list(tuple->docker, child_text, options, print_data);
print_rsc_in_list(tuple->remote, child_text, options, print_data);
print_rsc_in_list(tuple->child, child_text, options, print_data);
if (options & pe_print_html) {
status_print("</ul>\n");
}
} else {
child_text = crm_strdup_printf("%s ", pre_text);
tuple_print(tuple, child_text, options, print_data);
}
free(child_text);
if (options & pe_print_html) {
status_print("</li>\n");
}
}
if (options & pe_print_html) {
status_print("</ul>\n");
}
}
void
tuple_free(container_grouping_t *tuple)
{
if(tuple == NULL) {
return;
}
if(tuple->node) {
free(tuple->node);
tuple->node = NULL;
}
if(tuple->ip) {
free_xml(tuple->ip->xml);
tuple->ip->xml = NULL;
tuple->ip->fns->free(tuple->ip);
tuple->ip = NULL;
}
if(tuple->docker) {
free_xml(tuple->docker->xml);
tuple->docker->xml = NULL;
tuple->docker->fns->free(tuple->docker);
tuple->docker = NULL;
}
if(tuple->remote) {
free_xml(tuple->remote->xml);
tuple->remote->xml = NULL;
tuple->remote->fns->free(tuple->remote);
tuple->remote = NULL;
}
free(tuple->ipaddr);
free(tuple);
}
void
container_free(resource_t * rsc)
{
container_variant_data_t *container_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_container_variant_data(container_data, rsc);
pe_rsc_trace(rsc, "Freeing %s", rsc->id);
free(container_data->prefix);
free(container_data->image);
free(container_data->control_port);
free(container_data->host_network);
free(container_data->host_netmask);
free(container_data->ip_range_start);
free(container_data->docker_network);
free(container_data->docker_run_options);
free(container_data->docker_run_command);
free(container_data->docker_host_options);
g_list_free_full(container_data->tuples, (GDestroyNotify)tuple_free);
g_list_free_full(container_data->mounts, (GDestroyNotify)mount_free);
g_list_free_full(container_data->ports, (GDestroyNotify)port_free);
g_list_free(rsc->children);
if(container_data->child) {
free_xml(container_data->child->xml);
container_data->child->xml = NULL;
container_data->child->fns->free(container_data->child);
}
common_free(rsc);
}
enum rsc_role_e
container_resource_state(const resource_t * rsc, gboolean current)
{
enum rsc_role_e container_role = RSC_ROLE_UNKNOWN;
return container_role;
}
/*!
* \brief Get the number of configured replicas in a bundle
*
* \param[in] rsc Bundle resource
*
* \return Number of configured replicas, or 0 on error
*/
int
pe_bundle_replicas(const resource_t *rsc)
{
if ((rsc == NULL) || (rsc->variant != pe_container)) {
return 0;
} else {
container_variant_data_t *container_data = NULL;
get_container_variant_data(container_data, rsc);
return container_data->replicas;
}
}
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Thu, Jun 26, 7:20 PM (1 d, 4 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1959543
Default Alt Text
container.c (50 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment