diff --git a/lib/pengine/container.c b/lib/pengine/container.c index 1d7dfa6a40..6970fa1606 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -1,689 +1,701 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #define VARIANT_CONTAINER 1 #include "./variant.h" void tuple_free(container_grouping_t *tuple); static char * next_ip(const char *last_ip) { int oct1 = 0; int oct2 = 0; int oct3 = 0; int oct4 = 0; int rc = sscanf(last_ip, "%d.%d.%d.%d", &oct1, &oct2, &oct3, &oct4); if (rc != 4) { return NULL; } else if(oct4 > 255) { return NULL; } return crm_strdup_printf("%d.%d.%d.%d", oct1, oct2, oct3, oct4+1); } static int allocate_ip(container_variant_data_t *data, container_grouping_t *tuple, char *buffer, int max) { if(data->ip_range_start == NULL) { return 0; } else if(data->ip_last) { tuple->ipaddr = next_ip(data->ip_last); } else { tuple->ipaddr = strdup(data->ip_range_start); } data->ip_last = tuple->ipaddr; #if 0 return snprintf(buffer, max, " --add-host=%s-%d:%s --link %s-docker-%d:%s-link-%d", data->prefix, tuple->offset, tuple->ipaddr, data->prefix, tuple->offset, data->prefix, tuple->offset); #else return snprintf(buffer, max, " --add-host=%s-%d:%s", data->prefix, tuple->offset, 
tuple->ipaddr); #endif } static xmlNode * create_resource(const char *name, const char *provider, const char *kind) { xmlNode *rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, name); crm_xml_add(rsc, XML_AGENT_ATTR_CLASS, "ocf"); crm_xml_add(rsc, XML_AGENT_ATTR_PROVIDER, provider); crm_xml_add(rsc, "type", kind); return rsc; } static void create_nvp(xmlNode *parent, const char *name, const char *value) { char *id = crm_strdup_printf("%s-%s", ID(parent), name); xmlNode *xml_nvp = create_xml_node(parent, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_nvp, XML_ATTR_ID, id); free(id); crm_xml_add(xml_nvp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(xml_nvp, XML_NVPAIR_ATTR_VALUE, value); } static void create_op(xmlNode *parent, const char *prefix, const char *task, const char *interval) { char *id = crm_strdup_printf("%s-%s-%s", prefix, task, interval); xmlNode *xml_op = create_xml_node(parent, "op"); crm_xml_add(xml_op, XML_ATTR_ID, id); free(id); crm_xml_add(xml_op, XML_LRM_ATTR_INTERVAL, interval); crm_xml_add(xml_op, "name", task); } static bool create_ip_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if(data->ip_range_start) { char *id = crm_strdup_printf("%s-ip-%s", data->prefix, tuple->ipaddr); xmlNode *xml_ip = create_resource(id, "heartbeat", "IPaddr2"); xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-attributes-%d", data->prefix, tuple->offset); xml_obj = create_xml_node(xml_ip, XML_TAG_ATTR_SETS); crm_xml_add(xml_obj, XML_ATTR_ID, id); free(id); create_nvp(xml_obj, "ip", tuple->ipaddr); if(data->ip_nic) { create_nvp(xml_obj, "nic", data->ip_nic); } if(data->ip_mask) { create_nvp(xml_obj, "cidr_netmask", data->ip_mask); } else { create_nvp(xml_obj, "cidr_netmask", "32"); } xml_obj = create_xml_node(xml_ip, "operations"); create_op(xml_obj, ID(xml_ip), "monitor", "60s"); // TODO: Other ops? Timeouts and intervals from underlying resource? 
if (common_unpack(xml_ip, &tuple->ip, NULL, data_set) == false) { return FALSE; } parent->children = g_list_append(parent->children, tuple->ip); } return TRUE; } static bool create_docker_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = crm_strdup_printf("%s-docker-%d", data->prefix, tuple->offset); xmlNode *xml_docker = create_resource(id, "heartbeat", "docker"); xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-attributes-%d", data->prefix, tuple->offset); xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS); crm_xml_add(xml_obj, XML_ATTR_ID, id); free(id); create_nvp(xml_obj, "image", data->image); create_nvp(xml_obj, "allow_pull", "true"); create_nvp(xml_obj, "force_kill", "false"); create_nvp(xml_obj, "reuse", "false"); offset += snprintf(buffer+offset, max-offset, "-h %s-%d --restart=no ", data->prefix, tuple->offset); // offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr); for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { container_mount_t *mount = pIter->data; if(mount->flags) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, tuple->offset); // '#' should be sufficiently unlikely in a directory // name and thus safe to use as a separator doffset += snprintf(dbuffer+doffset, dmax-doffset, "#%s", source); offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target); } else { offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target); } if(mount->options) { offset += snprintf(buffer+offset, max-offset, ":%s", mount->options); } } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { char *port = pIter->data; offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s", tuple->ipaddr, port, port); #if 
0 offset += snprintf(buffer+offset, max-offset, " --expose %s", port); #endif } if(data->docker_run_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options); } if(data->docker_host_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options); } create_nvp(xml_obj, "run_opts", buffer); free(buffer); // TODO: Arrange for these directories to get created on the host create_nvp(xml_obj, "directory_list", dbuffer); free(dbuffer); if(tuple->child) { // TODO: Use autoconf var create_nvp(xml_obj, "run_cmd", "/usr/sbin/pacemaker_remoted"); // TODO: Allow users to specify their own? create_nvp(xml_obj, "monitor_cmd", "/bin/true"); // We just want to know if the container // is alive, we'll monitor the child independantly /* } else if(child && data->untrusted) { */ /* create_nvp(xml_obj, "run_cmd", "/usr/libexec/pacemaker/lrmd"); */ /* create_nvp(xml_obj, "monitor_cmd", "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { // TODO: Leave blank to use the built-in monitor? } xml_obj = create_xml_node(xml_docker, "operations"); create_op(xml_obj, ID(xml_docker), "monitor", "60s"); // TODO: Other ops? Timeouts and intervals from underlying resource? 
if (common_unpack(xml_docker, &tuple->docker, NULL, data_set) == FALSE) { return FALSE; } parent->children = g_list_append(parent->children, tuple->docker); return TRUE; } static bool create_remote_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if(tuple->ip && tuple->child) { node_t *node = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_remote = NULL; char *nodeid = crm_strdup_printf("%s-%d", data->prefix, tuple->offset); char *id = strdup(nodeid); if(remote_id_conflict(id, data_set)) { // The biggest hammer we have id = crm_strdup_printf("pcmk-internal-%s-remote-%d", tuple->child->id, tuple->offset); } CRM_ASSERT(remote_id_conflict(id, data_set) == FALSE); xml_remote = create_resource(id, "pacemaker", "remote"); xml_obj = create_xml_node(xml_remote, "operations"); create_op(xml_obj, ID(xml_remote), "monitor", "60s"); id = crm_strdup_printf("%s-attributes-%d", data->prefix, tuple->offset); xml_obj = create_xml_node(xml_remote, XML_TAG_ATTR_SETS); crm_xml_add(xml_obj, XML_ATTR_ID, id); free(id); create_nvp(xml_obj, "addr", tuple->ipaddr); create_nvp(xml_obj, "port", crm_itoa(DEFAULT_REMOTE_PORT)); id = crm_strdup_printf("%s-meta-%d", data->prefix, tuple->offset); xml_obj = create_xml_node(xml_remote, XML_TAG_META_SETS); crm_xml_add(xml_obj, XML_ATTR_ID, id); free(id); create_nvp(xml_obj, XML_OP_ATTR_ALLOW_MIGRATE, "false"); // Sets up node->details->remote_rsc->container == tuple->docker create_nvp(xml_obj, XML_RSC_ATTR_CONTAINER, tuple->docker->id); // TODO: Do this generically, eg with rsc->flags // create_nvp(xml_obj, XML_RSC_ATTR_INTERNAL_RSC, "true"); // Suppress printing // tuple->docker->fillers = g_list_append(tuple->docker->fillers, child); // -INFINITY prevents anyone else from running here node = create_node(strdup(nodeid), nodeid, "remote", "-INFINITY", data_set); tuple->node = node_copy(node); tuple->node->weight = 500; nodeid = NULL; id = NULL; if (common_unpack(xml_remote, 
&tuple->remote, NULL, data_set) == FALSE) { return FALSE; } tuple->node->details->remote_rsc = tuple->remote; parent->children = g_list_append(parent->children, tuple->remote); } return TRUE; } static bool create_container( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if(create_docker_resource(parent, data, tuple, data_set) == FALSE) { return TRUE; } if(create_ip_resource(parent, data, tuple, data_set) == FALSE) { return TRUE; } if(create_remote_resource(parent, data, tuple, data_set) == FALSE) { return TRUE; } return FALSE; } static void mount_free(container_mount_t *mount) { free(mount->source); free(mount->target); free(mount->options); free(mount); } gboolean container_unpack(resource_t * rsc, pe_working_set_t * data_set) { const char *value = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_resource = NULL; container_variant_data_t *container_data = NULL; pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); container_data = calloc(1, sizeof(container_variant_data_t)); rsc->variant_opaque = container_data; container_data->prefix = strdup(rsc->id); xml_obj = first_named_child(rsc->xml, "docker"); if(xml_obj == NULL) { return FALSE; } value = crm_element_value(xml_obj, "replicas"); container_data->replicas = crm_parse_int(value, "1"); + /* + * Communication between containers on the same host via the + * floating IPs only works if docker is started with: + * --userland-proxy=false --ip-masq=false + */ + value = crm_element_value(xml_obj, "replicas-per-host"); + container_data->replicas_per_host = crm_parse_int(value, "1"); + value = crm_element_value(xml_obj, "masters"); container_data->masters = crm_parse_int(value, "1"); container_data->docker_run_options = crm_element_value_copy(xml_obj, "options"); container_data->image = crm_element_value_copy(xml_obj, "image"); xml_obj = first_named_child(rsc->xml, "network"); if(xml_obj) { container_data->ip_range_start = crm_element_value_copy(xml_obj, 
"ip-range-start"); container_data->ip_nic = crm_element_value_copy(xml_obj, "nic"); container_data->ip_mask = crm_element_value_copy(xml_obj, "netmask"); for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { char *port = crm_element_value_copy(xml_child, "port"); if(port == NULL) { port = crm_element_value_copy(xml_child, "range"); } if(port != NULL) { container_data->ports = g_list_append(container_data->ports, port); } else { pe_err("Invalid port directive %s", ID(xml_child)); } } } xml_obj = first_named_child(rsc->xml, "storage"); for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { container_mount_t *mount = calloc(1, sizeof(container_mount_t)); mount->source = crm_element_value_copy(xml_child, "source-dir"); if(mount->source == NULL) { mount->source = crm_element_value_copy(xml_child, "source-dir-root"); mount->flags = 1; } mount->target = crm_element_value_copy(xml_child, "target-dir"); mount->options = crm_element_value_copy(xml_child, "options"); if(mount->source && mount->target) { container_data->mounts = g_list_append(container_data->mounts, mount); } else { pe_err("Invalid mount directive %s", ID(xml_child)); mount_free(mount); } } xml_obj = first_named_child(rsc->xml, "primitive"); if(xml_obj && container_data->ip_range_start && container_data->replicas > 0) { char *value = NULL; xmlNode *xml_set = NULL; if(container_data->masters > 0) { xml_resource = create_xml_node(NULL, XML_CIB_TAG_MASTER); } else { xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION); } value = crm_strdup_printf("%s-%s", container_data->prefix, xml_resource->name); crm_xml_add(xml_resource, XML_ATTR_ID, value); free(value); value = crm_strdup_printf("%s-%s-meta", container_data->prefix, xml_resource->name); xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS); free(value); create_nvp(xml_set, XML_RSC_ATTR_ORDERED, "true"); 
value = crm_itoa(container_data->replicas); create_nvp(xml_set, XML_RSC_ATTR_INCARNATION_MAX, value); free(value); + value = crm_itoa(container_data->replicas_per_host); + create_nvp(xml_set, XML_RSC_ATTR_INCARNATION_NODEMAX, value); + free(value); + if(container_data->masters) { value = crm_itoa(container_data->masters); create_nvp(xml_set, XML_RSC_ATTR_MASTER_MAX, value); free(value); } //crm_xml_add(xml_obj, XML_ATTR_ID, container_data->prefix); add_node_copy(xml_resource, xml_obj); - } else if(xml_obj && container_data->ip_range_start) { - xml_resource = copy_xml(xml_resource); + /* } else if(xml_obj && container_data->ip_range_start) { */ + /* xml_resource = copy_xml(xml_resource); */ } else if(xml_obj) { pe_err("Cannot control %s inside container %s without a value for ip-range-start", rsc->id, ID(xml_obj)); return FALSE; } if(xml_resource) { int lpc = 0; GListPtr childIter = NULL; resource_t *new_rsc = NULL; container_mount_t *mount = NULL; int offset = 0, max = 1024; char *buffer = calloc(1, max+1); mount = calloc(1, sizeof(container_mount_t)); mount->source = strdup(DEFAULT_REMOTE_KEY_LOCATION); mount->target = strdup(DEFAULT_REMOTE_KEY_LOCATION); mount->options = NULL; mount->flags = 0; container_data->mounts = g_list_append(container_data->mounts, mount); mount = calloc(1, sizeof(container_mount_t)); mount->source = strdup("/var/log/containers"); mount->target = strdup("/var/log"); mount->options = NULL; mount->flags = 1; container_data->mounts = g_list_append(container_data->mounts, mount); container_data->ports = g_list_append(container_data->ports, crm_itoa(DEFAULT_REMOTE_PORT)); if (common_unpack(xml_resource, &new_rsc, rsc, data_set) == FALSE) { pe_err("Failed unpacking resource %s", crm_element_value(rsc->xml, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } return FALSE; } container_data->child = new_rsc; container_data->child->orig_xml = xml_obj; // Also the trigger for common_free() // to free 
xml_resource as container_data->child->xml for(childIter = container_data->child->children; childIter != NULL; childIter = childIter->next) { container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t)); tuple->child = childIter->data; tuple->offset = lpc++; offset += allocate_ip(container_data, tuple, buffer+offset, max-offset); container_data->tuples = g_list_append(container_data->tuples, tuple); } container_data->docker_host_options = buffer; } else { // Just a naked container, no pacemaker-remote int offset = 0, max = 1024; char *buffer = calloc(1, max+1); for(int lpc = 0; lpc < container_data->replicas; lpc++) { container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t)); tuple->offset = lpc; offset += allocate_ip(container_data, tuple, buffer+offset, max-offset); container_data->tuples = g_list_append(container_data->tuples, tuple); } container_data->docker_host_options = buffer; } for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; // TODO: Remove from list if create_container() returns TRUE create_container(rsc, container_data, tuple, data_set); } if(container_data->child) { rsc->children = g_list_append(rsc->children, container_data->child); } return TRUE; } gboolean container_active(resource_t * rsc, gboolean all) { return TRUE; } static void container_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data) { container_variant_data_t *container_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (pre_text == NULL) { pre_text = ""; } child_text = crm_concat(pre_text, " ", ' '); status_print("%sid); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? 
"true" : "false"); status_print(">\n"); get_container_variant_data(container_data, rsc); status_print("%sDocker container: %s [%s]%s%s", pre_text, rsc->id, container_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { tuple->ip->fns->print(tuple->ip, child_text, options, print_data); } if(tuple->child) { tuple->child->fns->print(tuple->child, child_text, options, print_data); } if(tuple->docker) { tuple->docker->fns->print(tuple->docker, child_text, options, print_data); } if(tuple->remote) { tuple->remote->fns->print(tuple->remote, child_text, options, print_data); } } status_print("%s\n", pre_text); free(child_text); } void container_print(resource_t * rsc, const char *pre_text, long options, void *print_data) { container_variant_data_t *container_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (options & pe_print_xml) { container_print_xml(rsc, pre_text, options, print_data); return; } get_container_variant_data(container_data, rsc); if (pre_text == NULL) { pre_text = " "; } child_text = crm_strdup_printf(" %s", pre_text); status_print("%sDocker container%s: %s [%s]%s%s\n", pre_text, container_data->replicas>1?" set":"", rsc->id, container_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? 
"" : " (unmanaged)"); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(g_list_length(container_data->tuples) > 1) { status_print(" %sReplica[%d]\n", pre_text, tuple->offset); } if(tuple->ip) { tuple->ip->fns->print(tuple->ip, child_text, options, print_data); } if(tuple->docker) { tuple->docker->fns->print(tuple->docker, child_text, options, print_data); } if(tuple->remote) { tuple->remote->fns->print(tuple->remote, child_text, options, print_data); } if(tuple->child) { tuple->child->fns->print(tuple->child, child_text, options, print_data); } } } void tuple_free(container_grouping_t *tuple) { if(tuple == NULL) { return; } // TODO: Free tuple->node ? if(tuple->ip) { tuple->ip->fns->free(tuple->ip); tuple->ip = NULL; } if(tuple->child) { tuple->child->fns->free(tuple->child); tuple->child = NULL; } if(tuple->docker) { tuple->docker->fns->free(tuple->docker); tuple->docker = NULL; } if(tuple->remote) { tuple->remote->fns->free(tuple->remote); tuple->remote = NULL; } free(tuple->ipaddr); free(tuple); } void container_free(resource_t * rsc) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); free(container_data->prefix); free(container_data->image); free(container_data->ip_nic); free(container_data->ip_mask); free(container_data->ip_range_start); free(container_data->docker_run_options); free(container_data->docker_host_options); g_list_free_full(container_data->tuples, (GDestroyNotify)tuple_free); g_list_free_full(container_data->mounts, (GDestroyNotify)mount_free); g_list_free_full(container_data->ports, free); common_free(rsc); } enum rsc_role_e container_resource_state(const resource_t * rsc, gboolean current) { enum rsc_role_e container_role = RSC_ROLE_UNKNOWN; return container_role; } diff --git a/lib/pengine/variant.h 
/* Everything that belongs to a single replica of a container resource */
typedef struct
{
        int offset;             /* Index of this replica, counted from 0 */
        node_t *node;           /* Copy of the remote node created for this replica, if any */
        char *ipaddr;           /* Address assigned by allocate_ip(); freed by tuple_free() */
        resource_t *ip;         /* Implicit IPaddr2 resource, if an IP range is configured */
        resource_t *child;      /* Instance of the wrapped primitive, NULL for a naked container */
        resource_t *docker;     /* Implicit docker resource */
        resource_t *remote;     /* Implicit pacemaker remote resource, if any */

} container_grouping_t;

/* One "-v source:target[:options]" volume mapping passed to docker */
typedef struct
{
        char *source;           /* Path on the host side of the mapping */
        char *target;           /* Path inside the container */
        char *options;          /* Optional suffix appended after the target, may be NULL */
        int flags;              /* Non-zero: source is a root directory and each replica
                                 * uses its own "<source>/<prefix>-<offset>" below it */

} container_mount_t;

/* Variant-specific data of a container resource, filled in by container_unpack() */
typedef struct container_variant_data_s
{
        int masters;            /* Value of the "masters" attribute */
        int replicas;           /* Value of the "replicas" attribute */
        int replicas_per_host;  /* Value of the "replicas-per-host" attribute */
        char *prefix;           /* Copy of the resource id, used to name implicit resources */
        char *image;            /* Value of the "image" attribute */
        const char *ip_last;    /* Most recently assigned address; borrowed from a
                                 * tuple's ipaddr, not freed through this pointer */
        char *ip_nic;           /* Value of the network "nic" attribute */
        char *ip_mask;          /* Value of the network "netmask" attribute */
        char *ip_range_start;   /* Value of the network "ip-range-start" attribute */
        char *docker_host_options; /* Accumulated " --add-host=..." options */
        char *docker_run_options;  /* Value of the docker "options" attribute */

        resource_t *child;      /* Implicit master/clone wrapping the primitive, if any */

        GListPtr tuples;        /* container_grouping_t * */
        GListPtr ports;         /* char * */
        GListPtr mounts;        /* container_mount_t * */

} container_variant_data_t;
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #define VARIANT_CLONE 1 #include gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); static void append_parent_colocation(resource_t * rsc, resource_t * child, gboolean all); static gint sort_rsc_id(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); return strcmp(resource1->id, resource2->id); } static node_t * parent_node_instance(const resource_t * rsc, node_t * node) { node_t *ret = NULL; - if (node != NULL) { + if (node != NULL && rsc->parent) { ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id); + } else if(node != NULL) { + ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); } return ret; } static gboolean did_fail(const resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_failed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (did_fail(child_rsc)) { return TRUE; } } return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) { int rc = 0; node_t *node1 = NULL; node_t *node2 = NULL; gboolean can1 = TRUE; gboolean can2 = TRUE; const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* allocation order: * - active instances * - instances running on nodes with the least copies * - active instances on nodes that can't support them or are to be fenced * - failed instances * - inactive instances */ if (resource1->running_on && resource2->running_on) { 
if (g_list_length(resource1->running_on) < g_list_length(resource2->running_on)) { crm_trace("%s < %s: running_on", resource1->id, resource2->id); return -1; } else if (g_list_length(resource1->running_on) > g_list_length(resource2->running_on)) { crm_trace("%s > %s: running_on", resource1->id, resource2->id); return 1; } } if (resource1->running_on) { node1 = resource1->running_on->data; } if (resource2->running_on) { node2 = resource2->running_on->data; } if (node1) { node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource1->id); node1 = NULL; can1 = FALSE; } } if (node2) { node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource2->id); node2 = NULL; can2 = FALSE; } } if (can1 != can2) { if (can1) { crm_trace("%s < %s: availability of current location", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: availability of current location", resource1->id, resource2->id); return 1; } if (resource1->priority < resource2->priority) { crm_trace("%s < %s: priority", resource1->id, resource2->id); return 1; } else if (resource1->priority > resource2->priority) { crm_trace("%s > %s: priority", resource1->id, resource2->id); return -1; } if (node1 == NULL && node2 == NULL) { crm_trace("%s == %s: not active", resource1->id, resource2->id); return 0; } if (node1 != node2) { if (node1 == NULL) { crm_trace("%s > %s: active", resource1->id, resource2->id); return 1; } else if (node2 == NULL) { crm_trace("%s < %s: active", resource1->id, resource2->id); return -1; } } can1 = can_run_resources(node1); can2 = can_run_resources(node2); if (can1 != can2) { if (can1) { crm_trace("%s < %s: can", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: can", resource1->id, resource2->id); return 1; } node1 = 
parent_node_instance(resource1, node1); node2 = parent_node_instance(resource2, node2); if (node1 != NULL && node2 == NULL) { crm_trace("%s < %s: not allowed", resource1->id, resource2->id); return -1; } else if (node1 == NULL && node2 != NULL) { crm_trace("%s > %s: not allowed", resource1->id, resource2->id); return 1; } if (node1 == NULL || node2 == NULL) { crm_trace("%s == %s: not allowed", resource1->id, resource2->id); return 0; } if (node1->count < node2->count) { crm_trace("%s < %s: count", resource1->id, resource2->id); return -1; } else if (node1->count > node2->count) { crm_trace("%s > %s: count", resource1->id, resource2->id); return 1; } can1 = did_fail(resource1); can2 = did_fail(resource2); if (can1 != can2) { if (can1) { crm_trace("%s > %s: failed", resource1->id, resource2->id); return 1; } crm_trace("%s < %s: failed", resource1->id, resource2->id); return -1; } if (node1 && node2) { int lpc = 0; int max = 0; node_t *n = NULL; GListPtr gIter = NULL; GListPtr list1 = NULL; GListPtr list2 = NULL; GHashTable *hash1 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); GHashTable *hash2 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); n = node_copy(resource1->running_on->data); g_hash_table_insert(hash1, (gpointer) n->details->id, n); n = node_copy(resource2->running_on->data); g_hash_table_insert(hash2, (gpointer) n->details->id, n); - for (gIter = resource1->parent->rsc_cons; gIter; gIter = gIter->next) { - rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; + if(resource1->parent) { + for (gIter = resource1->parent->rsc_cons; gIter; gIter = gIter->next) { + rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; - crm_trace("Applying %s to %s", constraint->id, resource1->id); + crm_trace("Applying %s to %s", constraint->id, resource1->id); - hash1 = native_merge_weights(constraint->rsc_rh, resource1->id, hash1, - constraint->node_attribute, - (float)constraint->score / 
INFINITY, 0); - } + hash1 = native_merge_weights(constraint->rsc_rh, resource1->id, hash1, + constraint->node_attribute, + (float)constraint->score / INFINITY, 0); + } - for (gIter = resource1->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { - rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; + for (gIter = resource1->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { + rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; - crm_trace("Applying %s to %s", constraint->id, resource1->id); + crm_trace("Applying %s to %s", constraint->id, resource1->id); - hash1 = native_merge_weights(constraint->rsc_lh, resource1->id, hash1, - constraint->node_attribute, - (float)constraint->score / INFINITY, pe_weights_positive); + hash1 = native_merge_weights(constraint->rsc_lh, resource1->id, hash1, + constraint->node_attribute, + (float)constraint->score / INFINITY, pe_weights_positive); + } } - for (gIter = resource2->parent->rsc_cons; gIter; gIter = gIter->next) { - rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; + if(resource2->parent) { + for (gIter = resource2->parent->rsc_cons; gIter; gIter = gIter->next) { + rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; - crm_trace("Applying %s to %s", constraint->id, resource2->id); + crm_trace("Applying %s to %s", constraint->id, resource2->id); - hash2 = native_merge_weights(constraint->rsc_rh, resource2->id, hash2, - constraint->node_attribute, - (float)constraint->score / INFINITY, 0); - } + hash2 = native_merge_weights(constraint->rsc_rh, resource2->id, hash2, + constraint->node_attribute, + (float)constraint->score / INFINITY, 0); + } - for (gIter = resource2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { - rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; + for (gIter = resource2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { + rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; - crm_trace("Applying %s to %s", constraint->id, 
resource2->id); + crm_trace("Applying %s to %s", constraint->id, resource2->id); - hash2 = native_merge_weights(constraint->rsc_lh, resource2->id, hash2, - constraint->node_attribute, - (float)constraint->score / INFINITY, pe_weights_positive); + hash2 = native_merge_weights(constraint->rsc_lh, resource2->id, hash2, + constraint->node_attribute, + (float)constraint->score / INFINITY, pe_weights_positive); + } } /* Current location score */ node1 = g_list_nth_data(resource1->running_on, 0); node1 = g_hash_table_lookup(hash1, node1->details->id); node2 = g_list_nth_data(resource2->running_on, 0); node2 = g_hash_table_lookup(hash2, node2->details->id); if (node1->weight < node2->weight) { if (node1->weight < 0) { crm_trace("%s > %s: current score", resource1->id, resource2->id); rc = -1; goto out; } else { crm_trace("%s < %s: current score", resource1->id, resource2->id); rc = 1; goto out; } } else if (node1->weight > node2->weight) { crm_trace("%s > %s: current score", resource1->id, resource2->id); rc = -1; goto out; } /* All location scores */ list1 = g_hash_table_get_values(hash1); list2 = g_hash_table_get_values(hash2); list1 = g_list_sort_with_data(list1, sort_node_weight, g_list_nth_data(resource1->running_on, 0)); list2 = g_list_sort_with_data(list2, sort_node_weight, g_list_nth_data(resource2->running_on, 0)); max = g_list_length(list1); if (max < g_list_length(list2)) { max = g_list_length(list2); } for (; lpc < max; lpc++) { node1 = g_list_nth_data(list1, lpc); node2 = g_list_nth_data(list2, lpc); if (node1 == NULL) { crm_trace("%s < %s: colocated score NULL", resource1->id, resource2->id); rc = 1; break; } else if (node2 == NULL) { crm_trace("%s > %s: colocated score NULL", resource1->id, resource2->id); rc = -1; break; } if (node1->weight < node2->weight) { crm_trace("%s < %s: colocated score", resource1->id, resource2->id); rc = 1; break; } else if (node1->weight > node2->weight) { crm_trace("%s > %s: colocated score", resource1->id, resource2->id); rc = 
-1; break; } } /* Order by reverse uname - same as sort_node_weight() does? */ out: g_hash_table_destroy(hash1); /* Free mem */ g_hash_table_destroy(hash2); /* Free mem */ g_list_free(list1); g_list_free(list2); if (rc != 0) { return rc; } } rc = strcmp(resource1->id, resource2->id); crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id); return rc; } static node_t * can_run_instance(resource_t * rsc, node_t * node) { node_t *local_node = NULL; clone_variant_data_t *clone_data = NULL; if (can_run_resources(node) == FALSE) { goto bail; } else if (is_set(rsc->flags, pe_rsc_orphan)) { goto bail; } local_node = parent_node_instance(rsc, node); - get_clone_variant_data(clone_data, rsc->parent); + if(rsc->parent) { + get_clone_variant_data(clone_data, rsc->parent); + } if (local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if (local_node->weight < 0) { common_update_score(rsc, node->details->id, local_node->weight); pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", rsc->id, node->details->uname); + } else if(clone_data == NULL) { + pe_rsc_trace(rsc, "%s can run on %s: %d (container)", rsc->id, node->details->uname, local_node->count); + return local_node; + } else if (local_node->count < clone_data->clone_node_max) { pe_rsc_trace(rsc, "%s can run on %s: %d", rsc->id, node->details->uname, local_node->count); return local_node; } else { pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", rsc->id, node->details->uname, local_node->count, clone_data->clone_node_max); } bail: if (node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static node_t * color_instance(resource_t * rsc, node_t * prefer, gboolean all_coloc, pe_working_set_t * data_set) { node_t *chosen = NULL; node_t *local_node = NULL; GHashTable *backup = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing %s %d", rsc->id, all_coloc); if 
(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } /* Only include positive colocation preferences of dependent resources * if not every node will get a copy of the clone */ - append_parent_colocation(rsc->parent, rsc, all_coloc); + if(rsc->parent) { + append_parent_colocation(rsc->parent, rsc, all_coloc); + } if (prefer) { node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (local_prefer == NULL || local_prefer->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, prefer->details->uname); return NULL; } } if (rsc->allowed_nodes) { GHashTableIter iter; node_t *try_node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&try_node)) { can_run_instance(rsc, try_node); } } backup = node_hash_dup(rsc->allowed_nodes); chosen = rsc->cmds->allocate(rsc, prefer, data_set); - if (chosen) { + if (chosen && rsc->parent) { local_node = pe_hash_table_lookup(rsc->parent->allowed_nodes, chosen->details->id); - if (prefer && chosen && chosen->details != prefer->details) { crm_notice("Pre-allocation failed: got %s instead of %s", chosen->details->uname, prefer->details->uname); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = backup; native_deallocate(rsc); chosen = NULL; backup = NULL; } else if (local_node) { local_node->count++; } else if (is_set(rsc->flags, pe_rsc_managed)) { /* what to do? 
we can't enforce per-node limits in this case */ crm_config_err("%s not found in %s (list=%d)", chosen->details->id, rsc->parent->id, g_hash_table_size(rsc->parent->allowed_nodes)); } } if(backup) { g_hash_table_destroy(backup); } return chosen; } static void append_parent_colocation(resource_t * rsc, resource_t * child, gboolean all) { GListPtr gIter = NULL; gIter = rsc->rsc_cons; for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data; if (all || cons->score < 0 || cons->score == INFINITY) { child->rsc_cons = g_list_prepend(child->rsc_cons, cons); } } gIter = rsc->rsc_cons_lhs; for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data; if (all || cons->score < 0) { child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons); } } } -node_t * -clone_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) -{ - GHashTableIter iter; - GListPtr nIter = NULL; - GListPtr gIter = NULL; - GListPtr nodes = NULL; - node_t *node = NULL; +void +distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, + int max, int per_host_max, pe_working_set_t * data_set); - int allocated = 0; +void +distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, + int max, int per_host_max, pe_working_set_t * data_set) +{ int loop_max = 0; - int clone_max = 0; + int attempts = 0; + int allocated = 0; int available_nodes = 0; - clone_variant_data_t *clone_data = NULL; - - get_clone_variant_data(clone_data, rsc); - - if (is_not_set(rsc->flags, pe_rsc_provisional)) { - return NULL; - - } else if (is_set(rsc->flags, pe_rsc_allocating)) { - pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); - return NULL; - } - - set_bit(rsc->flags, pe_rsc_allocating); - pe_rsc_trace(rsc, "Processing %s", rsc->id); - - /* this information is used by sort_clone_instance() when deciding in which - * order to allocate clone instances - */ - gIter = rsc->rsc_cons; - 
for (; gIter != NULL; gIter = gIter->next) { - rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; - - pe_rsc_trace(rsc, "%s: Coloring %s first", rsc->id, constraint->rsc_rh->id); - constraint->rsc_rh->cmds->allocate(constraint->rsc_rh, prefer, data_set); - } - - gIter = rsc->rsc_cons_lhs; - for (; gIter != NULL; gIter = gIter->next) { - rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; - - rsc->allowed_nodes = - constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, - constraint->node_attribute, - (float)constraint->score / INFINITY, - (pe_weights_rollback | pe_weights_positive)); - } - - dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); /* count now tracks the number of clones currently allocated */ - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + for(GListPtr nIter = nodes; nIter != NULL; nIter = nIter->next) { + pe_node_t *node = nIter->data; + node->count = 0; if (can_run_resources(node)) { available_nodes++; } } - clone_max = clone_data->clone_max; + pe_rsc_debug(rsc, "Allocating %d %s instances to a possible %d nodes (%d per host)", + max, rsc->id, available_nodes, per_host_max); + if(available_nodes) { - loop_max = clone_data->clone_max / available_nodes; + loop_max = max / available_nodes; } if (loop_max < 1) { loop_max = 1; } - rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set); - /* Pre-allocate as many instances as we can to their current location - * First pre-sort the list of nodes by their placement score - */ - nodes = g_hash_table_get_values(rsc->allowed_nodes); - nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL); + /* Pre-allocate as many instances as we can to their current location */ - for(nIter = nodes; nIter; nIter = nIter->next) { + for(GListPtr nIter = nodes; nIter; nIter = nIter->next) { int lpc; + pe_node_t *node = 
nIter->data; - node = nIter->data; - - if(clone_max <= 0) { + if(max - attempts <= 0) { break; } if (can_run_resources(node) == FALSE || node->weight < 0) { pe_rsc_trace(rsc, "Not Pre-allocatiing %s", node->details->uname); continue; } - clone_max--; - pe_rsc_trace(rsc, "Pre-allocating %s (%d remaining)", node->details->uname, clone_max); + attempts++; + pe_rsc_trace(rsc, "Pre-allocating %s (%d remaining)", node->details->uname, max - allocated); for (lpc = 0; - allocated < clone_data->clone_max - && node->count < clone_data->clone_node_max - && lpc < clone_data->clone_node_max && lpc < loop_max; lpc++) { - for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + allocated < max + && node->count < per_host_max + && lpc < per_host_max && lpc < loop_max; lpc++) { + for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; if (child->running_on && is_set(child->flags, pe_rsc_provisional) && is_not_set(child->flags, pe_rsc_failed)) { node_t *child_node = child->running_on->data; + pe_rsc_trace(rsc, "Foo %s to %s %p %p", child->id, + node->details->uname, child_node->details, node->details); if (child_node->details == node->details - && color_instance(child, node, clone_data->clone_max < available_nodes, + && color_instance(child, node, max < available_nodes, data_set)) { pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, node->details->uname); allocated++; break; } } } } } - pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, clone_data->clone_max); - g_list_free(nodes); + pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); - for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; if (g_list_length(child->running_on) > 0) { node_t *child_node = child->running_on->data; node_t *local_node = parent_node_instance(child, 
child->running_on->data); if (local_node == NULL) { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } if (is_not_set(child->flags, pe_rsc_provisional)) { - } else if (allocated >= clone_data->clone_max) { - pe_rsc_debug(rsc, "Child %s not allocated - limit reached", child->id); + } else if (allocated >= max) { + pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); resource_location(child, NULL, -INFINITY, "clone_color:limit_reached", data_set); - } else if (color_instance(child, NULL, clone_data->clone_max < available_nodes, data_set)) { + } else if (color_instance(child, NULL, max < available_nodes, data_set)) { allocated++; } } pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", - allocated, rsc->id, clone_data->clone_max); + allocated, rsc->id, max); +} + + +node_t * +clone_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) +{ + GListPtr nodes = NULL; + + clone_variant_data_t *clone_data = NULL; + + get_clone_variant_data(clone_data, rsc); + + if (is_not_set(rsc->flags, pe_rsc_provisional)) { + return NULL; + + } else if (is_set(rsc->flags, pe_rsc_allocating)) { + pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); + return NULL; + } + + set_bit(rsc->flags, pe_rsc_allocating); + pe_rsc_trace(rsc, "Processing %s", rsc->id); + + /* this information is used by sort_clone_instance() when deciding in which + * order to allocate clone instances + */ + for (GListPtr gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { + rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; + + pe_rsc_trace(rsc, "%s: Coloring %s first", rsc->id, constraint->rsc_rh->id); + constraint->rsc_rh->cmds->allocate(constraint->rsc_rh, prefer, data_set); + } + + for (GListPtr gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { + rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; + + rsc->allowed_nodes = + 
constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, + constraint->node_attribute, + (float)constraint->score / INFINITY, + (pe_weights_rollback | pe_weights_positive)); + } + + dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); + + nodes = g_hash_table_get_values(rsc->allowed_nodes); + nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL); + rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set); + distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set); + g_list_free(nodes); clear_bit(rsc->flags, pe_rsc_provisional); clear_bit(rsc->flags, pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } static void clone_update_pseudo_status(resource_t * rsc, gboolean * stopping, gboolean * starting, gboolean * active) { GListPtr gIter = NULL; if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; clone_update_pseudo_status(child, stopping, starting, active); } return; } CRM_ASSERT(active != NULL); CRM_ASSERT(starting != NULL); CRM_ASSERT(stopping != NULL); if (rsc->running_on) { *active = TRUE; } gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (*starting && *stopping) { return; } else if (is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); continue; } else if (is_set(action->flags, pe_action_pseudo) == FALSE && is_set(action->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); continue; } else if (safe_str_eq(RSC_STOP, action->task)) { pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); *stopping = TRUE; } else if (safe_str_eq(RSC_START, action->task)) { if (is_set(action->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, 
"Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, is_set(action->flags, pe_action_runnable), is_set(action->flags, pe_action_pseudo)); } else { pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", action->uuid, is_set(action->flags, pe_action_runnable), is_set(action->flags, pe_action_pseudo)); *starting = TRUE; } } } } static action_t * find_rsc_action(resource_t * rsc, const char *key, gboolean active_only, GListPtr * list) { action_t *match = NULL; GListPtr possible = NULL; GListPtr active = NULL; possible = find_actions(rsc->actions, key, NULL); if (active_only) { GListPtr gIter = possible; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE) { active = g_list_prepend(active, op); } } if (active && g_list_length(active) == 1) { match = g_list_nth_data(active, 0); } if (list) { *list = active; active = NULL; } } else if (possible && g_list_length(possible) == 1) { match = g_list_nth_data(possible, 0); } if (list) { *list = possible; possible = NULL; } if (possible) { g_list_free(possible); } if (active) { g_list_free(active); } return match; } static void child_ordering_constraints(resource_t * rsc, pe_working_set_t * data_set) { char *key = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *last_stop = NULL; action_t *last_start = NULL; GListPtr gIter = NULL; gboolean active_only = TRUE; /* change to false to get the old behavior */ clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->ordered == FALSE) { return; } /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; key = stop_key(child); stop = find_rsc_action(child, key, active_only, NULL); free(key); key = 
start_key(child); start = find_rsc_action(child, key, active_only, NULL); free(key); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(resource_t * rsc, pe_working_set_t * data_set) { gboolean child_active = FALSE; gboolean child_starting = FALSE; gboolean child_stopping = FALSE; gboolean allow_dependent_migrations = TRUE; action_t *stop = NULL; action_t *stopped = NULL; action_t *start = NULL; action_t *started = NULL; GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; gboolean starting = FALSE; gboolean stopping = FALSE; child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); if (stopping && starting) { allow_dependent_migrations = FALSE; } child_stopping |= stopping; child_starting |= starting; } /* start */ start = start_action(rsc, NULL, !child_starting); started = custom_action(rsc, started_key(rsc), RSC_STARTED, NULL, !child_starting, TRUE, data_set); update_action_flags(start, pe_action_pseudo | pe_action_runnable, __FUNCTION__, __LINE__); update_action_flags(started, pe_action_pseudo, __FUNCTION__, __LINE__); started->priority = INFINITY; if (child_active || child_starting) { update_action_flags(started, pe_action_runnable, __FUNCTION__, __LINE__); } child_ordering_constraints(rsc, data_set); if (clone_data->start_notify == NULL) { clone_data->start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set); } /* stop */ stop = stop_action(rsc, NULL, !child_stopping); stopped = custom_action(rsc, 
stopped_key(rsc), RSC_STOPPED, NULL, !child_stopping, TRUE, data_set); stopped->priority = INFINITY; update_action_flags(stop, pe_action_pseudo | pe_action_runnable, __FUNCTION__, __LINE__); if (allow_dependent_migrations) { update_action_flags(stop, pe_action_migrate_runnable, __FUNCTION__, __LINE__); } update_action_flags(stopped, pe_action_pseudo | pe_action_runnable, __FUNCTION__, __LINE__); if (clone_data->stop_notify == NULL) { clone_data->stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set); if (clone_data->stop_notify && clone_data->start_notify) { order_actions(clone_data->stop_notify->post_done, clone_data->start_notify->pre, pe_order_optional); } } } void clone_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { resource_t *last_rsc = NULL; GListPtr gIter; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); if (rsc->variant == pe_master) { new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set); new_rsc_order(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); } if (clone_data->ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->internal_constraints(child_rsc, data_set); order_start_start(rsc, child_rsc, pe_order_runnable_left | pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { 
order_start_start(last_rsc, child_rsc, pe_order_optional); } order_stop_stop(rsc, child_rsc, pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { order_stop_stop(child_rsc, last_rsc, pe_order_optional); } last_rsc = child_rsc; } } static bool assign_node(resource_t * rsc, node_t * node, gboolean force) { bool changed = FALSE; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; changed |= native_assign_node(child_rsc, NULL, node, force); } return changed; } if (rsc->allocated_to != NULL) { changed = true; } native_assign_node(rsc, NULL, node, force); return changed; } static resource_t * find_compatible_child_by_node(resource_t * local_child, node_t * local_node, resource_t * rsc, enum rsc_role_e filter, gboolean current) { node_t *node = NULL; GListPtr gIter = NULL; if (local_node == NULL) { crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } crm_trace("Looking for compatible child from %s for %s on %s", local_child->id, rsc->id, local_node->details->uname); gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); } if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_trace("Filtered %s", child_rsc->id); continue; } if (node && local_node && node->details == local_node->details) { crm_trace("Pairing %s with %s on %s", local_child->id, child_rsc->id, node->details->uname); return child_rsc; } else if (node) { crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname, local_node->details->uname); } else 
{ crm_trace("%s - not allocated %d", child_rsc->id, current); } } crm_trace("Can't pair %s with %s", local_child->id, rsc->id); return NULL; } resource_t * find_compatible_child(resource_t * local_child, resource_t * rsc, enum rsc_role_e filter, gboolean current) { resource_t *pair = NULL; GListPtr gIter = NULL; GListPtr scratch = NULL; node_t *local_node = NULL; local_node = local_child->fns->location(local_child, NULL, current); if (local_node) { return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); } scratch = g_hash_table_get_values(local_child->allowed_nodes); scratch = g_list_sort_with_data(scratch, sort_node_weight, NULL); gIter = scratch; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); done: g_list_free(scratch); return pair; } void clone_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ GListPtr gIter = rsc_lh->children; CRM_CHECK(FALSE, crm_err("This functionality is not thought to be used. 
Please report a bug.")); CRM_CHECK(rsc_lh, return); CRM_CHECK(rsc_rh, return); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_lh(child_rsc, rsc_rh, constraint); } return; } void clone_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { GListPtr gIter = NULL; gboolean do_interleave = FALSE; clone_variant_data_t *clone_data = NULL; clone_variant_data_t *clone_data_lh = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_rh != NULL, pe_err("rsc_rh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_lh->variant == pe_native, return); get_clone_variant_data(clone_data, constraint->rsc_rh); pe_rsc_trace(rsc_rh, "Processing constraint %s: %s -> %s %d", constraint->id, rsc_lh->id, rsc_rh->id, constraint->score); if (constraint->rsc_lh->variant >= pe_clone) { get_clone_variant_data(clone_data_lh, constraint->rsc_lh); if (clone_data_lh->interleave && clone_data->clone_node_max != clone_data_lh->clone_node_max) { crm_config_err("Cannot interleave " XML_CIB_TAG_INCARNATION " %s and %s because" " they do not support the same number of" " resources per node", constraint->rsc_lh->id, constraint->rsc_rh->id); /* only the LHS side needs to be labeled as interleave */ } else if (clone_data_lh->interleave) { do_interleave = TRUE; } } if (is_set(rsc_rh->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id); return; } else if (do_interleave) { resource_t *rh_child = NULL; rh_child = find_compatible_child(rsc_lh, rsc_rh, RSC_ROLE_UNKNOWN, FALSE); if (rh_child) { pe_rsc_debug(rsc_rh, "Pairing %s with %s", rsc_lh->id, rh_child->id); rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); assign_node(rsc_lh, NULL, 
TRUE); } else { pe_rsc_debug(rsc_rh, "Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); } return; } else if (constraint->score >= INFINITY) { GListPtr rhs = NULL; gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(rsc_rh, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); rhs = g_list_prepend(rhs, chosen); } } node_list_exclude(rsc_lh->allowed_nodes, rhs, FALSE); g_list_free(rhs); return; } gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint); } } static enum action_tasks clone_child_action(action_t * action) { enum action_tasks result = no_action; resource_t *child = (resource_t *) action->rsc->children->data; if (safe_str_eq(action->task, "notify") || safe_str_eq(action->task, "notified")) { /* Find the action we're notifying about instead */ int stop = 0; char *key = action->uuid; int lpc = strlen(key); for (; lpc > 0; lpc--) { if (key[lpc] == '_' && stop == 0) { stop = lpc; } else if (key[lpc] == '_') { char *task_mutable = NULL; lpc++; task_mutable = strdup(key + lpc); task_mutable[stop - lpc] = 0; crm_trace("Extracted action '%s' from '%s'", task_mutable, key); result = get_complex_task(child, task_mutable, TRUE); free(task_mutable); break; } } } else { result = get_complex_task(child, action->task, TRUE); } return result; } enum pe_action_flags clone_action_flags(action_t * action, node_t * node) { GListPtr gIter = NULL; gboolean any_runnable = FALSE; gboolean check_runnable = TRUE; enum action_tasks task = clone_child_action(action); enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); const char *task_s = 
task2text(task); gIter = action->rsc->children; for (; gIter != NULL; gIter = gIter->next) { action_t *child_action = NULL; resource_t *child = (resource_t *) gIter->data; child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node); pe_rsc_trace(action->rsc, "Checking for %s in %s on %s", task_s, child->id, node ? node->details->uname : "none"); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (is_set(flags, pe_action_optional) && is_set(child_flags, pe_action_optional) == FALSE) { pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, child_action->uuid); flags = crm_clear_bit(__FUNCTION__, __LINE__, action->rsc->id, flags, pe_action_optional); pe_clear_action_bit(action, pe_action_optional); } if (is_set(child_flags, pe_action_runnable)) { any_runnable = TRUE; } } else { GListPtr gIter2 = child->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { action_t *op = (action_t *) gIter2->data; pe_rsc_trace(child, "%s on %s (%s)", op->uuid, op->node ? 
/*
 * Apply an ordering between two interleaved clones instance-by-instance.
 *
 * For every child of then->rsc, look up a compatible child of first->rsc
 * (via find_compatible_child()) and order the matching pair of child actions
 * instead of ordering the clone-level actions as a whole.
 *
 * first   "first" action of the ordering; first->rsc supplies the candidate
 *         instances and first->uuid decides the value of 'current'
 * then    "then" action of the ordering; the children of then->rsc are iterated
 * node    passed through to find_first_action(), action_flags() and
 *         update_actions()
 * flags   not read by this function; the flags handed to the children are
 *         recomputed from first_child's own action
 * filter  passed through unchanged to the children's update_actions()
 * type    ordering type; also decides whether an unmatched child is inhibited
 *
 * Returns the accumulated pe_graph_flags describing what was changed.
 */
static enum pe_graph_flags
clone_update_actions_interleave(action_t * first, action_t * then, node_t * node,
                                enum pe_action_flags flags, enum pe_action_flags filter,
                                enum pe_ordering type)
{
    gboolean current = FALSE;
    resource_t *first_child = NULL;
    GListPtr gIter = then->rsc->children;
    enum pe_graph_flags changed = pe_graph_none;        /*pe_graph_disable */

    /* Name of the per-instance task that corresponds to the clone-level 'first' action */
    enum action_tasks task = clone_child_action(first);
    const char *first_task = task2text(task);

    /* Fix this - lazy */
    /* 'current' is set when 'first' is a "stopped" or "demoted" action and is
     * forwarded to find_compatible_child() below */
    if (crm_ends_with(first->uuid, "_stopped_0")
        || crm_ends_with(first->uuid, "_demoted_0")) {
        current = TRUE;
    }

    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *then_child = (resource_t *) gIter->data;

        CRM_ASSERT(then_child != NULL);
        first_child = find_compatible_child(then_child, first->rsc, RSC_ROLE_UNKNOWN, current);
        if (first_child == NULL && current) {
            /* No partner, but 'first' is a stopped/demoted action: nothing to order */
            crm_trace("Ignore");

        } else if (first_child == NULL) {
            crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid);

            /* Me no like this hack - but what else can we do?
             *
             * If there is no-one active or about to be active
             *   on the same node as then_child, then they must
             *   not be allowed to start
             */
            if (type & (pe_order_runnable_left | pe_order_implies_then) /* Mandatory */ ) {
                pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id);
                /* Assigning a NULL node; report a change only if assign_node() says so */
                if(assign_node(then_child, NULL, TRUE)) {
                    changed |= pe_graph_updated_then;
                }
            }

        } else {
            action_t *first_action = NULL;
            action_t *then_action = NULL;

            pe_rsc_debug(then->rsc, "Pairing %s with %s", first_child->id, then_child->id);

            first_action = find_first_action(first_child->actions, NULL, first_task, node);
            then_action = find_first_action(then_child->actions, NULL, then->task, node);

            if (first_action == NULL) {
                /* A missing action is only reported as an error when the instance
                 * is not an orphan and the task is neither stop nor demote */
                if (is_not_set(first_child->flags, pe_rsc_orphan)
                    && crm_str_eq(first_task, RSC_STOP, TRUE) == FALSE
                    && crm_str_eq(first_task, RSC_DEMOTE, TRUE) == FALSE) {
                    crm_err("Internal error: No action found for %s in %s (first)",
                            first_task, first_child->id);

                } else {
                    crm_trace("No action found for %s in %s%s (first)",
                              first_task, first_child->id,
                              is_set(first_child->flags, pe_rsc_orphan) ? " (ORPHAN)" : "");
                }
                continue;
            }

            /* We're only interested if 'then' is neither stopping nor being demoted */
            if (then_action == NULL) {
                if (is_not_set(then_child->flags, pe_rsc_orphan)
                    && crm_str_eq(then->task, RSC_STOP, TRUE) == FALSE
                    && crm_str_eq(then->task, RSC_DEMOTE, TRUE) == FALSE) {
                    crm_err("Internal error: No action found for %s in %s (then)",
                            then->task, then_child->id);

                } else {
                    crm_trace("No action found for %s in %s%s (then)",
                              then->task, then_child->id,
                              is_set(then_child->flags, pe_rsc_orphan) ? " (ORPHAN)" : "");
                }
                continue;
            }

            /* Order the paired instance actions, then let the 'then' instance
             * react to the flags of the 'first' instance's action */
            if (order_actions(first_action, then_action, type)) {
                crm_debug("Created constraint for %s -> %s", first_action->uuid, then_action->uuid);
                changed |= (pe_graph_updated_first | pe_graph_updated_then);
            }
            changed |= then_child->cmds->update_actions(first_action, then_action, node,
                                                        first_child->cmds->action_flags(first_action, node),
                                                        filter, type);
        }
    }
    return changed;
}
"yes" : "no", rsc); if (interleave) { changed = clone_update_actions_interleave(first, then, node, flags, filter, type); } else if (then->rsc) { GListPtr gIter = then->rsc->children; changed |= native_update_actions(first, then, node, flags, filter, type); for (; gIter != NULL; gIter = gIter->next) { enum pe_graph_flags child_changed = pe_graph_none; GListPtr lpc = NULL; resource_t *child = (resource_t *) gIter->data; action_t *child_action = find_first_action(child->actions, NULL, then->task, node); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (is_set(child_flags, pe_action_runnable)) { child_changed |= child->cmds->update_actions(first, child_action, node, flags, filter, type); } changed |= child_changed; if (child_changed & pe_graph_updated_then) { for (lpc = child_action->actions_after; lpc != NULL; lpc = lpc->next) { action_wrapper_t *other = (action_wrapper_t *) lpc->data; update_action(other->action); } } } } } return changed; } void clone_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); } } void clone_expand(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; rsc->cmds->action_flags(op, NULL); } if (clone_data->start_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify); expand_notification_data(clone_data->start_notify, data_set); create_notifications(rsc, clone_data->start_notify, data_set); } if (clone_data->stop_notify) { 
collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify); expand_notification_data(clone_data->stop_notify, data_set); create_notifications(rsc, clone_data->stop_notify, data_set); } if (clone_data->promote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify); expand_notification_data(clone_data->promote_notify, data_set); create_notifications(rsc, clone_data->promote_notify, data_set); } if (clone_data->demote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify); expand_notification_data(clone_data->demote_notify, data_set); create_notifications(rsc, clone_data->demote_notify, data_set); } /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } native_expand(rsc, data_set); /* The notifications are in the graph now, we can destroy the notify_data */ free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } node_t * rsc_known_on(resource_t * rsc, GListPtr * list) { GListPtr gIter = NULL; node_t *one = NULL; GListPtr result = NULL; if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; rsc_known_on(child, &result); } } else if (rsc->known_on) { result = g_hash_table_get_values(rsc->known_on); } if (result && g_list_length(result) == 1) { one = g_list_nth_data(result, 0); } if (list) { GListPtr gIter = NULL; gIter = result; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (*list == NULL || pe_find_node_id(*list, 
node->details->id) == NULL) { *list = g_list_prepend(*list, node); } } } g_list_free(result); return one; } static resource_t * find_instance_on(resource_t * rsc, node_t * node) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; GListPtr known_list = NULL; resource_t *child = (resource_t *) gIter->data; rsc_known_on(child, &known_list); gIter2 = known_list; for (; gIter2 != NULL; gIter2 = gIter2->next) { node_t *known = (node_t *) gIter2->data; if (node->details == known->details) { g_list_free(known_list); return child; } } g_list_free(known_list); } return NULL; } gboolean clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean any_created = FALSE; clone_variant_data_t *clone_data = NULL; CRM_ASSERT(rsc); get_clone_variant_data(clone_data, rsc); rsc->children = g_list_sort(rsc->children, sort_rsc_id); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return FALSE; } if (rsc->exclusive_discover) { node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return FALSE; } } if (is_not_set(rsc->flags, pe_rsc_unique) && clone_data->clone_node_max == 1) { /* only look for one copy */ resource_t *child = NULL; /* Try whoever we probed last time */ child = find_instance_on(rsc, node); if (child) { return child->cmds->create_probe(child, node, complete, force, data_set); } /* Try whoever we plan on starting there */ gIter = rsc->children; for (; gIter != NULL; gIter = 
gIter->next) { node_t *local_node = NULL; resource_t *child_rsc = (resource_t *) gIter->data; CRM_ASSERT(child_rsc); local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node == NULL) { continue; } if (local_node->details == node->details) { return child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set); } } /* Fall back to the first clone instance */ CRM_ASSERT(rsc->children); child = rsc->children->data; return child->cmds->create_probe(child, node, complete, force, data_set); } gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)) { any_created = TRUE; } if (any_created && is_not_set(rsc->flags, pe_rsc_unique) && clone_data->clone_node_max == 1) { /* only look for one copy (clone :0) */ break; } } return any_created; } void clone_append_meta(resource_t * rsc, xmlNode * xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_notify) ? 
"true" : "false"); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); } GHashTable * clone_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } diff --git a/pengine/container.c b/pengine/container.c index 17ef0dfe11..99113e7ebb 100644 --- a/pengine/container.c +++ b/pengine/container.c @@ -1,328 +1,346 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #define VARIANT_CONTAINER 1 #include static bool is_child_container_node(container_variant_data_t *data, pe_node_t *node) { for (GListPtr gIter = data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if(node->details == tuple->node->details) { return TRUE; } } return FALSE; } +gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); +void distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, + int max, int per_host_max, pe_working_set_t * data_set); + node_t * container_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { + GListPtr containers = NULL; + GListPtr nodes = NULL; container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return NULL); get_container_variant_data(container_data, rsc); + for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { + container_grouping_t *tuple = (container_grouping_t *)gIter->data; + containers = g_list_append(containers, tuple->docker); + } + + dump_node_scores(0, rsc, __FUNCTION__, rsc->allowed_nodes); + + nodes = g_hash_table_get_values(rsc->allowed_nodes); + nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL); + containers = g_list_sort_with_data(containers, sort_clone_instance, data_set); + distribute_children(rsc, containers, nodes, + container_data->replicas, container_data->replicas_per_host, data_set); + g_list_free(nodes); + g_list_free(containers); + for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); - if(tuple->docker) { - tuple->docker->cmds->allocate(tuple->docker, 
/*
 * Create the constraints that tie together the resources of each tuple of a
 * container resource.
 *
 * For every tuple the member resources' own internal_constraints() methods
 * are invoked and start/stop orderings plus colocations are created between
 * ip, docker, remote and child as listed below. Finally the container's
 * overall child resource (container_data->child), if any, gets its own
 * internal constraints.
 *
 * rsc       container resource; the function returns early if it is NULL
 * data_set  working set passed to every constraint-creating call
 */
void
container_internal_constraints(resource_t * rsc, pe_working_set_t * data_set)
{
    container_variant_data_t *container_data = NULL;

    CRM_CHECK(rsc != NULL, return);

    get_container_variant_data(container_data, rsc);
    for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
        char *id = NULL;
        container_grouping_t *tuple = (container_grouping_t *)gIter->data;

        CRM_ASSERT(tuple);
        if(tuple->docker) {
            tuple->docker->cmds->internal_constraints(tuple->docker, data_set);
        }

        if(tuple->ip) {
            tuple->ip->cmds->internal_constraints(tuple->ip, data_set);

            // Start ip then docker
            // NOTE(review): tuple->docker is passed on here without the NULL
            // guard used above - confirm a tuple with an ip always has a docker
            new_rsc_order(tuple->ip, RSC_START, tuple->docker, RSC_START, pe_order_runnable_left, data_set);
            new_rsc_order(tuple->docker, RSC_STOP, tuple->ip, RSC_STOP, pe_order_implies_first, data_set);

            // Colocate the ip with its docker resource (score INFINITY)
            // NOTE(review): id is freed right after rsc_colocation_new() -
            // confirm the callee copies the string instead of keeping the pointer
            id = crm_strdup_printf("%s-ip-with-docker-%d", rsc->id, tuple->offset);
            rsc_colocation_new(id, NULL, INFINITY, tuple->ip, tuple->docker, NULL, NULL, data_set);
            free(id);
        }

        if(tuple->remote) {
            // A remote connection is only expected on tuples that also have an ip
            CRM_ASSERT(tuple->ip);
            tuple->remote->cmds->internal_constraints(tuple->remote, data_set);

            // Start docker then remote
            new_rsc_order(
                tuple->docker, RSC_START, tuple->remote, RSC_START, pe_order_runnable_left, data_set);
            new_rsc_order(
                tuple->remote, RSC_STOP, tuple->docker, RSC_STOP, pe_order_implies_first, data_set);

            // Colocate the remote connection with the ip (score INFINITY)
            // NOTE(review): same free-after-use question for id as above
            id = crm_strdup_printf("%s-remote-with-ip-%d", rsc->id, tuple->offset);
            rsc_colocation_new(id, NULL, INFINITY, tuple->remote, tuple->ip, NULL, NULL, data_set);
            free(id);
        }

        if(tuple->child) {
            // A child is only expected on tuples that also have a remote connection
            CRM_ASSERT(tuple->remote);

            // Start remote then child
            new_rsc_order(
                tuple->remote, RSC_START, tuple->child, RSC_START, pe_order_runnable_left, data_set);
            new_rsc_order(
                tuple->child, RSC_STOP, tuple->remote, RSC_STOP, pe_order_implies_first, data_set);
        }

    }

    if(container_data->child) {
        container_data->child->cmds->internal_constraints(container_data->child, data_set);
    }
}
rsc_rh, rsc_colocation_t * constraint) { pe_err("Container %s cannot be colocated with anything", rsc_rh->id); } enum pe_action_flags container_action_flags(action_t * action, node_t * node) { enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); return flags; } enum pe_graph_flags container_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) { enum pe_graph_flags changed = pe_graph_none; return changed; } void container_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); } } void container_expand(resource_t * rsc, pe_working_set_t * data_set) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { tuple->ip->cmds->expand(tuple->ip, data_set); } if(tuple->child) { tuple->child->cmds->expand(tuple->child, data_set); } if(tuple->docker) { tuple->docker->cmds->expand(tuple->docker, data_set); } if(tuple->remote) { tuple->remote->cmds->expand(tuple->remote, data_set); } } } gboolean container_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { bool any_created = FALSE; container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return FALSE); get_container_variant_data(container_data, rsc); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple 
= (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { any_created |= tuple->ip->cmds->create_probe(tuple->ip, node, complete, force, data_set); } if(tuple->child && node->details == tuple->node->details) { any_created |= tuple->child->cmds->create_probe(tuple->child, node, complete, force, data_set); } if(tuple->docker) { any_created |= tuple->docker->cmds->create_probe(tuple->docker, node, complete, force, data_set); } if(FALSE && tuple->remote) { // TODO: Needed? any_created |= tuple->remote->cmds->create_probe(tuple->remote, node, complete, force, data_set); } } return any_created; } void container_append_meta(resource_t * rsc, xmlNode * xml) { } GHashTable * container_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } void container_LogActions( resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { LogActions(tuple->ip, data_set, terminal); } if(tuple->docker) { LogActions(tuple->docker, data_set, terminal); } if(tuple->remote) { LogActions(tuple->remote, data_set, terminal); } if(tuple->child) { LogActions(tuple->child, data_set, terminal); } } } diff --git a/xml/resources-2.8.rng b/xml/resources-2.8.rng index 39fdd6b567..a40c931c50 100644 --- a/xml/resources-2.8.rng +++ b/xml/resources-2.8.rng @@ -1,295 +1,298 @@ + + + ([0-9\-]+) Stopped Started Slave Master nothing quorum fencing unfencing ignore block stop restart standby fence restart-container ocf lsb heartbeat stonith upstart service systemd nagios