diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c index 2839317b8c..edc9c3e550 100644 --- a/lib/cib/cib_attrs.c +++ b/lib/cib/cib_attrs.c @@ -1,664 +1,664 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static pcmk__output_t * new_output_object(const char *ty) { int rc = pcmk_rc_ok; pcmk__output_t *out = NULL; const char* argv[] = { "", NULL }; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_LOG, PCMK__SUPPORTED_FORMAT_TEXT, { NULL, NULL, NULL } }; pcmk__register_formats(NULL, formats); rc = pcmk__output_new(&out, ty, NULL, (char**)argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { crm_err("Can't out due to internal error: %s", pcmk_rc_str(rc)); return NULL; } return out; } static int find_attr(cib_t *cib, const char *section, const char *node_uuid, const char *attr_set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *user_name, xmlNode **result) { int rc = pcmk_rc_ok; const char *xpath_base = NULL; GString *xpath = NULL; xmlNode *xml_search = NULL; const char *set_type = NULL; const char *node_type = NULL; if (attr_set_type) { set_type = attr_set_type; } else { set_type = PCMK_XE_INSTANCE_ATTRIBUTES; } if (pcmk__str_eq(section, PCMK_XE_CRM_CONFIG, pcmk__str_casei)) { node_uuid = NULL; set_type = PCMK_XE_CLUSTER_PROPERTY_SET; } else if (pcmk__strcase_any_of(section, PCMK_XE_OP_DEFAULTS, PCMK_XE_RSC_DEFAULTS, NULL)) { node_uuid = NULL; set_type = PCMK_XE_META_ATTRIBUTES; } else if (pcmk__str_eq(section, PCMK_XE_TICKETS, pcmk__str_casei)) { node_uuid = NULL; section = PCMK_XE_STATUS; node_type = PCMK_XE_TICKETS; } else if (node_uuid == NULL) { return EINVAL; } xpath_base = pcmk_cib_xpath_for(section); if (xpath_base == NULL) { crm_warn("%s CIB section not known", section); return ENOMSG; } xpath = g_string_sized_new(1024); g_string_append(xpath, xpath_base); if (pcmk__str_eq(node_type, PCMK_XE_TICKETS, pcmk__str_casei)) { pcmk__g_strcat(xpath, "//", node_type, NULL); } else if (node_uuid) { const char *node_type = PCMK_XE_NODE; if (pcmk__str_eq(section, PCMK_XE_STATUS, pcmk__str_casei)) { node_type = PCMK__XE_NODE_STATE; set_type = PCMK__XE_TRANSIENT_ATTRIBUTES; } pcmk__g_strcat(xpath, "//", node_type, "[@" PCMK_XA_ID "='", node_uuid, "']", NULL); } pcmk__g_strcat(xpath, "//", set_type, NULL); if (set_name) { pcmk__g_strcat(xpath, "[@" PCMK_XA_ID "='", set_name, "']", NULL); } g_string_append(xpath, "//nvpair"); if (attr_id && attr_name) { pcmk__g_strcat(xpath, "[@" PCMK_XA_ID "='", attr_id, "' " "and @" PCMK_XA_NAME "='", attr_name, "']", NULL); } else if (attr_id) { pcmk__g_strcat(xpath, "[@" PCMK_XA_ID "='", attr_id, "']", NULL); } else if (attr_name) { pcmk__g_strcat(xpath, "[@" PCMK_XA_NAME "='", attr_name, "']", NULL); } rc = cib_internal_op(cib, PCMK__CIB_REQUEST_QUERY, NULL, (const char *) xpath->str, NULL, &xml_search, cib_sync_call|cib_xpath, user_name); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { crm_trace("Query failed for attribute %s (section=%s, node=%s, set=%s, xpath=%s): %s", attr_name, section, pcmk__s(node_uuid, ""), pcmk__s(set_name, ""), (const char *) xpath->str, pcmk_rc_str(rc)); } else { crm_log_xml_debug(xml_search, "Match"); } g_string_free(xpath, TRUE); *result = xml_search; return rc; } static int handle_multiples(pcmk__output_t *out, xmlNode *search, const char *attr_name) { if ((search != NULL) && (search->children != NULL)) { pcmk__warn_multiple_name_matches(out, search, attr_name); return ENOTUNIQ; } else { return pcmk_rc_ok; } } int cib__update_node_attr(pcmk__output_t *out, cib_t *cib, int call_options, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *attr_value, const char *user_name, const char *node_type) { const char *tag = NULL; int rc = pcmk_rc_ok; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_search = NULL; char *local_attr_id = NULL; char *local_set_name = NULL; CRM_CHECK((out != NULL) && (cib != NULL) && (section != NULL) && ((attr_id != NULL) || (attr_name != NULL)) && (attr_value != NULL), return EINVAL); rc = find_attr(cib, section, node_uuid, set_type, set_name, attr_id, attr_name, user_name, &xml_search); if (rc == pcmk_rc_ok) { if (handle_multiples(out, xml_search, attr_name) == ENOTUNIQ) { pcmk__xml_free(xml_search); return ENOTUNIQ; } else { local_attr_id = pcmk__xe_get_copy(xml_search, PCMK_XA_ID); attr_id = local_attr_id; pcmk__xml_free(xml_search); goto do_modify; } } else if (rc != ENXIO) { pcmk__xml_free(xml_search); return rc; /* } else if(attr_id == NULL) { */ /* return EINVAL; */ } else { pcmk__xml_free(xml_search); crm_trace("%s does not exist, create it", attr_name); if (pcmk__str_eq(section, PCMK_XE_TICKETS, pcmk__str_casei)) { node_uuid = NULL; section = PCMK_XE_STATUS; node_type = PCMK_XE_TICKETS; xml_top = pcmk__xe_create(xml_obj, PCMK_XE_STATUS); xml_obj = pcmk__xe_create(xml_top, PCMK_XE_TICKETS); } else if (pcmk__str_eq(section, PCMK_XE_NODES, pcmk__str_casei)) { if (node_uuid == NULL) { return EINVAL; } if (pcmk__str_eq(node_type, PCMK_VALUE_REMOTE, pcmk__str_casei)) { xml_top = pcmk__xe_create(xml_obj, PCMK_XE_NODES); xml_obj = pcmk__xe_create(xml_top, PCMK_XE_NODE); crm_xml_add(xml_obj, PCMK_XA_TYPE, PCMK_VALUE_REMOTE); crm_xml_add(xml_obj, PCMK_XA_ID, node_uuid); crm_xml_add(xml_obj, PCMK_XA_UNAME, node_uuid); } else { tag = PCMK_XE_NODE; } } else if (pcmk__str_eq(section, PCMK_XE_STATUS, pcmk__str_casei)) { tag = PCMK__XE_TRANSIENT_ATTRIBUTES; if (node_uuid == NULL) { return EINVAL; } xml_top = pcmk__xe_create(xml_obj, PCMK__XE_NODE_STATE); crm_xml_add(xml_top, PCMK_XA_ID, node_uuid); xml_obj = xml_top; } else { tag = section; node_uuid = NULL; } if (set_name == NULL) { if (pcmk__str_eq(section, PCMK_XE_CRM_CONFIG, pcmk__str_casei)) { local_set_name = pcmk__str_copy(PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS); } else if (pcmk__str_eq(node_type, PCMK_XE_TICKETS, pcmk__str_casei)) { local_set_name = crm_strdup_printf("%s-%s", section, PCMK_XE_TICKETS); } else if (node_uuid) { local_set_name = crm_strdup_printf("%s-%s", section, node_uuid); if (set_type) { char *tmp_set_name = local_set_name; local_set_name = crm_strdup_printf("%s-%s", tmp_set_name, set_type); free(tmp_set_name); } } else { local_set_name = crm_strdup_printf("%s-options", section); } set_name = local_set_name; } if (attr_id == NULL) { local_attr_id = crm_strdup_printf("%s-%s", set_name, attr_name); pcmk__xml_sanitize_id(local_attr_id); attr_id = local_attr_id; } else if (attr_name == NULL) { attr_name = attr_id; } crm_trace("Creating %s/%s", section, tag); if (tag != NULL) { xml_obj = pcmk__xe_create(xml_obj, tag); crm_xml_add(xml_obj, PCMK_XA_ID, node_uuid); if (xml_top == NULL) { xml_top = xml_obj; } } if ((node_uuid == NULL) && !pcmk__str_eq(node_type, PCMK_XE_TICKETS, pcmk__str_casei)) { if (pcmk__str_eq(section, PCMK_XE_CRM_CONFIG, pcmk__str_casei)) { xml_obj = pcmk__xe_create(xml_obj, PCMK_XE_CLUSTER_PROPERTY_SET); } else { xml_obj = pcmk__xe_create(xml_obj, PCMK_XE_META_ATTRIBUTES); } } else if (set_type) { xml_obj = pcmk__xe_create(xml_obj, set_type); } else { xml_obj = pcmk__xe_create(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES); } crm_xml_add(xml_obj, PCMK_XA_ID, set_name); if (xml_top == NULL) { xml_top = xml_obj; } } do_modify: xml_obj = crm_create_nvpair_xml(xml_obj, attr_id, attr_name, attr_value); if (xml_top == NULL) { xml_top = xml_obj; } crm_log_xml_trace(xml_top, "update_attr"); rc = cib_internal_op(cib, PCMK__CIB_REQUEST_MODIFY, NULL, section, xml_top, NULL, call_options, user_name); if (!pcmk_is_set(call_options, cib_sync_call) && (cib->variant != cib_file) && (rc >= 0)) { // For async call, positive rc is the call ID (file always synchronous) rc = pcmk_rc_ok; } else { rc = pcmk_legacy2rc(rc); } if (rc != pcmk_rc_ok) { out->err(out, "Error setting %s=%s (section=%s, set=%s): %s", attr_name, attr_value, section, pcmk__s(set_name, ""), pcmk_rc_str(rc)); crm_log_xml_info(xml_top, "Update"); } free(local_set_name); free(local_attr_id); pcmk__xml_free(xml_top); return rc; } int cib__get_node_attrs(pcmk__output_t *out, cib_t *cib, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *user_name, xmlNode **result) { int rc = pcmk_rc_ok; pcmk__assert(result != NULL); CRM_CHECK(section != NULL, return EINVAL); *result = NULL; rc = find_attr(cib, section, node_uuid, set_type, set_name, attr_id, attr_name, user_name, result); if (rc != pcmk_rc_ok) { crm_trace("Query failed for attribute %s (section=%s node=%s set=%s): %s", pcmk__s(attr_name, "with unspecified name"), section, pcmk__s(set_name, ""), pcmk__s(node_uuid, ""), pcmk_rc_str(rc)); } return rc; } int cib__delete_node_attr(pcmk__output_t *out, cib_t *cib, int options, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *attr_value, const char *user_name) { int rc = pcmk_rc_ok; xmlNode *xml_obj = NULL; xmlNode *xml_search = NULL; char *local_attr_id = NULL; CRM_CHECK(section != NULL, return EINVAL); CRM_CHECK(attr_name != NULL || attr_id != NULL, return EINVAL); if (attr_id == NULL) { rc = find_attr(cib, section, node_uuid, set_type, set_name, attr_id, attr_name, user_name, &xml_search); if (rc != pcmk_rc_ok || handle_multiples(out, xml_search, attr_name) == ENOTUNIQ) { pcmk__xml_free(xml_search); return rc; } else { local_attr_id = pcmk__xe_get_copy(xml_search, PCMK_XA_ID); attr_id = local_attr_id; pcmk__xml_free(xml_search); } } xml_obj = crm_create_nvpair_xml(NULL, attr_id, attr_name, attr_value); rc = cib_internal_op(cib, PCMK__CIB_REQUEST_DELETE, NULL, section, xml_obj, NULL, options, user_name); if (!pcmk_is_set(options, cib_sync_call) && (cib->variant != cib_file) && (rc >= 0)) { // For async call, positive rc is the call ID (file always synchronous) rc = pcmk_rc_ok; } else { rc = pcmk_legacy2rc(rc); } if (rc == pcmk_rc_ok) { out->info(out, "Deleted %s %s: id=%s%s%s%s%s", section, node_uuid ? "attribute" : "option", local_attr_id, set_name ? " set=" : "", set_name ? set_name : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(local_attr_id); pcmk__xml_free(xml_obj); return rc; } int find_nvpair_attr_delegate(cib_t *cib, const char *attr, const char *section, const char *node_uuid, const char *attr_set_type, const char *set_name, const char *attr_id, const char *attr_name, gboolean to_console, char **value, const char *user_name) { pcmk__output_t *out = NULL; xmlNode *xml_search = NULL; int rc = pcmk_ok; out = new_output_object(to_console ? "text" : "log"); if (out == NULL) { return pcmk_err_generic; } rc = find_attr(cib, section, node_uuid, attr_set_type, set_name, attr_id, attr_name, user_name, &xml_search); if (rc == pcmk_rc_ok) { rc = handle_multiples(out, xml_search, attr_name); if (rc == pcmk_rc_ok) { pcmk__str_update(value, pcmk__xe_get(xml_search, attr)); } } out->finish(out, CRM_EX_OK, true, NULL); pcmk__xml_free(xml_search); pcmk__output_free(out); return pcmk_rc2legacy(rc); } int update_attr_delegate(cib_t *cib, int call_options, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *attr_value, gboolean to_console, const char *user_name, const char *node_type) { pcmk__output_t *out = NULL; int rc = pcmk_ok; out = new_output_object(to_console ? "text" : "log"); if (out == NULL) { return pcmk_err_generic; } rc = cib__update_node_attr(out, cib, call_options, section, node_uuid, set_type, set_name, attr_id, attr_name, attr_value, user_name, node_type); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); return pcmk_rc2legacy(rc); } int read_attr_delegate(cib_t *cib, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **attr_value, gboolean to_console, const char *user_name) { pcmk__output_t *out = NULL; xmlNode *result = NULL; int rc = pcmk_ok; out = new_output_object(to_console ? "text" : "log"); if (out == NULL) { return pcmk_err_generic; } rc = cib__get_node_attrs(out, cib, section, node_uuid, set_type, set_name, attr_id, attr_name, user_name, &result); if (rc == pcmk_rc_ok) { if (result->children == NULL) { pcmk__str_update(attr_value, pcmk__xe_get(result, PCMK_XA_VALUE)); } else { rc = ENOTUNIQ; } } out->finish(out, CRM_EX_OK, true, NULL); pcmk__xml_free(result); pcmk__output_free(out); return pcmk_rc2legacy(rc); } int delete_attr_delegate(cib_t *cib, int options, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *attr_value, gboolean to_console, const char *user_name) { pcmk__output_t *out = NULL; int rc = pcmk_ok; out = new_output_object(to_console ? "text" : "log"); if (out == NULL) { return pcmk_err_generic; } rc = cib__delete_node_attr(out, cib, options, section, node_uuid, set_type, set_name, attr_id, attr_name, attr_value, user_name); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); return pcmk_rc2legacy(rc); } /*! * \internal * \brief Parse node UUID from search result * * \param[in] result XML search result * \param[out] uuid If non-NULL, where to store parsed UUID * \param[out] is_remote If non-NULL, set TRUE if result is remote node * * \return pcmk_ok if UUID was successfully parsed, -ENXIO otherwise */ static int get_uuid_from_result(const xmlNode *result, char **uuid, int *is_remote) { int rc = -ENXIO; const char *parsed_uuid = NULL; int parsed_is_remote = FALSE; if (result == NULL) { return rc; } /* If there are multiple results, the first is sufficient */ if (pcmk__xe_is(result, PCMK__XE_XPATH_QUERY)) { result = pcmk__xe_first_child(result, NULL, NULL, NULL); CRM_CHECK(result != NULL, return rc); } if (pcmk__xe_is(result, PCMK_XE_NODE)) { // Result is PCMK_XE_NODE element from PCMK_XE_NODES section if (pcmk__str_eq(pcmk__xe_get(result, PCMK_XA_TYPE), PCMK_VALUE_REMOTE, pcmk__str_casei)) { parsed_uuid = pcmk__xe_get(result, PCMK_XA_UNAME); parsed_is_remote = TRUE; } else { parsed_uuid = pcmk__xe_id(result); parsed_is_remote = FALSE; } } else if (pcmk__xe_is(result, PCMK_XE_PRIMITIVE)) { /* Result is for ocf:pacemaker:remote resource */ parsed_uuid = pcmk__xe_id(result); parsed_is_remote = TRUE; } else if (pcmk__xe_is(result, PCMK_XE_NVPAIR)) { /* Result is PCMK_META_REMOTE_NODE parameter of for guest * node */ parsed_uuid = pcmk__xe_get(result, PCMK_XA_VALUE); parsed_is_remote = TRUE; } else if (pcmk__xe_is(result, PCMK__XE_NODE_STATE)) { // Result is PCMK__XE_NODE_STATE element from PCMK_XE_STATUS section parsed_uuid = pcmk__xe_get(result, PCMK_XA_UNAME); if (pcmk__xe_attr_is_true(result, PCMK_XA_REMOTE_NODE)) { parsed_is_remote = TRUE; } } if (parsed_uuid) { if (uuid) { *uuid = strdup(parsed_uuid); } if (is_remote) { *is_remote = parsed_is_remote; } rc = pcmk_ok; } return rc; } /* Search string to find a node by name, as: * - cluster or remote node in nodes section * - remote node in resources section * - guest node in resources section - * - orphaned remote node or bundle guest node in status section + * - removed remote node or bundle guest node in status section */ #define XPATH_UPPER_TRANS "ABCDEFGHIJKLMNOPQRSTUVWXYZ" #define XPATH_LOWER_TRANS "abcdefghijklmnopqrstuvwxyz" #define XPATH_NODE \ "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_NODES \ "/" PCMK_XE_NODE "[translate(@" PCMK_XA_UNAME ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ "/" PCMK_XE_PRIMITIVE \ "[@class='ocf'][@provider='pacemaker'][@type='remote'][translate(@id,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ "/" PCMK_XE_PRIMITIVE "/" PCMK_XE_META_ATTRIBUTES "/" PCMK_XE_NVPAIR \ "[@name='" PCMK_META_REMOTE_NODE "'][translate(@value,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" PCMK_XE_CIB "/" PCMK_XE_STATUS "/" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_REMOTE_NODE "='true'][translate(@" PCMK_XA_ID ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" int query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node) { int rc = pcmk_ok; char *xpath_string; xmlNode *xml_search = NULL; char *host_lowercase = NULL; pcmk__assert(uname != NULL); host_lowercase = g_ascii_strdown(uname, -1); if (uuid) { *uuid = NULL; } if (is_remote_node) { *is_remote_node = FALSE; } xpath_string = crm_strdup_printf(XPATH_NODE, host_lowercase, host_lowercase, host_lowercase, host_lowercase); if (cib_internal_op(the_cib, PCMK__CIB_REQUEST_QUERY, NULL, xpath_string, NULL, &xml_search, cib_sync_call|cib_xpath, NULL) == pcmk_ok) { rc = get_uuid_from_result(xml_search, uuid, is_remote_node); } else { rc = -ENXIO; } free(xpath_string); pcmk__xml_free(xml_search); g_free(host_lowercase); if (rc != pcmk_ok) { crm_debug("Could not map node name '%s' to a UUID: %s", uname, pcmk_strerror(rc)); } else { crm_info("Mapped node name '%s' to UUID %s", uname, (uuid? *uuid : "")); } return rc; } diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index d293683ccf..61caeb2dc6 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1108 +1,1108 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pcmk__action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->priv->name) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = pcmk__xe_create(xml, PCMK_XE_NODE); crm_xml_add(node_xml, PCMK_XA_ID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pcmk_node_t *node, void *xml) { add_node_to_xml_by_id(node->priv->id, (xmlNode *) xml); } /*! * \internal * \brief Count (optionally add to XML) nodes needing maintenance state update * * \param[in,out] xml Parent XML tag to add to, if any * \param[in] scheduler Scheduler data * * \return Count of nodes added * \note Only Pacemaker Remote nodes are considered currently */ static int add_maintenance_nodes(xmlNode *xml, const pcmk_scheduler_t *scheduler) { xmlNode *maintenance = NULL; int count = 0; if (xml != NULL) { maintenance = pcmk__xe_create(xml, PCMK__XE_MAINTENANCE); } for (const GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { const pcmk_node_t *node = iter->data; if (!pcmk__is_pacemaker_remote_node(node)) { continue; } if ((node->details->maintenance && !pcmk_is_set(node->priv->flags, pcmk__node_remote_maint)) || (!node->details->maintenance && pcmk_is_set(node->priv->flags, pcmk__node_remote_maint))) { if (maintenance != NULL) { crm_xml_add(add_node_to_xml_by_id(node->priv->id, maintenance), PCMK__XA_NODE_IN_MAINTENANCE, (node->details->maintenance? "1" : "0")); } count++; } } crm_trace("%s %d nodes in need of maintenance mode update in state", ((maintenance == NULL)? "Counted" : "Added"), count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] scheduler Scheduler data */ static void add_maintenance_update(pcmk_scheduler_t *scheduler) { pcmk_action_t *action = NULL; if (add_maintenance_nodes(NULL, scheduler) != 0) { action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, scheduler); pcmk__set_action_flags(action, pcmk__action_always_in_graph); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes */ static void add_downed_nodes(xmlNode *xml, const pcmk_action_t *action) { CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL), return); if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* Shutdown makes the action's node down */ xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); pe_foreach_guest_node(action->node->priv->scheduler, action->node, add_node_to_xml, downed); } } else if ((action->rsc != NULL) && pcmk_is_set(action->rsc->flags, pcmk__rsc_is_remote_connection) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pcmk_action_t *input; bool migrating = false; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pcmk__related_action_t *) iter->data)->action; if ((input->rsc != NULL) && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none) && pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { migrating = true; break; } } if (!migrating) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! * \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(const pcmk_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); return pcmk__notify_key(action->rsc->priv->history_id, n_type, n_task); } return pcmk__op_key(action->rsc->priv->history_id, pcmk__s(action->cancel_task, action->task), interval_ms); } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] xml Transition graph action XML for \p action */ static void add_node_details(const pcmk_action_t *action, xmlNode *xml) { pcmk_node_t *router_node = pcmk__connection_host_for_action(action); crm_xml_add(xml, PCMK__META_ON_NODE, action->node->priv->name); crm_xml_add(xml, PCMK__META_ON_NODE_UUID, action->node->priv->id); if (router_node != NULL) { crm_xml_add(xml, PCMK__XA_ROUTER_NODE, router_node->priv->name); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_resource_details(const pcmk_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { PCMK_XA_CLASS, PCMK_XA_PROVIDER, PCMK_XA_TYPE, }; /* If a resource is locked to a node via PCMK_OPT_SHUTDOWN_LOCK, mark its * actions so the controller can preserve the lock when the action * completes. */ if (pcmk__action_locks_rsc_to_node(action)) { crm_xml_add_ll(action_xml, PCMK_OPT_SHUTDOWN_LOCK, (long long) action->rsc->priv->lock_time); } // List affected resource rsc_xml = pcmk__xe_create(action_xml, (const char *) action->rsc->priv->xml->name); if (pcmk_is_set(action->rsc->flags, pcmk__rsc_removed) && (action->rsc->priv->history_id != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ - crm_debug("Using orphan clone name %s instead of history ID %s", + crm_debug("Using \"removed\" clone name %s instead of history ID %s", action->rsc->id, action->rsc->priv->history_id); crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->priv->history_id); crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } else if (!pcmk_is_set(action->rsc->flags, pcmk__rsc_unique)) { const char *xml_id = pcmk__xe_id(action->rsc->priv->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->priv->history_id); /* ID is what we'd like client to use * LONG_ID is what they might know it as instead * * LONG_ID is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). (@TODO The effect of removing this on * regression tests suggests that it is still needed for unique clones) * * If anyone toggles the unique flag to 'on', the - * 'instance free' name will correspond to an orphan + * 'instance free' name will correspond to a removed resource * and fall into the clause above instead */ crm_xml_add(rsc_xml, PCMK_XA_ID, xml_id); if ((action->rsc->priv->history_id != NULL) && !pcmk__str_eq(xml_id, action->rsc->priv->history_id, pcmk__str_none)) { crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->priv->history_id); } else { crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } } else { pcmk__assert(action->rsc->priv->history_id == NULL); crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->priv->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in,out] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pcmk_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; pcmk_resource_t *rsc = action->rsc; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. */ args_xml = pcmk__xe_create(action_xml, PCMK__XE_ATTRIBUTES); crm_xml_add(args_xml, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(rsc, action->node, rsc->priv->scheduler); pcmk__substitute_remote_addr(rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); } else if ((rsc != NULL) && (rsc->priv->variant <= pcmk__rsc_variant_primitive)) { GHashTable *params = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); g_hash_table_foreach(params, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (rsc != NULL) { pcmk_resource_t *parent = rsc; while (parent != NULL) { parent->priv->cmds->add_graph_meta(parent, args_xml); parent = parent->priv->parent; } pcmk__add_guest_meta_to_xml(args_xml, action); } pcmk__xe_sort_attrs(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in,out] parent Parent XML element to add action to * \param[in,out] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] scheduler Scheduler data */ static void create_graph_action(xmlNode *parent, pcmk_action_t *action, bool skip_details, const pcmk_scheduler_t *scheduler) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; if ((action == NULL) || (scheduler == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* All fences need node info; guest node fences are pseudo-events */ if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); } else { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } } else if (pcmk__str_any_of(action->task, PCMK_ACTION_DO_SHUTDOWN, PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, PCMK__VALUE_CIB); } else if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = pcmk__xe_create(parent, PCMK__XE_RSC_OP); } crm_xml_add_int(action_xml, PCMK_XA_ID, action->id); crm_xml_add(action_xml, PCMK_XA_OPERATION, action->task); if ((action->rsc != NULL) && (action->rsc->priv->history_id != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, clone_key); crm_xml_add(action_xml, "internal_" PCMK__XA_OPERATION_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE, action->node->priv->name); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE_UUID, action->node->priv->id); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk_is_set(action->flags, pcmk__action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, scheduler); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in,out] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(pcmk_action_t *action) { if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk_is_set(action->flags, pcmk__action_optional) && !pcmk_is_set(action->flags, pcmk__action_always_in_graph)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pcmk__rsc_managed) && !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. */ interval_ms_s = g_hash_table_lookup(action->meta, PCMK_META_INTERVAL); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk_is_set(action->flags, pcmk__action_pseudo) || pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH, PCMK_ACTION_DO_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was not assigned to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unassigned", action, false); return false; } if (pcmk_is_set(action->flags, pcmk__action_on_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (pcmk__is_guest_or_bundle_node(action->node) && !pcmk_is_set(action->node->priv->flags, pcmk__node_remote_reset)) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (!action->node->details->online) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(const pcmk__related_action_t *ordering) { return pcmk_any_flags_set(ordering->flags, ~(pcmk__ar_then_implies_first_graphed |pcmk__ar_first_implies_then_graphed |pcmk__ar_ordered)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(const pcmk_action_t *action, pcmk__related_action_t *input) { if (input->graphed) { return true; } if (input->flags == pcmk__ar_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable) && pcmk_is_set(input->flags, pcmk__ar_min_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "minimum number of instances required but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->flags, pcmk__ar_unmigratable_then_blocks) && !pcmk_is_set(input->action->flags, pcmk__action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input blocked if 'then' unmigratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->flags, pcmk__ar_if_first_unmigratable) && pcmk_is_set(input->action->flags, pcmk__action_migratable)) { crm_trace("Ignoring %s (%d) input %s (%d): ordering applies " "only if input is unmigratable, but it is migratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->flags == pcmk__ar_ordered) && pcmk_is_set(input->action->flags, pcmk__action_migratable) && g_str_has_suffix(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->flags == pcmk__ar_if_on_same_node_or_target) { pcmk_node_t *input_node = input->action->node; if ((action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { pcmk_node_t *assigned = action->rsc->priv->assigned_node; /* For load_stopped -> migrate_to orderings, we care about where * the resource has been assigned, not where migrate_to will be * executed. */ if (!pcmk__same_node(input_node, assigned)) { crm_trace("Ignoring %s (%d) input %s (%d): " "migration target %s is not same as input node %s", action->uuid, action->id, input->action->uuid, input->action->id, (assigned? assigned->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } } else if (!pcmk__same_node(input_node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->flags == pcmk__ar_if_required_on_same_node) { if (input->action->node && action->node && !pcmk__same_node(input->action->node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, pcmk__node_name(action->node), pcmk__node_name(input->action->node)); input->flags = pcmk__ar_none; return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pcmk__rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pcmk__rsc_managed) && g_str_has_suffix(input->action->uuid, "_stop_0") && pcmk__is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional) && !pcmk_any_flags_set(input->action->flags, pcmk__action_always_in_graph |pcmk__action_added_to_graph) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->flags); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in,out] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(const pcmk_action_t *init_action, const pcmk_action_t *action, pcmk__related_action_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pcmk__action_detect_loop)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->priv->name : "", init_action->uuid, init_action->node? init_action->node->priv->name : ""); return true; } pcmk__set_action_flags(input->action, pcmk__action_detect_loop); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->priv->name : "", input->action->uuid, input->action->node? input->action->node->priv->name : "", input->flags, init_action->uuid, init_action->node? init_action->node->priv->name : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pcmk__related_action_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pcmk__clear_action_flags(input->action, pcmk__action_detect_loop); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in,out] scheduler Scheduler data containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(const pcmk_action_t *action, pcmk_scheduler_t *scheduler) { int synapse_priority = 0; xmlNode *syn = pcmk__xe_create(scheduler->priv->graph, PCMK__XE_SYNAPSE); crm_xml_add_int(syn, PCMK_XA_ID, scheduler->priv->synapse_count++); if (action->rsc != NULL) { synapse_priority = action->rsc->priv->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, PCMK__XA_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in,out] data Action to possibly add * \param[in,out] user_data Scheduler data * * \note This will de-duplicate the action inputs, meaning that the * pcmk__related_action_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ static void add_action_to_graph(gpointer data, gpointer user_data) { pcmk_action_t *action = (pcmk_action_t *) data; pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) user_data; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk_is_set(action->flags, pcmk__action_inputs_deduplicated)) { pcmk__deduplicate_action_inputs(action); pcmk__set_action_flags(action, pcmk__action_inputs_deduplicated); } if (pcmk_is_set(action->flags, pcmk__action_added_to_graph) || !should_add_action_to_graph(action)) { return; // Already added, or shouldn't be } pcmk__set_action_flags(action, pcmk__action_added_to_graph); crm_trace("Adding action %d (%s%s%s) to graph", action->id, action->uuid, ((action->node == NULL)? "" : " on "), ((action->node == NULL)? "" : action->node->priv->name)); syn = create_graph_synapse(action, scheduler); set = pcmk__xe_create(syn, PCMK__XE_ACTION_SET); in = pcmk__xe_create(syn, PCMK__XE_INPUTS); create_graph_action(set, action, false, scheduler); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *input = lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = pcmk__xe_create(in, PCMK__XE_TRIGGER); input->graphed = true; create_graph_action(input_xml, input->action, true, scheduler); } } } static int transition_id = 0; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] scheduler Scheduler data * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const pcmk_scheduler_t *scheduler, const char *filename) { if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error) || pcmk__config_has_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_warning) || pcmk__config_has_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (pcmk__config_has_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc) { GList *iter = NULL; pcmk__assert(rsc != NULL); pcmk__rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); // First add the resource's own actions g_list_foreach(rsc->priv->actions, add_action_to_graph, rsc->priv->scheduler); // Then recursively add its children's actions (appropriate to variant) for (iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data; child_rsc->priv->cmds->add_actions_to_graph(child_rsc); } } /*! * \internal * \brief Create a transition graph with all cluster actions needed * * \param[in,out] scheduler Scheduler data */ void pcmk__create_graph(pcmk_scheduler_t *scheduler) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; GHashTable *config_hash = scheduler->priv->options; int rc = pcmk_rc_ok; transition_id++; crm_trace("Creating transition graph %d", transition_id); scheduler->priv->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH); value = pcmk__cluster_option(config_hash, PCMK_OPT_CLUSTER_DELAY); crm_xml_add(scheduler->priv->graph, PCMK_OPT_CLUSTER_DELAY, value); value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT); crm_xml_add(scheduler->priv->graph, PCMK_OPT_STONITH_TIMEOUT, value); crm_xml_add(scheduler->priv->graph, PCMK__XA_FAILED_STOP_OFFSET, PCMK_VALUE_INFINITY); if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) { crm_xml_add(scheduler->priv->graph, PCMK__XA_FAILED_START_OFFSET, PCMK_VALUE_INFINITY); } else { crm_xml_add(scheduler->priv->graph, PCMK__XA_FAILED_START_OFFSET, "1"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_BATCH_LIMIT); crm_xml_add(scheduler->priv->graph, PCMK_OPT_BATCH_LIMIT, value); crm_xml_add_int(scheduler->priv->graph, "transition_id", transition_id); value = pcmk__cluster_option(config_hash, PCMK_OPT_MIGRATION_LIMIT); rc = pcmk__scan_ll(value, &limit, 0LL); if (rc != pcmk_rc_ok) { crm_warn("Ignoring invalid value '%s' for " PCMK_OPT_MIGRATION_LIMIT ": %s", value, pcmk_rc_str(rc)); } else if (limit > 0) { crm_xml_add(scheduler->priv->graph, PCMK_OPT_MIGRATION_LIMIT, value); } if (scheduler->priv->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (unsigned long long) scheduler->priv->recheck_by); crm_xml_add(scheduler->priv->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ // Add resource actions to graph for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; pcmk__rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->priv->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(scheduler); // Add non-resource (node) actions for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pcmk__rsc_maintenance) && !pcmk_any_flags_set(action->flags, pcmk__action_optional|pcmk__action_runnable) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk_is_set(scheduler->flags, pcmk__sched_quorate) || (scheduler->no_quorum_policy == pcmk_no_quorum_ignore)) { const bool managed = pcmk_is_set(action->rsc->flags, pcmk__rsc_managed); const bool failed = pcmk_is_set(action->rsc->flags, pcmk__rsc_failed); crm_crit("Cannot %s %s because of %s:%s%s (%s)", action->node->details->unclean? "fence" : "shut down", pcmk__node_name(action->node), action->rsc->id, (managed? " blocked" : " unmanaged"), (failed? " failed" : ""), action->uuid); } } add_action_to_graph((gpointer) action, (gpointer) scheduler); } crm_log_xml_trace(scheduler->priv->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c index c6693c8424..8448821d5f 100644 --- a/lib/pacemaker/pcmk_sched_actions.c +++ b/lib/pacemaker/pcmk_sched_actions.c @@ -1,1957 +1,1957 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // bool, true, false #include #include #include #include // xmlNode #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Get the action flags relevant to ordering constraints * * \param[in,out] action Action to check * \param[in] node Node that *other* action in the ordering is on * (used only for clone resource actions) * * \return Action flags that should be used for orderings */ static uint32_t action_flags_for_ordering(pcmk_action_t *action, const pcmk_node_t *node) { bool runnable = false; uint32_t flags; // For non-resource actions, return the action flags if (action->rsc == NULL) { return action->flags; } /* For non-clone resources, or a clone action not assigned to a node, * return the flags as determined by the resource method without a node * specified. */ flags = action->rsc->priv->cmds->action_flags(action, NULL); if ((node == NULL) || !pcmk__is_clone(action->rsc)) { return flags; } /* Otherwise (i.e., for clone resource actions on a specific node), first * remember whether the non-node-specific action is runnable. */ runnable = pcmk_is_set(flags, pcmk__action_runnable); // Then recheck the resource method with the node flags = action->rsc->priv->cmds->action_flags(action, node); /* For clones in ordering constraints, the node-specific "runnable" doesn't * matter, just the non-node-specific setting (i.e., is the action runnable * anywhere). * * This applies only to runnable, and only for ordering constraints. This * function shouldn't be used for other types of constraints without * changes. Not very satisfying, but it's logical and appears to work well. */ if (runnable && !pcmk_is_set(flags, pcmk__action_runnable)) { pcmk__set_raw_action_flags(flags, action->rsc->id, pcmk__action_runnable); } return flags; } /*! * \internal * \brief Get action UUID that should be used with a resource ordering * * When an action is ordered relative to an action for a collective resource * (clone, group, or bundle), it actually needs to be ordered after all * instances of the collective have completed the relevant action (for example, * given "start CLONE then start RSC", RSC must wait until all instances of * CLONE have started). Given the UUID and resource of the first action in an * ordering, this returns the UUID of the action that should actually be used * for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0"). * * \param[in] first_uuid UUID of first action in ordering * \param[in] first_rsc Resource of first action in ordering * * \return Newly allocated copy of UUID to use with ordering * \note It is the caller's responsibility to free the return value. */ static char * action_uuid_for_ordering(const char *first_uuid, const pcmk_resource_t *first_rsc) { guint interval_ms = 0; char *uuid = NULL; char *rid = NULL; char *first_task_str = NULL; enum pcmk__action_type first_task = pcmk__action_unspecified; enum pcmk__action_type remapped_task = pcmk__action_unspecified; // Only non-notify actions for collective resources need remapping if ((strstr(first_uuid, PCMK_ACTION_NOTIFY) != NULL) || (first_rsc->priv->variant < pcmk__rsc_variant_group)) { goto done; } // Only non-recurring actions need remapping pcmk__assert(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms)); if (interval_ms > 0) { goto done; } first_task = pcmk__parse_action(first_task_str); switch (first_task) { case pcmk__action_stop: case pcmk__action_start: case pcmk__action_notify: case pcmk__action_promote: case pcmk__action_demote: remapped_task = first_task + 1; break; case pcmk__action_stopped: case pcmk__action_started: case pcmk__action_notified: case pcmk__action_promoted: case pcmk__action_demoted: remapped_task = first_task; break; case pcmk__action_monitor: case pcmk__action_shutdown: case pcmk__action_fence: break; default: crm_err("Unknown action '%s' in ordering", first_task_str); break; } if (remapped_task != pcmk__action_unspecified) { /* If a clone or bundle has notifications enabled, the ordering will be * relative to when notifications have been sent for the remapped task. */ if (pcmk_is_set(first_rsc->flags, pcmk__rsc_notify) && (pcmk__is_clone(first_rsc) || pcmk__is_bundled(first_rsc))) { uuid = pcmk__notify_key(rid, "confirmed-post", pcmk__action_text(remapped_task)); } else { uuid = pcmk__op_key(rid, pcmk__action_text(remapped_task), 0); } pcmk__rsc_trace(first_rsc, "Remapped action UUID %s to %s for ordering purposes", first_uuid, uuid); } done: free(first_task_str); free(rid); return (uuid != NULL)? uuid : pcmk__str_copy(first_uuid); } /*! * \internal * \brief Get actual action that should be used with an ordering * * When an action is ordered relative to an action for a collective resource * (clone, group, or bundle), it actually needs to be ordered after all * instances of the collective have completed the relevant action (for example, * given "start CLONE then start RSC", RSC must wait until all instances of * CLONE have started). Given the first action in an ordering, this returns the * the action that should actually be used for ordering (for example, the * started action instead of the start action). * * \param[in] action First action in an ordering * * \return Actual action that should be used for the ordering */ static pcmk_action_t * action_for_ordering(pcmk_action_t *action) { pcmk_action_t *result = action; pcmk_resource_t *rsc = action->rsc; if (rsc == NULL) { return result; } if ((rsc->priv->variant >= pcmk__rsc_variant_group) && (action->uuid != NULL)) { char *uuid = action_uuid_for_ordering(action->uuid, rsc); result = find_first_action(rsc->priv->actions, uuid, NULL, NULL); if (result == NULL) { crm_warn("Not remapping %s to %s because %s does not have " "remapped action", action->uuid, uuid, rsc->id); result = action; } free(uuid); } return result; } /*! * \internal * \brief Wrapper for update_ordered_actions() method for readability * * \param[in,out] rsc Resource to call method for * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this * node (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates * (may include pcmk__action_optional to affect only * mandatory actions, and pe_action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ static inline uint32_t update(pcmk_resource_t *rsc, pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { return rsc->priv->cmds->update_ordered_actions(first, then, node, flags, filter, type, scheduler); } /*! * \internal * \brief Update flags for ordering's actions appropriately for ordering's flags * * \param[in,out] first First action in an ordering * \param[in,out] then Then action in an ordering * \param[in] first_flags Action flags for \p first for ordering purposes * \param[in] then_flags Action flags for \p then for ordering purposes * \param[in,out] order Action wrapper for \p first in ordering * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags */ static uint32_t update_action_for_ordering_flags(pcmk_action_t *first, pcmk_action_t *then, uint32_t first_flags, uint32_t then_flags, pcmk__related_action_t *order, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; /* The node will only be used for clones. If interleaved, node will be NULL, * otherwise the ordering scope will be limited to the node. Normally, the * whole 'then' clone should restart if 'first' is restarted, so then->node * is needed. */ pcmk_node_t *node = then->node; if (pcmk_is_set(order->flags, pcmk__ar_first_implies_same_node_then)) { /* For unfencing, only instances of 'then' on the same node as 'first' * (the unfencing operation) should restart, so reset node to * first->node, at which point this case is handled like a normal * pcmk__ar_first_implies_then. */ pcmk__clear_relation_flags(order->flags, pcmk__ar_first_implies_same_node_then); pcmk__set_relation_flags(order->flags, pcmk__ar_first_implies_then); node = first->node; pcmk__rsc_trace(then->rsc, "%s then %s: mapped " "pcmk__ar_first_implies_same_node_then to " "pcmk__ar_first_implies_then on %s", first->uuid, then->uuid, pcmk__node_name(node)); } if (pcmk_is_set(order->flags, pcmk__ar_first_implies_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk__action_optional, pcmk__action_optional, pcmk__ar_first_implies_then, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_optional) && pcmk_is_set(then->flags, pcmk__action_optional)) { pcmk__clear_action_flags(then, pcmk__action_optional); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_implies_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_intermediate_stop) && (then->rsc != NULL)) { enum pcmk__action_flags restart = pcmk__action_optional |pcmk__action_runnable; changed |= update(then->rsc, first, then, node, first_flags, restart, pcmk__ar_intermediate_stop, scheduler); pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_intermediate_stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_then_implies_first)) { if (first->rsc != NULL) { changed |= update(first->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_then_implies_first, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_optional) && pcmk_is_set(first->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(first, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_first); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_then_implies_first", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_promoted_then_implies_first)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk__action_optional, pcmk__action_optional, pcmk__ar_promoted_then_implies_first, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_promoted_then_implies_first", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_min_runnable)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_min_runnable, scheduler); } else if (pcmk_is_set(first_flags, pcmk__action_runnable)) { // We have another runnable instance of "first" then->runnable_before++; /* Mark "then" as runnable if it requires a certain number of * "before" instances to be runnable, and they now are. */ if ((then->runnable_before >= then->required_runnable_before) && !pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__set_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_min_runnable", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_nested_remote_probe) && (then->rsc != NULL)) { if (!pcmk_is_set(first_flags, pcmk__action_runnable) && (first->rsc != NULL) && (first->rsc->priv->active_nodes != NULL)) { pcmk__rsc_trace(then->rsc, "%s then %s: ignoring because first is stopping", first->uuid, then->uuid); order->flags = pcmk__ar_none; } else { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_nested_remote_probe", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_unrunnable_first_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_runnable) && pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_unrunnable_first_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_unmigratable_then_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_unmigratable_then_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_unmigratable_then_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_first_else_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_first_else_then, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_else_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_ordered)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_ordered, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_ordered", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_asymmetric)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_asymmetric, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_asymmetric", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(first->flags, pcmk__action_runnable) && pcmk_is_set(order->flags, pcmk__ar_first_implies_then_graphed) && !pcmk_is_set(first_flags, pcmk__action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", then->uuid, first->uuid); pcmk__set_action_flags(then, pcmk__action_always_in_graph); // Don't bother marking 'then' as changed just for this } if (pcmk_is_set(order->flags, pcmk__ar_then_implies_first_graphed) && !pcmk_is_set(then_flags, pcmk__action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", first->uuid, then->uuid); pcmk__set_action_flags(first, pcmk__action_always_in_graph); // Don't bother marking 'first' as changed just for this } if (pcmk_any_flags_set(order->flags, pcmk__ar_first_implies_then |pcmk__ar_then_implies_first |pcmk__ar_intermediate_stop) && (first->rsc != NULL) && !pcmk_is_set(first->rsc->flags, pcmk__rsc_managed) && pcmk_is_set(first->rsc->flags, pcmk__rsc_blocked) && !pcmk_is_set(first->flags, pcmk__action_runnable) && pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* @TODO This seems odd; why wouldn't an unrunnable "first" already * block "then" before this? Note that the unmanaged-stop-{1,2} * scheduler regression tests and the test CIB for T209 have tests for * "stop then stop" relations that would be good for checking any * changes. */ if (pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after checking whether first " "is blocked, unmanaged, unrunnable stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } return changed; } // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pcmk__action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->priv->name) /*! * \internal * \brief Update an action's flags for all orderings where it is "then" * * \param[in,out] then Action to update * \param[in,out] scheduler Scheduler data */ void pcmk__update_action_for_orderings(pcmk_action_t *then, pcmk_scheduler_t *scheduler) { GList *lpc = NULL; uint32_t changed = pcmk__updated_none; int last_flags = then->flags; pcmk__rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s", action_type_str(then->flags), then->uuid, action_optional_str(then->flags), action_runnable_str(then->flags), action_node_str(then)); if (then->required_runnable_before > 0) { /* Initialize current known "runnable before" actions. As * update_action_for_ordering_flags() is called for each of then's * before actions, this number will increment as runnable 'first' * actions are encountered. */ then->runnable_before = 0; /* The pcmk__ar_min_runnable clause of * update_action_for_ordering_flags() (called below) * will reset runnable if appropriate. */ pcmk__clear_action_flags(then, pcmk__action_runnable); } for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk_action_t *first = other->action; pcmk_node_t *then_node = then->node; pcmk_node_t *first_node = first->node; const uint32_t target = pcmk__rsc_node_assigned; if ((first->rsc != NULL) && pcmk__is_group(first->rsc) && pcmk__str_eq(first->task, PCMK_ACTION_START, pcmk__str_none)) { first_node = first->rsc->priv->fns->location(first->rsc, NULL, target); if (first_node != NULL) { pcmk__rsc_trace(first->rsc, "Found %s for 'first' %s", pcmk__node_name(first_node), first->uuid); } } if (pcmk__is_group(then->rsc) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)) { then_node = then->rsc->priv->fns->location(then->rsc, NULL, target); if (then_node != NULL) { pcmk__rsc_trace(then->rsc, "Found %s for 'then' %s", pcmk__node_name(then_node), then->uuid); } } // Disable constraint if it only applies when on same node, but isn't if (pcmk_is_set(other->flags, pcmk__ar_if_on_same_node) && (first_node != NULL) && (then_node != NULL) && !pcmk__same_node(first_node, then_node)) { pcmk__rsc_trace(then->rsc, "Disabled ordering %s on %s then %s on %s: " "not same node", other->action->uuid, pcmk__node_name(first_node), then->uuid, pcmk__node_name(then_node)); other->flags = pcmk__ar_none; continue; } pcmk__clear_updated_flags(changed, then, pcmk__updated_first); if ((first->rsc != NULL) && pcmk_is_set(other->flags, pcmk__ar_then_cancels_first) && !pcmk_is_set(then->flags, pcmk__action_optional)) { /* 'then' is required, so we must abandon 'first' * (e.g. a required stop cancels any agent reload). */ pcmk__set_action_flags(other->action, pcmk__action_optional); if (!strcmp(first->task, PCMK_ACTION_RELOAD_AGENT)) { pcmk__clear_rsc_flags(first->rsc, pcmk__rsc_reload); } } if ((first->rsc != NULL) && (then->rsc != NULL) && (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) { first = action_for_ordering(first); } if (first != other->action) { pcmk__rsc_trace(then->rsc, "Ordering %s after %s instead of %s", then->uuid, first->uuid, other->action->uuid); } pcmk__rsc_trace(then->rsc, "%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s", first->uuid, first->flags, then->uuid, then->flags, other->flags, action_node_str(first)); if (first == other->action) { /* 'first' was not remapped (e.g. from 'start' to 'running'), which * could mean it is a non-resource action, a primitive resource * action, or already expanded. */ uint32_t first_flags, then_flags; first_flags = action_flags_for_ordering(first, then_node); then_flags = action_flags_for_ordering(then, first_node); changed |= update_action_for_ordering_flags(first, then, first_flags, then_flags, other, scheduler); /* 'first' was for a complex resource (clone, group, etc), * create a new dependency if necessary */ } else if (order_actions(first, then, other->flags)) { /* This was the first time 'first' and 'then' were associated, * start again to get the new actions_before list */ pcmk__set_updated_flags(changed, then, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "Disabled ordering %s then %s in favor of %s " "then %s", other->action->uuid, then->uuid, first->uuid, then->uuid); other->flags = pcmk__ar_none; } if (pcmk_is_set(changed, pcmk__updated_first)) { crm_trace("Re-processing %s and its 'after' actions " "because it changed", first->uuid); for (GList *lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__related_action_t *other = lpc2->data; pcmk__update_action_for_orderings(other->action, scheduler); } pcmk__update_action_for_orderings(first, scheduler); } } if (then->required_runnable_before > 0) { if (last_flags == then->flags) { pcmk__clear_updated_flags(changed, then, pcmk__updated_then); } else { pcmk__set_updated_flags(changed, then, pcmk__updated_then); } } if (pcmk_is_set(changed, pcmk__updated_then)) { crm_trace("Re-processing %s and its 'after' actions because it changed", then->uuid); if (pcmk_is_set(last_flags, pcmk__action_runnable) && !pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__block_colocation_dependents(then); } pcmk__update_action_for_orderings(then, scheduler); for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk__update_action_for_orderings(other->action, scheduler); } } } static inline bool is_primitive_action(const pcmk_action_t *action) { return (action != NULL) && pcmk__is_primitive(action->rsc); } /*! * \internal * \brief Clear a single action flag and set reason text * * \param[in,out] action Action whose flag should be cleared * \param[in] flag Action flag that should be cleared * \param[in] reason Action that is the reason why flag is being cleared */ #define clear_action_flag_because(action, flag, reason) do { \ if (pcmk_is_set((action)->flags, (flag))) { \ pcmk__clear_action_flags(action, flag); \ if ((action)->rsc != (reason)->rsc) { \ char *reason_text = pe__action2reason((reason), (flag)); \ pe_action_set_reason((action), reason_text, false); \ free(reason_text); \ } \ } \ } while (0) /*! * \internal * \brief Update actions in an asymmetric ordering * * If the "first" action in an asymmetric ordering is unrunnable, make the * "second" action unrunnable as well, if appropriate. * * \param[in] first 'First' action in an asymmetric ordering * \param[in,out] then 'Then' action in an asymmetric ordering */ static void handle_asymmetric_ordering(const pcmk_action_t *first, pcmk_action_t *then) { /* Only resource actions after an unrunnable 'first' action need updates for * asymmetric ordering. */ if ((then->rsc == NULL) || pcmk_is_set(first->flags, pcmk__action_runnable)) { return; } // Certain optional 'then' actions are unaffected by unrunnable 'first' if (pcmk_is_set(then->flags, pcmk__action_optional)) { enum rsc_role_e then_rsc_role; then_rsc_role = then->rsc->priv->fns->state(then->rsc, true); if ((then_rsc_role == pcmk_role_stopped) && pcmk__str_eq(then->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* If 'then' should stop after 'first' but is already stopped, the * ordering is irrelevant. */ return; } else if ((then_rsc_role >= pcmk_role_started) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none) && pe__rsc_running_on_only(then->rsc, then->node)) { /* Similarly if 'then' should start after 'first' but is already * started on a single node. */ return; } } // 'First' can't run, so 'then' can't either clear_action_flag_because(then, pcmk__action_optional, first); clear_action_flag_because(then, pcmk__action_runnable, first); } /*! * \internal * \brief Set action bits appropriately when pcmk__ar_intermediate_stop is used * * \param[in,out] first 'First' action in ordering * \param[in,out] then 'Then' action in ordering * \param[in] filter What action flags to care about * * \note pcmk__ar_intermediate_stop is set for "stop resource before starting * it" and "stop later group member before stopping earlier group member" */ static void handle_restart_ordering(pcmk_action_t *first, pcmk_action_t *then, uint32_t filter) { const char *reason = NULL; pcmk__assert(is_primitive_action(first) && is_primitive_action(then)); // We need to update the action in two cases: // ... if 'then' is required if (pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(then->flags, pcmk__action_optional)) { reason = "restart"; } /* ... if 'then' is unrunnable action on same resource (if a resource * should restart but can't start, we still want to stop) */ if (pcmk_is_set(filter, pcmk__action_runnable) && !pcmk_is_set(then->flags, pcmk__action_runnable) && pcmk_is_set(then->rsc->flags, pcmk__rsc_managed) && (first->rsc == then->rsc)) { reason = "stop"; } if (reason == NULL) { return; } pcmk__rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); // Make 'first' required if it is runnable if (pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(first, pcmk__action_optional, then); } // Make 'first' required if 'then' is required if (!pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } // Make 'first' unmigratable if 'then' is unmigratable if (!pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } // Make 'then' unrunnable if 'first' is required but unrunnable if (!pcmk_is_set(first->flags, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_runnable, first); } } /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' flags * (and runnable_before members if appropriate) as appropriate for the ordering. * Effects may cascade to other orderings involving the actions as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (ignored) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pcmk__action_optional to affect only * mandatory actions, and pcmk__action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; uint32_t then_flags = 0U; uint32_t first_flags = 0U; pcmk__assert((first != NULL) && (then != NULL) && (scheduler != NULL)); then_flags = then->flags; first_flags = first->flags; if (pcmk_is_set(type, pcmk__ar_asymmetric)) { handle_asymmetric_ordering(first, then); } if (pcmk_is_set(type, pcmk__ar_then_implies_first) && !pcmk_is_set(then_flags, pcmk__action_optional)) { // Then is required, and implies first should be, too if (pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(flags, pcmk__action_optional) && pcmk_is_set(first_flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } if (pcmk_is_set(flags, pcmk__action_migratable) && !pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_promoted_then_implies_first) && (then->rsc != NULL) && (then->rsc->priv->orig_role == pcmk_role_promoted) && pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); if (pcmk_is_set(first->flags, pcmk__action_migratable) && !pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_unmigratable_then_blocks) && pcmk_is_set(filter, pcmk__action_optional)) { if (!pcmk_all_flags_set(then->flags, pcmk__action_migratable |pcmk__action_runnable)) { clear_action_flag_because(first, pcmk__action_runnable, then); } if (!pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } } if (pcmk_is_set(type, pcmk__ar_first_else_then) && pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_migratable, first); pcmk__clear_action_flags(then, pcmk__action_pseudo); } if (pcmk_is_set(type, pcmk__ar_unrunnable_first_blocks) && pcmk_is_set(filter, pcmk__action_runnable) && pcmk_is_set(then->flags, pcmk__action_runnable) && !pcmk_is_set(flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_runnable, first); clear_action_flag_because(then, pcmk__action_migratable, first); } if (pcmk_is_set(type, pcmk__ar_first_implies_then) && pcmk_is_set(filter, pcmk__action_optional) && pcmk_is_set(then->flags, pcmk__action_optional) && !pcmk_is_set(flags, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_migratable)) { clear_action_flag_because(then, pcmk__action_optional, first); } if (pcmk_is_set(type, pcmk__ar_intermediate_stop)) { handle_restart_ordering(first, then, filter); } if (then_flags != then->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'first' %s (%#.6x)", then->uuid, pcmk__node_name(then->node), then->flags, then_flags, first->uuid, first->flags); if ((then->rsc != NULL) && (then->rsc->priv->parent != NULL)) { // Required to handle "X_stop then X_start" for cloned groups pcmk__update_action_for_orderings(then, scheduler); } } if (first_flags != first->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_first); pcmk__rsc_trace(first->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'then' %s (%#.6x)", first->uuid, pcmk__node_name(first->node), first->flags, first_flags, then->uuid, then->flags); } return changed; } /*! * \internal * \brief Trace-log an action (optionally with its dependent actions) * * \param[in] pre_text If not NULL, prefix the log with this plus ": " * \param[in] action Action to log * \param[in] details If true, recursively log dependent actions */ void pcmk__log_action(const char *pre_text, const pcmk_action_t *action, bool details) { const char *node_uname = NULL; const char *node_uuid = NULL; const char *desc = NULL; CRM_CHECK(action != NULL, return); if (!pcmk_is_set(action->flags, pcmk__action_pseudo)) { if (action->node != NULL) { node_uname = action->node->priv->name; node_uuid = action->node->priv->id; } else { node_uname = ""; } } switch (pcmk__parse_action(action->task)) { case pcmk__action_fence: case pcmk__action_shutdown: if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { desc = "Pseudo "; } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { desc = "Optional "; } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; default: if (pcmk_is_set(action->flags, pcmk__action_optional)) { desc = "Optional "; } else if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { desc = "Pseudo "; } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (action->rsc? action->rsc->id : ""), (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; } if (details) { const GList *iter = NULL; const pcmk__related_action_t *other = NULL; crm_trace("\t\t====== Preceding Actions"); for (iter = action->actions_before; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== Subsequent Actions"); for (iter = action->actions_after; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== End"); } else { crm_trace("\t\t(before=%d, after=%d)", g_list_length(action->actions_before), g_list_length(action->actions_after)); } } /*! * \internal * \brief Create a new shutdown action for a node * * \param[in,out] node Node being shut down * * \return Newly created shutdown action for \p node */ pcmk_action_t * pcmk__new_shutdown_action(pcmk_node_t *node) { char *shutdown_id = NULL; pcmk_action_t *shutdown_op = NULL; pcmk__assert(node != NULL); shutdown_id = crm_strdup_printf("%s-%s", PCMK_ACTION_DO_SHUTDOWN, node->priv->name); shutdown_op = custom_action(NULL, shutdown_id, PCMK_ACTION_DO_SHUTDOWN, node, FALSE, node->priv->scheduler); pcmk__order_stops_before_shutdown(node, shutdown_op); pcmk__insert_meta(shutdown_op, PCMK__META_OP_NO_WAIT, PCMK_VALUE_TRUE); return shutdown_op; } /*! * \internal * \brief Calculate and add an operation digest to XML * * Calculate an operation digest, which enables us to later determine when a * restart is needed due to the resource's parameters being changed, and add it * to given XML. * * \param[in] op Operation result from executor * \param[in,out] update XML to add digest to */ static void add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update) { char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS); g_hash_table_foreach(op->params, hash2field, args_xml); pcmk__filter_op_for_digest(args_xml); digest = pcmk__digest_op_params(args_xml); crm_xml_add(update, PCMK__XA_OP_DIGEST, digest); pcmk__xml_free(args_xml); free(digest); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" /*! * \internal * \brief Create XML for resource operation history update * * \param[in,out] parent Parent XML node to add to * \param[in,out] op Operation event data * \param[in] caller_version DC feature set * \param[in] target_rc Expected result of operation * \param[in] node Name of node on which operation was performed * \param[in] origin Arbitrary description of update source * * \return Newly created XML node for history update */ xmlNode * pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, const char *caller_version, int target_rc, const char *node, const char *origin) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); crm_trace("Creating history XML for %s-interval %s action for %s on %s " "(DC version: %s, origin: %s)", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, ((node == NULL)? "no node" : node), caller_version, origin); task = op->op_type; /* Record a successful agent reload as a start, and a failed one as a * monitor, to make life easier for the scheduler when determining the * current state. * * @COMPAT We should check "reload" here only if the operation was for a * pre-OCF-1.1 resource agent, but we don't know that here, and we should * only ever get results for actions scheduled by us, so we can reasonably * assume any "reload" is actually a pre-1.1 agent reload. * * @TODO This remapping can make log messages with task confusing for users * (for example, an "Initiating reload ..." followed by "... start ... * confirmed"). Either do this remapping in the scheduler if possible, or * store the original task in a new XML attribute for later logging. */ if (pcmk__str_any_of(task, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { if (op->op_status == PCMK_EXEC_DONE) { task = PCMK_ACTION_START; } else { task = PCMK_ACTION_MONITOR; } } key = pcmk__op_key(op->rsc_id, task, op->interval_ms); if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = pcmk__notify_key(op->rsc_id, n_type, n_task); if (op->op_status != PCMK_EXEC_PENDING) { /* Ignore notify errors. * * @TODO It might be better to keep the correct result here, and * ignore it in process_graph_event(). */ lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } /* Migration history is preserved separately, which usually matters for * multiple nodes and is important for future cluster transitions. */ } else if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { op_id = strdup(key); } else if (did_rsc_op_fail(op, target_rc)) { op_id = pcmk__op_key(op->rsc_id, "last_failure", 0); if (op->interval_ms == 0) { /* Ensure 'last' gets updated, in case PCMK_META_RECORD_PENDING is * true */ op_id_additional = pcmk__op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval_ms > 0) { op_id = strdup(key); } else { op_id = pcmk__op_key(op->rsc_id, "last", 0); } again: xml_op = pcmk__xe_first_child(parent, PCMK__XE_LRM_RSC_OP, PCMK_XA_ID, op_id); if (xml_op == NULL) { xml_op = pcmk__xe_create(parent, PCMK__XE_LRM_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for: " PCMK__OP_FMT " %d from %s", op->rsc_id, op->op_type, op->interval_ms, op->call_id, origin); local_user_data = pcmk__transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if (magic == NULL) { magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc, (const char *) op->user_data); } crm_xml_add(xml_op, PCMK_XA_ID, op_id); crm_xml_add(xml_op, PCMK__XA_OPERATION_KEY, key); crm_xml_add(xml_op, PCMK_XA_OPERATION, task); crm_xml_add(xml_op, PCMK_XA_CRM_DEBUG_ORIGIN, origin); crm_xml_add(xml_op, PCMK_XA_CRM_FEATURE_SET, caller_version); crm_xml_add(xml_op, PCMK__XA_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, PCMK__XA_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, PCMK_XA_EXIT_REASON, pcmk__s(exit_reason, "")); crm_xml_add(xml_op, PCMK__META_ON_NODE, node); // For context during triage crm_xml_add_int(xml_op, PCMK__XA_CALL_ID, op->call_id); crm_xml_add_int(xml_op, PCMK__XA_RC_CODE, op->rc); crm_xml_add_int(xml_op, PCMK__XA_OP_STATUS, op->op_status); crm_xml_add_ms(xml_op, PCMK_META_INTERVAL, op->interval_ms); if ((op->t_run > 0) || (op->t_rcchange > 0) || (op->exec_time > 0) || (op->queue_time > 0)) { crm_trace("Timing data (" PCMK__OP_FMT "): " "last=%lld change=%lld exec=%u queue=%u", op->rsc_id, op->op_type, op->interval_ms, (long long) op->t_run, (long long) op->t_rcchange, op->exec_time, op->queue_time); if ((op->interval_ms > 0) && (op->t_rcchange > 0)) { // Recurring ops may have changed rc after initial run crm_xml_add_ll(xml_op, PCMK_XA_LAST_RC_CHANGE, (long long) op->t_rcchange); } else { crm_xml_add_ll(xml_op, PCMK_XA_LAST_RC_CHANGE, (long long) op->t_run); } crm_xml_add_int(xml_op, PCMK_XA_EXEC_TIME, op->exec_time); crm_xml_add_int(xml_op, PCMK_XA_QUEUE_TIME, op->queue_time); } if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Record PCMK__META_MIGRATE_SOURCE and PCMK__META_MIGRATE_TARGET always * for migrate ops. */ const char *name = PCMK__META_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = PCMK__META_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } add_op_digest_to_xml(op, xml_op); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } /*! * \internal * \brief Check whether an action shutdown-locks a resource to a node * * If the PCMK_OPT_SHUTDOWN_LOCK cluster property is set, resources will not be * recovered on a different node if cleanly stopped, and may start only on that * same node. This function checks whether that applies to a given action, so * that the transition graph can be marked appropriately. * * \param[in] action Action to check * * \return true if \p action locks its resource to the action's node, * otherwise false */ bool pcmk__action_locks_rsc_to_node(const pcmk_action_t *action) { // Only resource actions taking place on resource's lock node are locked if ((action == NULL) || (action->rsc == NULL) || !pcmk__same_node(action->node, action->rsc->priv->lock_node)) { return false; } /* During shutdown, only stops are locked (otherwise, another action such as * a demote would cause the controller to clear the lock) */ if (action->node->details->shutdown && (action->task != NULL) && (strcmp(action->task, PCMK_ACTION_STOP) != 0)) { return false; } return true; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const pcmk__related_action_t *action_wrapper2 = a; const pcmk__related_action_t *action_wrapper1 = b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } if (action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } return 0; } /*! * \internal * \brief Remove any duplicate action inputs, merging action flags * * \param[in,out] action Action whose inputs should be checked */ void pcmk__deduplicate_action_inputs(pcmk_action_t *action) { GList *item = NULL; GList *next = NULL; pcmk__related_action_t *last_input = NULL; action->actions_before = g_list_sort(action->actions_before, sort_action_id); for (item = action->actions_before; item != NULL; item = next) { pcmk__related_action_t *input = item->data; next = item->next; if ((last_input != NULL) && (input->action->id == last_input->action->id)) { crm_trace("Input %s (%d) duplicate skipped for action %s (%d)", input->action->uuid, input->action->id, action->uuid, action->id); /* For the purposes of scheduling, the ordering flags no longer * matter, but crm_simulate looks at certain ones when creating a * dot graph. Combining the flags is sufficient for that purpose. */ pcmk__set_relation_flags(last_input->flags, input->flags); if (input->graphed) { last_input->graphed = true; } free(item->data); action->actions_before = g_list_delete_link(action->actions_before, item); } else { last_input = input; input->graphed = false; } } } /*! * \internal * \brief Output all scheduled actions * * \param[in,out] scheduler Scheduler data */ void pcmk__output_actions(pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv->out; // Output node (non-resource) actions for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { char *node_name = NULL; char *task = NULL; pcmk_action_t *action = (pcmk_action_t *) iter->data; if (action->rsc != NULL) { continue; // Resource actions will be output later } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { continue; // This action was not scheduled } if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { task = strdup("Shutdown"); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { const char *op = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); task = crm_strdup_printf("Fence (%s)", op); } else { continue; // Don't display other node action types } if (pcmk__is_guest_or_bundle_node(action->node)) { const pcmk_resource_t *remote = action->node->priv->remote; node_name = crm_strdup_printf("%s (resource: %s)", pcmk__node_name(action->node), remote->priv->launcher->id); } else if (action->node != NULL) { node_name = crm_strdup_printf("%s", pcmk__node_name(action->node)); } out->message(out, "node-action", task, node_name, action->reason); free(node_name); free(task); } // Output resource actions for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->output_actions(rsc); } } /*! * \internal * \brief Get action name needed to compare digest for configuration changes * * \param[in] task Action name from history * \param[in] interval_ms Action interval (in milliseconds) * * \return Action name whose digest should be compared */ static const char * task_for_digest(const char *task, guint interval_ms) { /* Certain actions need to be compared against the parameters used to start * the resource. */ if ((interval_ms == 0) && pcmk__str_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_PROMOTE, NULL)) { task = PCMK_ACTION_START; } return task; } /*! * \internal * \brief Check whether only sanitized parameters to an action changed * * When collecting CIB files for troubleshooting, crm_report will mask * sensitive resource parameters. If simulations were run using that, affected * resources would appear to need a restart, which would complicate * troubleshooting. To avoid that, we save a "secure digest" of non-sensitive * parameters. This function used that digest to check whether only masked * parameters are different. * * \param[in] xml_op Resource history entry with secure digest * \param[in] digest_data Operation digest information being compared * \param[in] scheduler Scheduler data * * \return true if only sanitized parameters changed, otherwise false */ static bool only_sanitized_changed(const xmlNode *xml_op, const pcmk__op_digest_t *digest_data, const pcmk_scheduler_t *scheduler) { const char *digest_secure = NULL; if (!pcmk_is_set(scheduler->flags, pcmk__sched_sanitized)) { // The scheduler is not being run as a simulation return false; } digest_secure = pcmk__xe_get(xml_op, PCMK__XA_OP_SECURE_DIGEST); return (digest_data->rc != pcmk__digest_match) && (digest_secure != NULL) && (digest_data->digest_secure_calc != NULL) && (strcmp(digest_data->digest_secure_calc, digest_secure) == 0); } /*! * \internal * \brief Force a restart due to a configuration change * * \param[in,out] rsc Resource that action is for * \param[in] task Name of action whose configuration changed * \param[in] interval_ms Action interval (in milliseconds) * \param[in,out] node Node where resource should be restarted */ static void force_restart(pcmk_resource_t *rsc, const char *task, guint interval_ms, pcmk_node_t *node) { char *key = pcmk__op_key(rsc->id, task, interval_ms); pcmk_action_t *required = custom_action(rsc, key, task, NULL, FALSE, rsc->priv->scheduler); pe_action_set_reason(required, "resource definition change", true); trigger_unfencing(rsc, node, "Device parameters changed", NULL, rsc->priv->scheduler); } /*! * \internal * \brief Schedule a reload of a resource on a node * * \param[in,out] data Resource to reload * \param[in] user_data Where resource should be reloaded */ static void schedule_reload(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; pcmk_action_t *reload = NULL; // For collective resources, just call recursively for children if (rsc->priv->variant > pcmk__rsc_variant_primitive) { g_list_foreach(rsc->priv->children, schedule_reload, user_data); return; } // Skip the reload in certain situations if ((node == NULL) || !pcmk_is_set(rsc->flags, pcmk__rsc_managed) || pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { pcmk__rsc_trace(rsc, "Skip reload of %s:%s%s %s", rsc->id, pcmk_is_set(rsc->flags, pcmk__rsc_managed)? "" : " unmanaged", pcmk_is_set(rsc->flags, pcmk__rsc_failed)? " failed" : "", (node == NULL)? "inactive" : node->priv->name); return; } /* If a resource's configuration changed while a start was pending, * force a full restart instead of a reload. */ if (pcmk_is_set(rsc->flags, pcmk__rsc_start_pending)) { pcmk__rsc_trace(rsc, "%s: preventing agent reload because start pending", rsc->id); custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, rsc->priv->scheduler); return; } // Schedule the reload pcmk__set_rsc_flags(rsc, pcmk__rsc_reload); reload = custom_action(rsc, reload_key(rsc), PCMK_ACTION_RELOAD_AGENT, node, FALSE, rsc->priv->scheduler); pe_action_set_reason(reload, "resource definition change", FALSE); // Set orderings so that a required stop or demote cancels the reload pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->priv->scheduler); pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->priv->scheduler); } /*! * \internal * \brief Handle any configuration change for an action * * Given an action from resource history, if the resource's configuration * changed since the action was done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, etc.). * * \param[in,out] rsc Resource that action is for * \param[in,out] node Node that action was on * \param[in] xml_op Action XML from resource history * * \return true if action configuration changed, otherwise false */ bool pcmk__check_action_config(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *xml_op) { guint interval_ms = 0; const char *task = NULL; const pcmk__op_digest_t *digest_data = NULL; CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL), return false); task = pcmk__xe_get(xml_op, PCMK_XA_OPERATION); CRM_CHECK(task != NULL, return false); pcmk__xe_get_guint(xml_op, PCMK_META_INTERVAL, &interval_ms); - // If this is a recurring action, check whether it has been orphaned + // If this is a recurring action, check whether it has been removed if (interval_ms > 0) { if (pcmk__find_action_config(rsc, task, interval_ms, false) != NULL) { pcmk__rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); } else if (pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_cancel_removed_actions)) { pcmk__schedule_cancel(rsc, pcmk__xe_get(xml_op, PCMK__XA_CALL_ID), - task, interval_ms, node, "orphan"); + task, interval_ms, node, "removed"); return true; } else { - pcmk__rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned", + pcmk__rsc_debug(rsc, "%s-interval %s for %s on %s is removed", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); return true; } } crm_trace("Checking %s-interval %s for %s on %s for configuration changes", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); task = task_for_digest(task, interval_ms); digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->priv->scheduler); if (only_sanitized_changed(xml_op, digest_data, rsc->priv->scheduler)) { if (!pcmk__is_daemon && (rsc->priv->scheduler->priv->out != NULL)) { pcmk__output_t *out = rsc->priv->scheduler->priv->out; out->info(out, "Only 'private' parameters to %s-interval %s for %s " "on %s changed: %s", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node), pcmk__xe_get(xml_op, PCMK__XA_TRANSITION_MAGIC)); } return false; } switch (digest_data->rc) { case pcmk__digest_restart: crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); return true; case pcmk__digest_unknown: case pcmk__digest_mismatch: // Changes that can potentially be handled by an agent reload if (interval_ms > 0) { /* Recurring actions aren't reloaded per se, they are just * re-scheduled so the next run uses the new parameters. * The old instance will be cancelled automatically. */ crm_log_xml_debug(digest_data->params_all, "params:reschedule"); pcmk__reschedule_recurring(rsc, task, interval_ms, node); } else if (pcmk__xe_get(xml_op, PCMK__XA_OP_RESTART_DIGEST) != NULL) { // Agent supports reload, so use it trigger_unfencing(rsc, node, "Device parameters changed (reload)", NULL, rsc->priv->scheduler); crm_log_xml_debug(digest_data->params_all, "params:reload"); schedule_reload((gpointer) rsc, (gpointer) node); } else { pcmk__rsc_trace(rsc, "Restarting %s " "because agent doesn't support reload", rsc->id); crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); } return true; default: break; } return false; } /*! * \internal * \brief Create a list of resource's action history entries, sorted by call ID * * \param[in] rsc_entry Resource's \c PCMK__XE_LRM_RSC_OP status XML * \param[out] start_index Where to store index of start-like action, if any * \param[out] stop_index Where to store index of stop action, if any */ static GList * rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index) { GList *ops = NULL; for (xmlNode *rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) { ops = g_list_prepend(ops, rsc_op); } ops = g_list_sort(ops, sort_op_by_callid); calculate_active_ops(ops, start_index, stop_index); return ops; } /*! * \internal * \brief Process a resource's action history from the CIB status * * Given a resource's action history, if the resource's configuration * changed since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in] rsc_entry Resource's \c PCMK__XE_LRM_RSC_OP status XML * \param[in,out] rsc Resource whose history is being processed * \param[in,out] node Node whose history is being processed */ static void process_rsc_history(const xmlNode *rsc_entry, pcmk_resource_t *rsc, pcmk_node_t *node) { int offset = -1; int stop_index = 0; int start_index = 0; GList *sorted_op_list = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { if (pcmk__is_anonymous_clone(pe__const_top_resource(rsc, false))) { /* @TODO Should this be done for bundled primitives as well? Added * by 2ac43ae31 */ pcmk__rsc_trace(rsc, "Skipping configuration check " - "for orphaned clone instance %s", + "for removed clone instance %s", rsc->id); } else { pcmk__rsc_trace(rsc, "Skipping configuration check and scheduling " - "clean-up for orphaned resource %s", rsc->id); + "clean-up for removed resource %s", rsc->id); pcmk__schedule_cleanup(rsc, node, false); } return; } if (pe_find_node_id(rsc->priv->active_nodes, node->priv->id) == NULL) { if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) { pcmk__schedule_cleanup(rsc, node, false); } pcmk__rsc_trace(rsc, "Skipping configuration check for %s " "because no longer active on %s", rsc->id, pcmk__node_name(node)); return; } pcmk__rsc_trace(rsc, "Checking for configuration changes for %s on %s", rsc->id, pcmk__node_name(node)); if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) { pcmk__schedule_cleanup(rsc, node, false); } sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index); if (start_index < stop_index) { return; // Resource is stopped } for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { xmlNode *rsc_op = (xmlNode *) iter->data; const char *task = NULL; guint interval_ms = 0; if (++offset < start_index) { // Skip actions that happened before a start continue; } task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION); pcmk__xe_get_guint(rsc_op, PCMK_META_INTERVAL, &interval_ms); if ((interval_ms > 0) && (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations pcmk__schedule_cancel(rsc, pcmk__xe_get(rsc_op, PCMK__XA_CALL_ID), task, interval_ms, node, "maintenance mode"); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. */ if (pe__bundle_needs_remote_name(rsc)) { /* We haven't assigned resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pcmk__add_param_check(rsc_op, rsc, node, pcmk__check_active); } else if (pcmk__check_action_config(rsc, node, rsc_op) && (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) != 0)) { pe__clear_failcount(rsc, node, "action definition changed", rsc->priv->scheduler); } } } g_list_free(sorted_op_list); } /*! * \internal * \brief Process a node's action history from the CIB status * * Given a node's resource history, if the resource's configuration changed * since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] node Node whose history is being processed * \param[in] lrm_rscs Node's \c PCMK__XE_LRM_RESOURCES from CIB status XML */ static void process_node_history(pcmk_node_t *node, const xmlNode *lrm_rscs) { crm_trace("Processing node history for %s", pcmk__node_name(node)); for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rscs, PCMK__XE_LRM_RESOURCE, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) { if (rsc_entry->children != NULL) { GList *result = pcmk__rscs_matching_id(pcmk__xe_id(rsc_entry), node->priv->scheduler); for (GList *iter = result; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (pcmk__is_primitive(rsc)) { process_rsc_history(rsc_entry, rsc, node); } } g_list_free(result); } } } // XPath to find a node's resource history #define XPATH_NODE_HISTORY "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES /*! * \internal * \brief Process any resource configuration changes in the CIB status * * Go through all nodes' resource history, and if a resource's configuration * changed since its actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] scheduler Scheduler data */ void pcmk__handle_rsc_config_changes(pcmk_scheduler_t *scheduler) { crm_trace("Check resource and action configuration for changes"); /* Rather than iterate through the status section, iterate through the nodes * and search for the appropriate status subsection for each. This skips - * orphaned nodes and lets us eliminate some cases before searching the XML. + * removed nodes and lets us eliminate some cases before searching the XML. */ for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; /* Don't bother checking actions for a node that can't run actions ... * unless it's in maintenance mode, in which case we still need to * cancel any existing recurring monitors. */ if (node->details->maintenance || pcmk__node_available(node, false, false)) { char *xpath = NULL; xmlNode *history = NULL; xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->priv->name); history = pcmk__xpath_find_one(scheduler->input->doc, xpath, LOG_NEVER); free(xpath); process_node_history(node, history); } } } diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c index b5b5af5c13..99bd157f7f 100644 --- a/lib/pacemaker/pcmk_sched_instances.c +++ b/lib/pacemaker/pcmk_sched_instances.c @@ -1,1717 +1,1717 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* This file is intended for code usable with both clone instances and bundle * replica containers. */ #include #include // g_str_has_suffix() #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Check whether a node is allowed to run an instance * * \param[in] instance Clone instance or bundle container to check * \param[in] node Node to check * \param[in] max_per_node Maximum number of instances allowed to run on a node * * \return true if \p node is allowed to run \p instance, otherwise false */ static bool can_run_instance(const pcmk_resource_t *instance, const pcmk_node_t *node, int max_per_node) { pcmk_node_t *allowed_node = NULL; if (pcmk_is_set(instance->flags, pcmk__rsc_removed)) { - pcmk__rsc_trace(instance, "%s cannot run on %s: orphaned", + pcmk__rsc_trace(instance, "%s cannot run on %s: removed", instance->id, pcmk__node_name(node)); return false; } if (!pcmk__node_available(node, false, false)) { pcmk__rsc_trace(instance, "%s cannot run on %s: node cannot run resources", instance->id, pcmk__node_name(node)); return false; } allowed_node = pcmk__top_allowed_node(instance, node); if (allowed_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", instance->id, pcmk__node_name(node)); return false; } if (allowed_node->assign->score < 0) { pcmk__rsc_trace(instance, "%s cannot run on %s: parent score is %s there", instance->id, pcmk__node_name(node), pcmk_readable_score(allowed_node->assign->score)); return false; } if (allowed_node->assign->count >= max_per_node) { pcmk__rsc_trace(instance, "%s cannot run on %s: node already has %d instance%s", instance->id, pcmk__node_name(node), max_per_node, pcmk__plural_s(max_per_node)); return false; } pcmk__rsc_trace(instance, "%s can run on %s (%d already running)", instance->id, pcmk__node_name(node), allowed_node->assign->count); return true; } /*! * \internal * \brief Ban a clone instance or bundle replica from unavailable allowed nodes * * \param[in,out] instance Clone instance or bundle replica to ban * \param[in] max_per_node Maximum instances allowed to run on a node */ static void ban_unavailable_allowed_nodes(pcmk_resource_t *instance, int max_per_node) { if (instance->priv->allowed_nodes != NULL) { GHashTableIter iter; pcmk_node_t *node = NULL; g_hash_table_iter_init(&iter, instance->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (!can_run_instance(instance, node, max_per_node)) { pcmk__rsc_trace(instance, "Banning %s from unavailable node %s", instance->id, pcmk__node_name(node)); node->assign->score = -PCMK_SCORE_INFINITY; for (GList *child_iter = instance->priv->children; child_iter != NULL; child_iter = child_iter->next) { pcmk_resource_t *child = child_iter->data; pcmk_node_t *child_node = NULL; child_node = g_hash_table_lookup(child->priv->allowed_nodes, node->priv->id); if (child_node != NULL) { pcmk__rsc_trace(instance, "Banning %s child %s " "from unavailable node %s", instance->id, child->id, pcmk__node_name(node)); child_node->assign->score = -PCMK_SCORE_INFINITY; } } } } } } /*! * \internal * \brief Create a hash table with a single node in it * * \param[in] node Node to copy into new table * * \return Newly created hash table containing a copy of \p node * \note The caller is responsible for freeing the result with * g_hash_table_destroy(). */ static GHashTable * new_node_table(pcmk_node_t *node) { GHashTable *table = pcmk__strkey_table(NULL, pcmk__free_node_copy); node = pe__copy_node(node); g_hash_table_insert(table, (gpointer) node->priv->id, node); return table; } /*! * \internal * \brief Apply a resource's parent's colocation scores to a node table * * \param[in] rsc Resource whose colocations should be applied * \param[in,out] nodes Node table to apply colocations to */ static void apply_parent_colocations(const pcmk_resource_t *rsc, GHashTable **nodes) { GList *colocations = pcmk__this_with_colocations(rsc); for (const GList *iter = colocations; iter != NULL; iter = iter->next) { const pcmk__colocation_t *colocation = iter->data; pcmk_resource_t *other = colocation->primary; float factor = colocation->score / (float) PCMK_SCORE_INFINITY; other->priv->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes, colocation, factor, pcmk__coloc_select_default); } g_list_free(colocations); colocations = pcmk__with_this_colocations(rsc); for (const GList *iter = colocations; iter != NULL; iter = iter->next) { const pcmk__colocation_t *colocation = iter->data; pcmk_resource_t *other = colocation->dependent; float factor = colocation->score / (float) PCMK_SCORE_INFINITY; if (!pcmk__colocation_has_influence(colocation, rsc)) { continue; } other->priv->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes, colocation, factor, pcmk__coloc_select_nonnegative); } g_list_free(colocations); } /*! * \internal * \brief Compare clone or bundle instances based on colocation scores * * Determine the relative order in which two clone or bundle instances should be * assigned to nodes, considering the scores of colocation constraints directly * or indirectly involving them. * * \param[in] instance1 First instance to compare * \param[in] instance2 Second instance to compare * * \return A negative number if \p instance1 should be assigned first, * a positive number if \p instance2 should be assigned first, * or 0 if assignment order doesn't matter */ static int cmp_instance_by_colocation(const pcmk_resource_t *instance1, const pcmk_resource_t *instance2) { int rc = 0; pcmk_node_t *node1 = NULL; pcmk_node_t *node2 = NULL; pcmk_node_t *current_node1 = pcmk__current_node(instance1); pcmk_node_t *current_node2 = pcmk__current_node(instance2); GHashTable *colocated_scores1 = NULL; GHashTable *colocated_scores2 = NULL; pcmk__assert((instance1 != NULL) && (instance1->priv->parent != NULL) && (instance2 != NULL) && (instance2->priv->parent != NULL) && (current_node1 != NULL) && (current_node2 != NULL)); // Create node tables initialized with each node colocated_scores1 = new_node_table(current_node1); colocated_scores2 = new_node_table(current_node2); // Apply parental colocations apply_parent_colocations(instance1, &colocated_scores1); apply_parent_colocations(instance2, &colocated_scores2); // Find original nodes again, with scores updated for colocations node1 = g_hash_table_lookup(colocated_scores1, current_node1->priv->id); node2 = g_hash_table_lookup(colocated_scores2, current_node2->priv->id); // Compare nodes by updated scores if (node1->assign->score < node2->assign->score) { crm_trace("Assign %s (%d on %s) after %s (%d on %s)", instance1->id, node1->assign->score, pcmk__node_name(node1), instance2->id, node2->assign->score, pcmk__node_name(node2)); rc = 1; } else if (node1->assign->score > node2->assign->score) { crm_trace("Assign %s (%d on %s) before %s (%d on %s)", instance1->id, node1->assign->score, pcmk__node_name(node1), instance2->id, node2->assign->score, pcmk__node_name(node2)); rc = -1; } g_hash_table_destroy(colocated_scores1); g_hash_table_destroy(colocated_scores2); return rc; } /*! * \internal * \brief Check whether a resource or any of its children are failed * * \param[in] rsc Resource to check * * \return true if \p rsc or any of its children are failed, otherwise false */ static bool did_fail(const pcmk_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { return true; } for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { if (did_fail((const pcmk_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node is allowed to run a resource * * \param[in] rsc Resource to check * \param[in,out] node Node to check (will be set NULL if not allowed) * * \return true if *node is either NULL or allowed for \p rsc, otherwise false */ static bool node_is_allowed(const pcmk_resource_t *rsc, pcmk_node_t **node) { if (*node != NULL) { pcmk_node_t *allowed = g_hash_table_lookup(rsc->priv->allowed_nodes, (*node)->priv->id); if ((allowed == NULL) || (allowed->assign->score < 0)) { pcmk__rsc_trace(rsc, "%s: current location (%s) is unavailable", rsc->id, pcmk__node_name(*node)); *node = NULL; return false; } } return true; } /*! * \internal * \brief Compare two clone or bundle instances' instance numbers * * \param[in] a First instance to compare * \param[in] b Second instance to compare * * \return A negative number if \p a's instance number is lower, * a positive number if \p b's instance number is lower, * or 0 if their instance numbers are the same */ gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b) { const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a; const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b; char *div1 = NULL; char *div2 = NULL; pcmk__assert((instance1 != NULL) && (instance2 != NULL)); // Clone numbers are after a colon, bundle numbers after a dash div1 = strrchr(instance1->id, ':'); if (div1 == NULL) { div1 = strrchr(instance1->id, '-'); } div2 = strrchr(instance2->id, ':'); if (div2 == NULL) { div2 = strrchr(instance2->id, '-'); } pcmk__assert((div1 != NULL) && (div2 != NULL)); return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10)); } /*! * \internal * \brief Compare clone or bundle instances according to assignment order * * Compare two clone or bundle instances according to the order they should be * assigned to nodes, preferring (in order): * * - Active instance that is less multiply active * - Instance that is not active on a disallowed node * - Instance with higher configured priority * - Active instance whose current node can run resources * - Active instance whose parent is allowed on current node * - Active instance whose current node has fewer other instances * - Active instance * - Instance that isn't failed * - Instance whose colocations result in higher score on current node * - Instance with lower ID in lexicographic order * * \param[in] a First instance to compare * \param[in] b Second instance to compare * * \return A negative number if \p a should be assigned first, * a positive number if \p b should be assigned first, * or 0 if assignment order doesn't matter */ gint pcmk__cmp_instance(gconstpointer a, gconstpointer b) { int rc = 0; pcmk_node_t *node1 = NULL; pcmk_node_t *node2 = NULL; unsigned int nnodes1 = 0; unsigned int nnodes2 = 0; bool can1 = true; bool can2 = true; const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a; const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b; pcmk__assert((instance1 != NULL) && (instance2 != NULL)); node1 = instance1->priv->fns->active_node(instance1, &nnodes1, NULL); node2 = instance2->priv->fns->active_node(instance2, &nnodes2, NULL); /* If both instances are running and at least one is multiply * active, prefer instance that's running on fewer nodes. */ if ((nnodes1 > 0) && (nnodes2 > 0)) { if (nnodes1 < nnodes2) { crm_trace("Assign %s (active on %d) before %s (active on %d): " "less multiply active", instance1->id, nnodes1, instance2->id, nnodes2); return -1; } else if (nnodes1 > nnodes2) { crm_trace("Assign %s (active on %d) after %s (active on %d): " "more multiply active", instance1->id, nnodes1, instance2->id, nnodes2); return 1; } } /* An instance that is either inactive or active on an allowed node is * preferred over an instance that is active on a no-longer-allowed node. */ can1 = node_is_allowed(instance1, &node1); can2 = node_is_allowed(instance2, &node2); if (can1 && !can2) { crm_trace("Assign %s before %s: not active on a disallowed node", instance1->id, instance2->id); return -1; } else if (!can1 && can2) { crm_trace("Assign %s after %s: active on a disallowed node", instance1->id, instance2->id); return 1; } // Prefer instance with higher configured priority if (instance1->priv->priority > instance2->priv->priority) { crm_trace("Assign %s before %s: priority (%d > %d)", instance1->id, instance2->id, instance1->priv->priority, instance2->priv->priority); return -1; } else if (instance1->priv->priority < instance2->priv->priority) { crm_trace("Assign %s after %s: priority (%d < %d)", instance1->id, instance2->id, instance1->priv->priority, instance2->priv->priority); return 1; } // Prefer active instance if ((node1 == NULL) && (node2 == NULL)) { crm_trace("No assignment preference for %s vs. %s: inactive", instance1->id, instance2->id); return 0; } else if (node1 == NULL) { crm_trace("Assign %s after %s: active", instance1->id, instance2->id); return 1; } else if (node2 == NULL) { crm_trace("Assign %s before %s: active", instance1->id, instance2->id); return -1; } // Prefer instance whose current node can run resources can1 = pcmk__node_available(node1, false, false); can2 = pcmk__node_available(node2, false, false); if (can1 && !can2) { crm_trace("Assign %s before %s: current node can run resources", instance1->id, instance2->id); return -1; } else if (!can1 && can2) { crm_trace("Assign %s after %s: current node can't run resources", instance1->id, instance2->id); return 1; } // Prefer instance whose parent is allowed to run on instance's current node node1 = pcmk__top_allowed_node(instance1, node1); node2 = pcmk__top_allowed_node(instance2, node2); if ((node1 == NULL) && (node2 == NULL)) { crm_trace("No assignment preference for %s vs. %s: " "parent not allowed on either instance's current node", instance1->id, instance2->id); return 0; } else if (node1 == NULL) { crm_trace("Assign %s after %s: parent not allowed on current node", instance1->id, instance2->id); return 1; } else if (node2 == NULL) { crm_trace("Assign %s before %s: parent allowed on current node", instance1->id, instance2->id); return -1; } // Prefer instance whose current node is running fewer other instances if (node1->assign->count < node2->assign->count) { crm_trace("Assign %s before %s: fewer active instances on current node", instance1->id, instance2->id); return -1; } else if (node1->assign->count > node2->assign->count) { crm_trace("Assign %s after %s: more active instances on current node", instance1->id, instance2->id); return 1; } // Prefer instance that isn't failed can1 = did_fail(instance1); can2 = did_fail(instance2); if (!can1 && can2) { crm_trace("Assign %s before %s: not failed", instance1->id, instance2->id); return -1; } else if (can1 && !can2) { crm_trace("Assign %s after %s: failed", instance1->id, instance2->id); return 1; } // Prefer instance with higher cumulative colocation score on current node rc = cmp_instance_by_colocation(instance1, instance2); if (rc != 0) { return rc; } // Prefer instance with lower instance number rc = pcmk__cmp_instance_number(instance1, instance2); if (rc < 0) { crm_trace("Assign %s before %s: instance number", instance1->id, instance2->id); } else if (rc > 0) { crm_trace("Assign %s after %s: instance number", instance1->id, instance2->id); } else { crm_trace("No assignment preference for %s vs. %s", instance1->id, instance2->id); } return rc; } /*! * \internal * \brief Increment the parent's instance count after assigning an instance * * An instance's parent tracks how many instances have been assigned to each * node via its pcmk_node_t:count member. After assigning an instance to a node, * find the corresponding node in the parent's allowed table and increment it. * * \param[in,out] instance Instance whose parent to update * \param[in] assigned_to Node to which the instance was assigned */ static void increment_parent_count(pcmk_resource_t *instance, const pcmk_node_t *assigned_to) { pcmk_node_t *allowed = NULL; if (assigned_to == NULL) { return; } allowed = pcmk__top_allowed_node(instance, assigned_to); if (allowed == NULL) { /* The instance is allowed on the node, but its parent isn't. This * shouldn't be possible if the resource is managed, and we won't be * able to limit the number of instances assigned to the node. */ CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pcmk__rsc_managed)); } else { allowed->assign->count++; } } /*! * \internal * \brief Assign an instance to a node * * \param[in,out] instance Clone instance or bundle replica container * \param[in] prefer If not NULL, attempt early assignment to this * node, if still the best choice; otherwise, * perform final assignment * \param[in] max_per_node Assign at most this many instances to one node * * \return Node to which \p instance is assigned */ static const pcmk_node_t * assign_instance(pcmk_resource_t *instance, const pcmk_node_t *prefer, int max_per_node) { pcmk_node_t *chosen = NULL; pcmk__rsc_trace(instance, "Assigning %s (preferring %s)", instance->id, ((prefer == NULL)? "no node" : prefer->priv->name)); if (pcmk_is_set(instance->flags, pcmk__rsc_assigning)) { pcmk__rsc_debug(instance, "Assignment loop detected involving %s colocations", instance->id); return NULL; } ban_unavailable_allowed_nodes(instance, max_per_node); // Failed early assignments are reversible (stop_if_fail=false) chosen = instance->priv->cmds->assign(instance, prefer, (prefer == NULL)); increment_parent_count(instance, chosen); return chosen; } /*! * \internal * \brief Try to assign an instance to its current node early * * \param[in] rsc Clone or bundle being assigned (for logs only) * \param[in] instance Clone instance or bundle replica container * \param[in] current Instance's current node * \param[in] max_per_node Maximum number of instances per node * \param[in] available Number of instances still available for assignment * * \return \c true if \p instance was successfully assigned to its current node, * or \c false otherwise */ static bool assign_instance_early(const pcmk_resource_t *rsc, pcmk_resource_t *instance, const pcmk_node_t *current, int max_per_node, int available) { const pcmk_node_t *chosen = NULL; int reserved = 0; pcmk_resource_t *parent = instance->priv->parent; GHashTable *allowed_orig = NULL; GHashTable *allowed_orig_parent = parent->priv->allowed_nodes; const pcmk_node_t *allowed_node = NULL; pcmk__rsc_trace(instance, "Trying to assign %s to its current node %s", instance->id, pcmk__node_name(current)); allowed_node = g_hash_table_lookup(instance->priv->allowed_nodes, current->priv->id); if (!pcmk__node_available(allowed_node, true, false)) { pcmk__rsc_info(instance, "Not assigning %s to current node %s: unavailable", instance->id, pcmk__node_name(current)); return false; } /* On each iteration, if instance gets assigned to a node other than its * current one, we reserve one instance for the chosen node, unassign * instance, restore instance's original node tables, and try again. This * way, instances are proportionally assigned to nodes based on preferences, * but shuffling of specific instances is minimized. If a node will be * assigned instances at all, it preferentially receives instances that are * currently active there. * * parent->private->allowed_nodes tracks the number of instances assigned to * each node. If a node already has max_per_node instances assigned, * ban_unavailable_allowed_nodes() marks it as unavailable. * * In the end, we restore the original parent->private->allowed_nodes to * undo the changes to counts during tentative assignments. If we * successfully assigned an instance to its current node, we increment that * node's counter. */ // Back up the allowed node tables of instance and its children recursively pcmk__copy_node_tables(instance, &allowed_orig); // Update instances-per-node counts in a scratch table parent->priv->allowed_nodes = pcmk__copy_node_table(allowed_orig_parent); while (reserved < available) { chosen = assign_instance(instance, current, max_per_node); if (pcmk__same_node(chosen, current)) { // Successfully assigned to current node break; } // Assignment updates scores, so restore to original state pcmk__rsc_debug(instance, "Rolling back node scores for %s", instance->id); pcmk__restore_node_tables(instance, allowed_orig); if (chosen == NULL) { // Assignment failed, so give up pcmk__rsc_info(instance, "Not assigning %s to current node %s: unavailable", instance->id, pcmk__node_name(current)); pcmk__set_rsc_flags(instance, pcmk__rsc_unassigned); break; } // We prefer more strongly to assign an instance to the chosen node pcmk__rsc_debug(instance, "Not assigning %s to current node %s: %s is better", instance->id, pcmk__node_name(current), pcmk__node_name(chosen)); // Reserve one instance for the chosen node and try again if (++reserved >= available) { pcmk__rsc_info(instance, "Not assigning %s to current node %s: " "other assignments are more important", instance->id, pcmk__node_name(current)); } else { pcmk__rsc_debug(instance, "Reserved an instance of %s for %s. Retrying " "assignment of %s to %s", rsc->id, pcmk__node_name(chosen), instance->id, pcmk__node_name(current)); } // Clear this assignment (frees chosen); leave instance counts in parent pcmk__unassign_resource(instance); chosen = NULL; } g_hash_table_destroy(allowed_orig); // Restore original instances-per-node counts g_hash_table_destroy(parent->priv->allowed_nodes); parent->priv->allowed_nodes = allowed_orig_parent; if (chosen == NULL) { // Couldn't assign instance to current node return false; } pcmk__rsc_trace(instance, "Assigned %s to current node %s", instance->id, pcmk__node_name(current)); increment_parent_count(instance, chosen); return true; } /*! * \internal * \brief Reset the node counts of a resource's allowed nodes to zero * * \param[in,out] rsc Resource to reset * * \return Number of nodes that are available to run resources */ static unsigned int reset_allowed_node_counts(pcmk_resource_t *rsc) { unsigned int available_nodes = 0; pcmk_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->assign->count = 0; if (pcmk__node_available(node, false, false)) { available_nodes++; } } return available_nodes; } /*! * \internal * \brief Check whether an instance has a preferred node * * \param[in] instance Clone instance or bundle replica container * \param[in] optimal_per_node Optimal number of instances per node * * \return Instance's current node if still available, otherwise NULL */ static const pcmk_node_t * preferred_node(const pcmk_resource_t *instance, int optimal_per_node) { const pcmk_node_t *node = NULL; const pcmk_node_t *parent_node = NULL; // Check whether instance is active, healthy, and not yet assigned if ((instance->priv->active_nodes == NULL) || !pcmk_is_set(instance->flags, pcmk__rsc_unassigned) || pcmk_is_set(instance->flags, pcmk__rsc_failed)) { return NULL; } // Check whether instance's current node can run resources node = pcmk__current_node(instance); if (!pcmk__node_available(node, true, false)) { pcmk__rsc_trace(instance, "Not assigning %s to %s early (unavailable)", instance->id, pcmk__node_name(node)); return NULL; } // Check whether node already has optimal number of instances assigned parent_node = pcmk__top_allowed_node(instance, node); if ((parent_node != NULL) && (parent_node->assign->count >= optimal_per_node)) { pcmk__rsc_trace(instance, "Not assigning %s to %s early " "(optimal instances already assigned)", instance->id, pcmk__node_name(node)); return NULL; } return node; } /*! * \internal * \brief Assign collective instances to nodes * * \param[in,out] collective Clone or bundle resource being assigned * \param[in,out] instances List of clone instances or bundle containers * \param[in] max_total Maximum instances to assign in total * \param[in] max_per_node Maximum instances to assign to any one node */ void pcmk__assign_instances(pcmk_resource_t *collective, GList *instances, int max_total, int max_per_node) { // Reuse node count to track number of assigned instances unsigned int available_nodes = reset_allowed_node_counts(collective); int optimal_per_node = 0; int assigned = 0; GList *iter = NULL; pcmk_resource_t *instance = NULL; const pcmk_node_t *current = NULL; if (available_nodes > 0) { optimal_per_node = max_total / available_nodes; } if (optimal_per_node < 1) { optimal_per_node = 1; } pcmk__rsc_debug(collective, "Assigning up to %d %s instance%s to up to %u node%s " "(at most %d per host, %d optimal)", max_total, collective->id, pcmk__plural_s(max_total), available_nodes, pcmk__plural_s(available_nodes), max_per_node, optimal_per_node); // Assign as many instances as possible to their current location for (iter = instances; (iter != NULL) && (assigned < max_total); iter = iter->next) { int available = max_total - assigned; instance = iter->data; if (!pcmk_is_set(instance->flags, pcmk__rsc_unassigned)) { continue; // Already assigned } current = preferred_node(instance, optimal_per_node); if ((current != NULL) && assign_instance_early(collective, instance, current, max_per_node, available)) { assigned++; } } pcmk__rsc_trace(collective, "Assigned %d of %d instance%s to current node", assigned, max_total, pcmk__plural_s(max_total)); for (iter = instances; iter != NULL; iter = iter->next) { instance = (pcmk_resource_t *) iter->data; if (!pcmk_is_set(instance->flags, pcmk__rsc_unassigned)) { continue; // Already assigned } if (instance->priv->active_nodes != NULL) { current = pcmk__current_node(instance); if (pcmk__top_allowed_node(instance, current) == NULL) { const char *unmanaged = ""; if (!pcmk_is_set(instance->flags, pcmk__rsc_managed)) { unmanaged = "Unmanaged resource "; } crm_notice("%s%s is running on %s which is no longer allowed", unmanaged, instance->id, pcmk__node_name(current)); } } if (assigned >= max_total) { pcmk__rsc_debug(collective, "Not assigning %s because maximum %d instances " "already assigned", instance->id, max_total); resource_location(instance, NULL, -PCMK_SCORE_INFINITY, "collective_limit_reached", collective->priv->scheduler); } else if (assign_instance(instance, NULL, max_per_node) != NULL) { assigned++; } } pcmk__rsc_debug(collective, "Assigned %d of %d possible instance%s of %s", assigned, max_total, pcmk__plural_s(max_total), collective->id); } enum instance_state { instance_starting = (1 << 0), instance_stopping = (1 << 1), /* This indicates that some instance is restarting. It's not the same as * instance_starting|instance_stopping, which would indicate that some * instance is starting, and some instance (not necessarily the same one) is * stopping. */ instance_restarting = (1 << 2), instance_active = (1 << 3), instance_all = instance_starting|instance_stopping |instance_restarting|instance_active, }; /*! * \internal * \brief Check whether an instance is active, starting, and/or stopping * * \param[in] instance Clone instance or bundle replica container * \param[in,out] state Whether any instance is starting, stopping, etc. */ static void check_instance_state(const pcmk_resource_t *instance, uint32_t *state) { const GList *iter = NULL; uint32_t instance_state = 0; // State of just this instance // No need to check further if all conditions have already been detected if (pcmk_all_flags_set(*state, instance_all)) { return; } // If instance is a collective (a cloned group), check its children instead if (instance->priv->variant > pcmk__rsc_variant_primitive) { for (iter = instance->priv->children; (iter != NULL) && !pcmk_all_flags_set(*state, instance_all); iter = iter->next) { check_instance_state((const pcmk_resource_t *) iter->data, state); } return; } // If we get here, instance is a primitive if (instance->priv->active_nodes != NULL) { instance_state |= instance_active; } // Check each of the instance's actions for runnable start or stop for (iter = instance->priv->actions; (iter != NULL) && !pcmk_all_flags_set(instance_state, instance_starting |instance_stopping); iter = iter->next) { const pcmk_action_t *action = (const pcmk_action_t *) iter->data; const bool optional = pcmk_is_set(action->flags, pcmk__action_optional); if (pcmk__str_eq(PCMK_ACTION_START, action->task, pcmk__str_none)) { if (!optional && pcmk_is_set(action->flags, pcmk__action_runnable)) { pcmk__rsc_trace(instance, "Instance is starting due to %s", action->uuid); instance_state |= instance_starting; } else { pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)", action->uuid, instance->id, (optional? "optional" : "unrunnable")); } } else if (pcmk__str_eq(PCMK_ACTION_STOP, action->task, pcmk__str_none)) { /* Only stop actions can be pseudo-actions for primitives. That * indicates that the node they are on is being fenced, so the stop * is implied rather than actually executed. */ if (!optional && pcmk_any_flags_set(action->flags, pcmk__action_pseudo |pcmk__action_runnable)) { pcmk__rsc_trace(instance, "Instance is stopping due to %s", action->uuid); instance_state |= instance_stopping; } else { pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)", action->uuid, instance->id, (optional? "optional" : "unrunnable")); } } } if (pcmk_all_flags_set(instance_state, instance_starting|instance_stopping)) { instance_state |= instance_restarting; } *state |= instance_state; } /*! * \internal * \brief Create actions for collective resource instances * * \param[in,out] collective Clone or bundle resource to create actions for * \param[in,out] instances List of clone instances or bundle containers */ void pcmk__create_instance_actions(pcmk_resource_t *collective, GList *instances) { uint32_t state = 0; pcmk_action_t *stop = NULL; pcmk_action_t *stopped = NULL; pcmk_action_t *start = NULL; pcmk_action_t *started = NULL; pcmk__rsc_trace(collective, "Creating collective instance actions for %s", collective->id); // Create actions for each instance appropriate to its variant for (GList *iter = instances; iter != NULL; iter = iter->next) { pcmk_resource_t *instance = (pcmk_resource_t *) iter->data; instance->priv->cmds->create_actions(instance); check_instance_state(instance, &state); } // Create pseudo-actions for rsc start and started start = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_START, !pcmk_is_set(state, instance_starting), true); started = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_RUNNING, !pcmk_is_set(state, instance_starting), false); started->priority = PCMK_SCORE_INFINITY; if (pcmk_any_flags_set(state, instance_active|instance_starting)) { pcmk__set_action_flags(started, pcmk__action_runnable); } // Create pseudo-actions for rsc stop and stopped stop = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOP, !pcmk_is_set(state, instance_stopping), true); stopped = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOPPED, !pcmk_is_set(state, instance_stopping), true); stopped->priority = PCMK_SCORE_INFINITY; if (!pcmk_is_set(state, instance_restarting)) { pcmk__set_action_flags(stop, pcmk__action_migratable); } if (pcmk__is_clone(collective)) { pe__create_clone_notif_pseudo_ops(collective, start, started, stop, stopped); } } /*! * \internal * \brief Get a list of clone instances or bundle replica containers * * \param[in] rsc Clone or bundle resource * * \return Clone instances if \p rsc is a clone, or a newly created list of * \p rsc's replica containers if \p rsc is a bundle * \note The caller must call free_instance_list() on the result when the list * is no longer needed. */ static inline GList * get_instance_list(const pcmk_resource_t *rsc) { if (pcmk__is_bundle(rsc)) { return pe__bundle_containers(rsc); } else { return rsc->priv->children; } } /*! * \internal * \brief Free any memory created by get_instance_list() * * \param[in] rsc Clone or bundle resource passed to get_instance_list() * \param[in,out] list Return value of get_instance_list() for \p rsc */ static inline void free_instance_list(const pcmk_resource_t *rsc, GList *list) { if (list != rsc->priv->children) { g_list_free(list); } } /*! * \internal * \brief Check whether an instance is compatible with a role and node * * \param[in] instance Clone instance or bundle replica container * \param[in] node Instance must match this node * \param[in] role If not pcmk_role_unknown, instance must match this role * \param[in] current If true, compare instance's original node and role, * otherwise compare assigned next node and role * * \return true if \p instance is compatible with \p node and \p role, * otherwise false */ bool pcmk__instance_matches(const pcmk_resource_t *instance, const pcmk_node_t *node, enum rsc_role_e role, bool current) { pcmk_node_t *instance_node = NULL; CRM_CHECK((instance != NULL) && (node != NULL), return false); if ((role != pcmk_role_unknown) && (role != instance->priv->fns->state(instance, current))) { pcmk__rsc_trace(instance, "%s is not a compatible instance (role is not %s)", instance->id, pcmk_role_text(role)); return false; } if (!is_set_recursive(instance, pcmk__rsc_blocked, true)) { uint32_t target = pcmk__rsc_node_assigned; if (current) { target = pcmk__rsc_node_current; } // We only want instances that haven't failed instance_node = instance->priv->fns->location(instance, NULL, target); } if (instance_node == NULL) { pcmk__rsc_trace(instance, "%s is not a compatible instance " "(not assigned to a node)", instance->id); return false; } if (!pcmk__same_node(instance_node, node)) { pcmk__rsc_trace(instance, "%s is not a compatible instance " "(assigned to %s not %s)", instance->id, pcmk__node_name(instance_node), pcmk__node_name(node)); return false; } return true; } #define display_role(r) \ (((r) == pcmk_role_unknown)? "matching" : pcmk_role_text(r)) /*! * \internal * \brief Find an instance that matches a given resource by node and role * * \param[in] match_rsc Resource that instance must match (for logging only) * \param[in] rsc Clone or bundle resource to check for matching instance * \param[in] node Instance must match this node * \param[in] role If not pcmk_role_unknown, instance must match this role * \param[in] current If true, compare instance's original node and role, * otherwise compare assigned next node and role * * \return \p rsc instance matching \p node and \p role if any, otherwise NULL */ static pcmk_resource_t * find_compatible_instance_on_node(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, const pcmk_node_t *node, enum rsc_role_e role, bool current) { GList *instances = NULL; instances = get_instance_list(rsc); for (GList *iter = instances; iter != NULL; iter = iter->next) { pcmk_resource_t *instance = (pcmk_resource_t *) iter->data; if (pcmk__instance_matches(instance, node, role, current)) { pcmk__rsc_trace(match_rsc, "Found %s %s instance %s compatible with %s on %s", display_role(role), rsc->id, instance->id, match_rsc->id, pcmk__node_name(node)); free_instance_list(rsc, instances); // Only frees list, not contents return instance; } } free_instance_list(rsc, instances); pcmk__rsc_trace(match_rsc, "No %s %s instance found compatible with %s on %s", display_role(role), rsc->id, match_rsc->id, pcmk__node_name(node)); return NULL; } /*! * \internal * \brief Find a clone instance or bundle container compatible with a resource * * \param[in] match_rsc Resource that instance must match * \param[in] rsc Clone or bundle resource to check for matching instance * \param[in] role If not pcmk_role_unknown, instance must match this role * \param[in] current If true, compare instance's original node and role, * otherwise compare assigned next node and role * * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc * if any, otherwise NULL */ pcmk_resource_t * pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, enum rsc_role_e role, bool current) { pcmk_resource_t *instance = NULL; GList *nodes = NULL; const pcmk_node_t *node = NULL; GHashTable *allowed_nodes = match_rsc->priv->allowed_nodes; uint32_t target = pcmk__rsc_node_assigned; if (current) { target = pcmk__rsc_node_current; } // If match_rsc has a node, check only that node node = match_rsc->priv->fns->location(match_rsc, NULL, target); if (node != NULL) { return find_compatible_instance_on_node(match_rsc, rsc, node, role, current); } // Otherwise check for an instance matching any of match_rsc's allowed nodes nodes = pcmk__sort_nodes(g_hash_table_get_values(allowed_nodes), NULL); for (GList *iter = nodes; (iter != NULL) && (instance == NULL); iter = iter->next) { instance = find_compatible_instance_on_node(match_rsc, rsc, (pcmk_node_t *) iter->data, role, current); } if (instance == NULL) { pcmk__rsc_debug(rsc, "No %s instance found compatible with %s", rsc->id, match_rsc->id); } g_list_free(nodes); return instance; } /*! * \internal * \brief Unassign an instance if mandatory ordering has no interleave match * * \param[in] first 'First' action in an ordering * \param[in] then 'Then' action in an ordering * \param[in,out] then_instance 'Then' instance that has no interleave match * \param[in] type Group of enum pcmk__action_relation_flags * \param[in] current If true, "then" action is stopped or demoted * * \return true if \p then_instance was unassigned, otherwise false */ static bool unassign_if_mandatory(const pcmk_action_t *first, const pcmk_action_t *then, pcmk_resource_t *then_instance, uint32_t type, bool current) { // Allow "then" instance to go down even without an interleave match if (current) { pcmk__rsc_trace(then->rsc, "%s has no instance to order before stopping " "or demoting %s", first->rsc->id, then_instance->id); /* If the "first" action must be runnable, but there is no "first" * instance, the "then" instance must not be allowed to come up. */ } else if (pcmk_any_flags_set(type, pcmk__ar_unrunnable_first_blocks |pcmk__ar_first_implies_then)) { pcmk__rsc_info(then->rsc, "Inhibiting %s from being active " "because there is no %s instance to interleave", then_instance->id, first->rsc->id); return pcmk__assign_resource(then_instance, NULL, true, true); } return false; } /*! * \internal * \brief Find first matching action for a clone instance or bundle container * * \param[in] action Action in an interleaved ordering * \param[in] instance Clone instance or bundle container being interleaved * \param[in] action_name Action to look for * \param[in] node If not NULL, require action to be on this node * \param[in] for_first If true, \p instance is the 'first' resource in the * ordering, otherwise it is the 'then' resource * * \return First action for \p instance (or in some cases if \p instance is a * bundle container, its containerized resource) that matches * \p action_name and \p node if any, otherwise NULL */ static pcmk_action_t * find_instance_action(const pcmk_action_t *action, const pcmk_resource_t *instance, const char *action_name, const pcmk_node_t *node, bool for_first) { const pcmk_resource_t *rsc = NULL; pcmk_action_t *matching_action = NULL; /* If instance is a bundle container, sometimes we should interleave the * action for the container itself, and sometimes for the containerized * resource. * * For example, given "start bundle A then bundle B", B likely requires the * service inside A's container to be active, rather than just the * container, so we should interleave the action for A's containerized * resource. On the other hand, it's possible B's container itself requires * something from A, so we should interleave the action for B's container. * * Essentially, for 'first', we should use the containerized resource for * everything except stop, and for 'then', we should use the container for * everything except promote and demote (which can only be performed on the * containerized resource). */ if ((for_first && !pcmk__str_any_of(action->task, PCMK_ACTION_STOP, PCMK_ACTION_STOPPED, NULL)) || (!for_first && pcmk__str_any_of(action->task, PCMK_ACTION_PROMOTE, PCMK_ACTION_PROMOTED, PCMK_ACTION_DEMOTE, PCMK_ACTION_DEMOTED, NULL))) { rsc = pe__get_rsc_in_container(instance); } if (rsc == NULL) { rsc = instance; // No containerized resource, use instance itself } else { node = NULL; // Containerized actions are on bundle-created guest } matching_action = find_first_action(rsc->priv->actions, NULL, action_name, node); if (matching_action != NULL) { return matching_action; } if (pcmk_is_set(instance->flags, pcmk__rsc_removed) || pcmk__is_down_action(action_name)) { crm_trace("No %s action found for %s%s", action_name, - pcmk_is_set(instance->flags, pcmk__rsc_removed)? "orphan " : "", + pcmk_is_set(instance->flags, pcmk__rsc_removed)? "removed " : "", instance->id); } else { crm_err("No %s action found for %s to interleave (bug?)", action_name, instance->id); } return NULL; } /*! * \internal * \brief Get the original action name of a bundle or clone action * * Given an action for a bundle or clone, get the original action name, * mapping notify to the action being notified, and if the instances are * primitives, mapping completion actions to the action that was completed * (for example, stopped to stop). * * \param[in] action Clone or bundle action to check * * \return Original action name for \p action */ static const char * orig_action_name(const pcmk_action_t *action) { // Any instance will do const pcmk_resource_t *instance = action->rsc->priv->children->data; char *action_type = NULL; const char *action_name = action->task; enum pcmk__action_type orig_task = pcmk__action_unspecified; if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_NOTIFIED, NULL)) { // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL), return pcmk__action_text(pcmk__action_unspecified)); action_name = strstr(action_type, "_notify_"); CRM_CHECK(action_name != NULL, return pcmk__action_text(pcmk__action_unspecified)); action_name += strlen("_notify_"); } orig_task = get_complex_task(instance, action_name); free(action_type); return pcmk__action_text(orig_task); } /*! * \internal * \brief Update two interleaved actions according to an ordering between them * * Given information about an ordering of two interleaved actions, update the * actions' flags (and runnable_before members if appropriate) as appropriate * for the ordering. Effects may cascade to other orderings involving the * actions as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * \param[in] filter Action flags to limit scope of certain updates (may * include pcmk__action_optional to affect only * mandatory actions, and pcmk__action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * * \return Group of enum pcmk__updated flags indicating what was updated */ static uint32_t update_interleaved_actions(pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t filter, uint32_t type) { GList *instances = NULL; uint32_t changed = pcmk__updated_none; const char *orig_first_task = orig_action_name(first); // Stops and demotes must be interleaved with instance on current node bool current = (first->uuid != NULL) && (g_str_has_suffix(first->uuid, "_" PCMK_ACTION_STOPPED "_0") || g_str_has_suffix(first->uuid, "_" PCMK_ACTION_DEMOTED "_0")); // Update the specified actions for each "then" instance individually instances = get_instance_list(then->rsc); for (GList *iter = instances; iter != NULL; iter = iter->next) { pcmk_resource_t *first_instance = NULL; pcmk_resource_t *then_instance = iter->data; pcmk_action_t *first_action = NULL; pcmk_action_t *then_action = NULL; // Find a "first" instance to interleave with this "then" instance first_instance = pcmk__find_compatible_instance(then_instance, first->rsc, pcmk_role_unknown, current); if (first_instance == NULL) { // No instance can be interleaved if (unassign_if_mandatory(first, then, then_instance, type, current)) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); } continue; } first_action = find_instance_action(first, first_instance, orig_first_task, node, true); if (first_action == NULL) { continue; } then_action = find_instance_action(then, then_instance, then->task, node, false); if (then_action == NULL) { continue; } if (order_actions(first_action, then_action, type)) { pcmk__set_updated_flags(changed, first, pcmk__updated_first|pcmk__updated_then); } changed |= then_instance->priv->cmds->update_ordered_actions( first_action, then_action, node, first_instance->priv->cmds->action_flags(first_action, node), filter, type, then->rsc->priv->scheduler); } free_instance_list(then->rsc, instances); return changed; } /*! * \internal * \brief Check whether two actions in an ordering can be interleaved * * \param[in] first 'First' action in the ordering * \param[in] then 'Then' action in the ordering * * \return true if \p first and \p then can be interleaved, otherwise false */ static bool can_interleave_actions(const pcmk_action_t *first, const pcmk_action_t *then) { bool interleave = false; pcmk_resource_t *rsc = NULL; if ((first->rsc == NULL) || (then->rsc == NULL)) { crm_trace("Not interleaving %s with %s: not resource actions", first->uuid, then->uuid); return false; } if (first->rsc == then->rsc) { crm_trace("Not interleaving %s with %s: same resource", first->uuid, then->uuid); return false; } if ((first->rsc->priv->variant < pcmk__rsc_variant_clone) || (then->rsc->priv->variant < pcmk__rsc_variant_clone)) { crm_trace("Not interleaving %s with %s: not clones or bundles", first->uuid, then->uuid); return false; } if (g_str_has_suffix(then->uuid, "_stop_0") || g_str_has_suffix(then->uuid, "_demote_0")) { rsc = first->rsc; } else { rsc = then->rsc; } interleave = crm_is_true(g_hash_table_lookup(rsc->priv->meta, PCMK_META_INTERLEAVE)); pcmk__rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)", first->uuid, then->uuid, (interleave? "" : "not "), rsc->id); return interleave; } /*! * \internal * \brief Update non-interleaved instance actions according to an ordering * * Given information about an ordering of two non-interleaved actions, update * the actions' flags (and runnable_before members if appropriate) as * appropriate for the ordering. Effects may cascade to other orderings * involving the actions as well. * * \param[in,out] instance Clone instance or bundle container * \param[in,out] first "First" action in ordering * \param[in] then "Then" action in ordering (for \p instance's parent) * \param[in] node If not NULL, limit scope of ordering to this node * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pcmk__action_optional to affect only * mandatory actions, and pcmk__action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * * \return Group of enum pcmk__updated flags indicating what was updated */ static uint32_t update_noninterleaved_actions(pcmk_resource_t *instance, pcmk_action_t *first, const pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type) { pcmk_action_t *instance_action = NULL; pcmk_scheduler_t *scheduler = instance->priv->scheduler; uint32_t instance_flags = 0; uint32_t changed = pcmk__updated_none; // Check whether instance has an equivalent of "then" action instance_action = find_first_action(instance->priv->actions, NULL, then->task, node); if (instance_action == NULL) { return changed; } // Check whether action is runnable instance_flags = instance->priv->cmds->action_flags(instance_action, node); if (!pcmk_is_set(instance_flags, pcmk__action_runnable)) { return changed; } // If so, update actions for the instance changed = instance->priv->cmds->update_ordered_actions(first, instance_action, node, flags, filter, type, scheduler); // Propagate any changes to later actions if (pcmk_is_set(changed, pcmk__updated_then)) { for (GList *after_iter = instance_action->actions_after; after_iter != NULL; after_iter = after_iter->next) { pcmk__related_action_t *after = after_iter->data; pcmk__update_action_for_orderings(after->action, scheduler); } } return changed; } /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two clone or bundle actions, update * the actions' flags (and runnable_before members if appropriate) as * appropriate for the ordering. Effects may cascade to other orderings * involving the actions as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pcmk__action_optional to affect only * mandatory actions, and pcmk__action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__instance_update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { pcmk__assert((first != NULL) && (then != NULL) && (scheduler != NULL)); if (then->rsc == NULL) { return pcmk__updated_none; } else if (can_interleave_actions(first, then)) { return update_interleaved_actions(first, then, node, filter, type); } else { uint32_t changed = pcmk__updated_none; GList *instances = get_instance_list(then->rsc); // Update actions for the clone or bundle resource itself changed |= pcmk__update_ordered_actions(first, then, node, flags, filter, type, scheduler); // Update the 'then' clone instances or bundle containers individually for (GList *iter = instances; iter != NULL; iter = iter->next) { pcmk_resource_t *instance = iter->data; changed |= update_noninterleaved_actions(instance, first, then, node, flags, filter, type); } free_instance_list(then->rsc, instances); return changed; } } #define pe__clear_action_summary_flags(flags, action, flag) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Action summary", action->rsc->id, \ flags, flag, #flag); \ } while (0) /*! * \internal * \brief Return action flags for a given clone or bundle action * * \param[in,out] action Action for a clone or bundle * \param[in] instances Clone instances or bundle containers * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__collective_action_flags(pcmk_action_t *action, const GList *instances, const pcmk_node_t *node) { bool any_runnable = false; const char *action_name = orig_action_name(action); // Set original assumptions (optional and runnable may be cleared below) uint32_t flags = pcmk__action_optional |pcmk__action_runnable |pcmk__action_pseudo; for (const GList *iter = instances; iter != NULL; iter = iter->next) { const pcmk_resource_t *instance = iter->data; const pcmk_node_t *instance_node = NULL; pcmk_action_t *instance_action = NULL; uint32_t instance_flags; // Node is relevant only to primitive instances if (pcmk__is_primitive(instance)) { instance_node = node; } instance_action = find_first_action(instance->priv->actions, NULL, action_name, instance_node); if (instance_action == NULL) { pcmk__rsc_trace(action->rsc, "%s has no %s action on %s", instance->id, action_name, pcmk__node_name(node)); continue; } pcmk__rsc_trace(action->rsc, "%s has %s for %s on %s", instance->id, instance_action->uuid, action_name, pcmk__node_name(node)); instance_flags = instance->priv->cmds->action_flags(instance_action, node); // If any instance action is mandatory, so is the collective action if (pcmk_is_set(flags, pcmk__action_optional) && !pcmk_is_set(instance_flags, pcmk__action_optional)) { pcmk__rsc_trace(instance, "%s is mandatory because %s is", action->uuid, instance_action->uuid); pe__clear_action_summary_flags(flags, action, pcmk__action_optional); pcmk__clear_action_flags(action, pcmk__action_optional); } // If any instance action is runnable, so is the collective action if (pcmk_is_set(instance_flags, pcmk__action_runnable)) { any_runnable = true; } } if (!any_runnable) { pcmk__rsc_trace(action->rsc, "%s is not runnable because no instance can run %s", action->uuid, action_name); pe__clear_action_summary_flags(flags, action, pcmk__action_runnable); if (node == NULL) { pcmk__clear_action_flags(action, pcmk__action_runnable); } } return flags; } diff --git a/lib/pacemaker/pcmk_sched_probes.c b/lib/pacemaker/pcmk_sched_probes.c index a84e382572..a3dd8c0a4f 100644 --- a/lib/pacemaker/pcmk_sched_probes.c +++ b/lib/pacemaker/pcmk_sched_probes.c @@ -1,905 +1,905 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Add the expected result to a newly created probe * * \param[in,out] probe Probe action to add expected result to * \param[in] rsc Resource that probe is for * \param[in] node Node that probe will run on */ static void add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc, const pcmk_node_t *node) { // Check whether resource is currently active on node pcmk_node_t *running = pe_find_node_id(rsc->priv->active_nodes, node->priv->id); // The expected result is what we think the resource's current state is if (running == NULL) { pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING); } else if (rsc->priv->orig_role == pcmk_role_promoted) { pe__add_action_expected_result(probe, CRM_EX_PROMOTED); } } /*! * \internal * \brief Create any needed robes on a node for a list of resources * * \param[in,out] rscs List of resources to create probes for * \param[in,out] node Node to create probes on * * \return true if any probe was created, otherwise false */ bool pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node) { bool any_created = false; for (GList *iter = rscs; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (rsc->priv->cmds->create_probe(rsc, node)) { any_created = true; } } return any_created; } /*! * \internal * \brief Order one resource's start after another's start-up probe * * \param[in,out] rsc1 Resource that might get start-up probe * \param[in] rsc2 Resource that might be started */ static void probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2) { const pcmk_node_t *rsc1_node = rsc1->priv->assigned_node; if ((rsc1_node != NULL) && (g_hash_table_lookup(rsc1->priv->probed_nodes, rsc1_node->priv->id) == NULL)) { pcmk__new_ordering(rsc1, pcmk__op_key(rsc1->id, PCMK_ACTION_MONITOR, 0), NULL, rsc2, pcmk__op_key(rsc2->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_ordered, rsc1->priv->scheduler); } } /*! * \internal * \brief Check whether a guest resource will stop * * \param[in] node Guest node to check * * \return true if guest resource will likely stop, otherwise false */ static bool guest_resource_will_stop(const pcmk_node_t *node) { const pcmk_resource_t *guest_rsc = NULL; const pcmk_node_t *guest_node = NULL; guest_rsc = node->priv->remote->priv->launcher; guest_node = guest_rsc->priv->assigned_node; /* Ideally, we'd check whether the guest has a required stop, but that * information doesn't exist yet, so approximate it ... */ return pcmk_is_set(node->priv->flags, pcmk__node_remote_reset) || node->details->unclean || pcmk_is_set(guest_rsc->flags, pcmk__rsc_failed) || (guest_rsc->priv->next_role == pcmk_role_stopped) // Guest is moving || ((guest_rsc->priv->orig_role > pcmk_role_stopped) && (guest_node != NULL) && pcmk__find_node_in_list(guest_rsc->priv->active_nodes, guest_node->priv->name) == NULL); } /*! * \internal * \brief Create a probe action for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return Newly created probe action */ static pcmk_action_t * probe_action(pcmk_resource_t *rsc, pcmk_node_t *node) { pcmk_action_t *probe = NULL; char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0); crm_debug("Scheduling probe of %s %s on %s", pcmk_role_text(rsc->priv->orig_role), rsc->id, pcmk__node_name(node)); probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE, rsc->priv->scheduler); pcmk__clear_action_flags(probe, pcmk__action_optional); pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered); add_expected_result(probe, rsc, node); return probe; } /*! * \internal * \brief Create probes for a resource on a node, if needed * * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node) { uint32_t flags = pcmk__ar_ordered; pcmk_action_t *probe = NULL; pcmk_node_t *allowed = NULL; pcmk_resource_t *top = uber_parent(rsc); const char *reason = NULL; pcmk__assert((rsc != NULL) && (node != NULL)); if (!pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_probe_resources)) { reason = "start-up probes are disabled"; goto no_probe; } if (pcmk__is_pacemaker_remote_node(node)) { if (pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) { reason = "Pacemaker Remote nodes cannot run stonith agents"; goto no_probe; } else if (pcmk__is_guest_or_bundle_node(node) && pe__resource_contains_guest_node(rsc->priv->scheduler, rsc)) { reason = "guest nodes cannot run resources containing guest nodes"; goto no_probe; } else if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { reason = "Pacemaker Remote nodes cannot host remote connections"; goto no_probe; } } // If this is a collective resource, probes are created for its children if (rsc->priv->children != NULL) { return pcmk__probe_resource_list(rsc->priv->children, node); } if ((rsc->priv->launcher != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { reason = "resource is inside a container"; goto no_probe; } else if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { - reason = "resource is orphaned"; + reason = "resource is removed"; goto no_probe; } else if (g_hash_table_lookup(rsc->priv->probed_nodes, node->priv->id) != NULL) { reason = "resource state is already known"; goto no_probe; } allowed = g_hash_table_lookup(rsc->priv->allowed_nodes, node->priv->id); if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes) || pcmk_is_set(top->flags, pcmk__rsc_exclusive_probes)) { // Exclusive discovery is enabled ... if (allowed == NULL) { // ... but this node is not allowed to run the resource reason = "resource has exclusive discovery but is not allowed " "on node"; goto no_probe; } else if (allowed->assign->probe_mode != pcmk__probe_exclusive) { // ... but no constraint marks this node for discovery of resource reason = "resource has exclusive discovery but is not enabled " "on node"; goto no_probe; } } if (allowed == NULL) { allowed = node; } if (allowed->assign->probe_mode == pcmk__probe_never) { reason = "node has discovery disabled"; goto no_probe; } if (pcmk__is_guest_or_bundle_node(node)) { pcmk_resource_t *guest = node->priv->remote->priv->launcher; if (guest->priv->orig_role == pcmk_role_stopped) { // The guest is stopped, so we know no resource is active there reason = "node's guest is stopped"; probe_then_start(guest, top); goto no_probe; } else if (guest_resource_will_stop(node)) { reason = "node's guest will stop"; // Order resource start after guest stop (in case it's restarting) pcmk__new_ordering(guest, pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0), NULL, top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_ordered, rsc->priv->scheduler); goto no_probe; } } // We've eliminated all cases where a probe is not needed, so now it is probe = probe_action(rsc, node); /* Below, we will order the probe relative to start or reload. If this is a * clone instance, the start or reload is for the entire clone rather than * just the instance. Otherwise, the start or reload is for the resource * itself. */ if (!pcmk__is_clone(top)) { top = rsc; } /* Prevent a start if the resource can't be probed, but don't cause the * resource or entire clone to stop if already active. */ if (!pcmk_is_set(probe->flags, pcmk__action_runnable) && (top->priv->active_nodes == NULL)) { pcmk__set_relation_flags(flags, pcmk__ar_unrunnable_first_blocks); } // Start or reload after probing the resource pcmk__new_ordering(rsc, NULL, probe, top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL, flags, rsc->priv->scheduler); pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL, pcmk__ar_ordered, rsc->priv->scheduler); return true; no_probe: pcmk__rsc_trace(rsc, "Skipping probe for %s on %s because %s", rsc->id, node->priv->id, reason); return false; } /*! * \internal * \brief Check whether a probe should be ordered before another action * * \param[in] probe Probe action to check * \param[in] then Other action to check * * \return true if \p probe should be ordered before \p then, otherwise false */ static bool probe_needed_before_action(const pcmk_action_t *probe, const pcmk_action_t *then) { // Probes on a node are performed after unfencing it, not before if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none) && pcmk__same_node(probe->node, then->node)) { const char *op = g_hash_table_lookup(then->meta, PCMK__META_STONITH_ACTION); if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) { return false; } } // Probes should be done on a node before shutting it down if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none) && (probe->node != NULL) && (then->node != NULL) && !pcmk__same_node(probe->node, then->node)) { return false; } // Otherwise probes should always be done before any other action return true; } /*! * \internal * \brief Add implicit "probe then X" orderings for "stop then X" orderings * * If the state of a resource is not known yet, a probe will be scheduled, * expecting a "not running" result. If the probe fails, a stop will not be * scheduled until the next transition. Thus, if there are ordering constraints * like "stop this resource then do something else that's not for the same * resource", add implicit "probe this resource then do something" equivalents * so the relation is upheld until we know whether a stop is needed. * * \param[in,out] scheduler Scheduler data */ static void add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler) { for (GList *iter = scheduler->priv->ordering_constraints; iter != NULL; iter = iter->next) { pcmk__action_relation_t *order = iter->data; uint32_t order_flags = pcmk__ar_ordered; GList *probes = NULL; GList *then_actions = NULL; pcmk_action_t *first = NULL; pcmk_action_t *then = NULL; // Skip disabled orderings if (order->flags == pcmk__ar_none) { continue; } // Skip non-resource orderings, and orderings for the same resource if ((order->rsc1 == NULL) || (order->rsc1 == order->rsc2)) { continue; } // Skip invalid orderings (shouldn't be possible) first = order->action1; then = order->action2; if (((first == NULL) && (order->task1 == NULL)) || ((then == NULL) && (order->task2 == NULL))) { continue; } // Skip orderings for first actions other than stop if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) { continue; } else if ((first == NULL) && !g_str_has_suffix(order->task1, "_" PCMK_ACTION_STOP "_0")) { continue; } /* Do not imply a probe ordering for a resource inside of a stopping * launcher. Otherwise, it might introduce a transition loop, since a * probe could be scheduled after the launcher starts again. */ if ((order->rsc2 != NULL) && (order->rsc1->priv->launcher == order->rsc2)) { if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP, pcmk__str_none)) { continue; } else if ((then == NULL) && g_str_has_suffix(order->task2, "_" PCMK_ACTION_STOP "_0")) { continue; } } // Preserve certain order options for future filtering if (pcmk_is_set(order->flags, pcmk__ar_if_first_unmigratable)) { pcmk__set_relation_flags(order_flags, pcmk__ar_if_first_unmigratable); } if (pcmk_is_set(order->flags, pcmk__ar_if_on_same_node)) { pcmk__set_relation_flags(order_flags, pcmk__ar_if_on_same_node); } // Preserve certain order types for future filtering if ((order->flags == pcmk__ar_if_required_on_same_node) || (order->flags == pcmk__ar_if_on_same_node_or_target)) { order_flags = order->flags; } // List all scheduled probes for the first resource probes = pe__resource_actions(order->rsc1, NULL, PCMK_ACTION_MONITOR, FALSE); if (probes == NULL) { // There aren't any continue; } // List all relevant "then" actions if (then != NULL) { then_actions = g_list_prepend(NULL, then); } else if (order->rsc2 != NULL) { then_actions = find_actions(order->rsc2->priv->actions, order->task2, NULL); if (then_actions == NULL) { // There aren't any g_list_free(probes); continue; } } crm_trace("Implying 'probe then' orderings for '%s then %s' " "(id=%d, type=%.6x)", ((first == NULL)? order->task1 : first->uuid), ((then == NULL)? order->task2 : then->uuid), order->id, order->flags); for (GList *probe_iter = probes; probe_iter != NULL; probe_iter = probe_iter->next) { pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data; for (GList *then_iter = then_actions; then_iter != NULL; then_iter = then_iter->next) { pcmk_action_t *then = (pcmk_action_t *) then_iter->data; if (probe_needed_before_action(probe, then)) { order_actions(probe, then, order_flags); } } } g_list_free(then_actions); g_list_free(probes); } } /*! * \internal * \brief Add necessary orderings between probe and starts of clone instances * * , in additon to the ordering with the parent resource added upon creating * the probe. * * \param[in,out] probe Probe as 'first' action in an ordering * \param[in,out] after 'then' action wrapper in the ordering */ static void add_start_orderings_for_probe(pcmk_action_t *probe, pcmk__related_action_t *after) { uint32_t flags = pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks; /* Although the ordering between the probe of the clone instance and the * start of its parent has been added in pcmk__probe_rsc_on_node(), we * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that * as long as any of the clone instances are running to prevent them from * being unexpectedly stopped. * * On the other hand, we still need to prevent any inactive instances from * starting unless the probe is runnable so that we don't risk starting too * many instances before we know the state on all nodes. */ if ((after->action->rsc->priv->variant <= pcmk__rsc_variant_group) || pcmk_is_set(probe->flags, pcmk__action_runnable) // The order type is already enforced for its parent. || pcmk_is_set(after->flags, pcmk__ar_unrunnable_first_blocks) || (pe__const_top_resource(probe->rsc, false) != after->action->rsc) || !pcmk__str_eq(after->action->task, PCMK_ACTION_START, pcmk__str_none)) { return; } crm_trace("Adding probe start orderings for 'unrunnable %s@%s " "then instances of %s@%s'", probe->uuid, pcmk__node_name(probe->node), after->action->uuid, pcmk__node_name(after->action->node)); for (GList *then_iter = after->action->actions_after; then_iter != NULL; then_iter = then_iter->next) { pcmk__related_action_t *then = then_iter->data; if ((then->action->rsc->priv->active_nodes != NULL) || (pe__const_top_resource(then->action->rsc, false) != after->action->rsc) || !pcmk__str_eq(then->action->task, PCMK_ACTION_START, pcmk__str_none)) { continue; } crm_trace("Adding probe start ordering for 'unrunnable %s@%s " "then %s@%s' (type=%#.6x)", probe->uuid, pcmk__node_name(probe->node), then->action->uuid, pcmk__node_name(then->action->node), flags); /* Prevent the instance from starting if the instance can't, but don't * cause any other intances to stop if already active. */ order_actions(probe, then->action, flags); } return; } /*! * \internal * \brief Order probes before restarts and re-promotes * * If a given ordering is a "probe then start" or "probe then promote" ordering, * add an implicit "probe then stop/demote" ordering in case the action is part * of a restart/re-promote, and do the same recursively for all actions ordered * after the "then" action. * * \param[in,out] probe Probe as 'first' action in an ordering * \param[in,out] after 'then' action in the ordering */ static void add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after) { GList *iter = NULL; bool interleave = false; pcmk_resource_t *compatible_rsc = NULL; // Validate that this is a resource probe followed by some action if ((after == NULL) || (probe == NULL) || !pcmk__is_primitive(probe->rsc) || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { return; } // Avoid running into any possible loop if (pcmk_is_set(after->flags, pcmk__action_detect_loop)) { return; } pcmk__set_action_flags(after, pcmk__action_detect_loop); crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'", probe->uuid, pcmk__node_name(probe->node), after->uuid, pcmk__node_name(after->node)); /* Add restart orderings if "then" is for a different primitive. * Orderings for collective resources will be added later. */ if (pcmk__is_primitive(after->rsc) && (probe->rsc != after->rsc)) { GList *then_actions = NULL; if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) { then_actions = pe__resource_actions(after->rsc, NULL, PCMK_ACTION_STOP, FALSE); } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) { then_actions = pe__resource_actions(after->rsc, NULL, PCMK_ACTION_DEMOTE, FALSE); } for (iter = then_actions; iter != NULL; iter = iter->next) { pcmk_action_t *then = (pcmk_action_t *) iter->data; // Skip pseudo-actions (for example, those implied by fencing) if (!pcmk_is_set(then->flags, pcmk__action_pseudo)) { order_actions(probe, then, pcmk__ar_ordered); } } g_list_free(then_actions); } /* Detect whether "then" is an interleaved clone action. For these, we want * to add orderings only for the relevant instance. */ if ((after->rsc != NULL) && (after->rsc->priv->variant > pcmk__rsc_variant_group)) { interleave = crm_is_true(g_hash_table_lookup(after->rsc->priv->meta, PCMK_META_INTERLEAVE)); if (interleave) { compatible_rsc = pcmk__find_compatible_instance(probe->rsc, after->rsc, pcmk_role_unknown, false); } } /* Now recursively do the same for all actions ordered after "then". This * also handles collective resources since the collective action will be * ordered before its individual instances' actions. */ for (iter = after->actions_after; iter != NULL; iter = iter->next) { pcmk__related_action_t *after_wrapper = iter->data; const pcmk_resource_t *chained_rsc = NULL; /* pcmk__ar_first_implies_then is the reason why a required A.start * implies/enforces B.start to be required too, which is the cause of * B.restart/re-promote. * * Not sure about pcmk__ar_first_implies_same_node_then though. It's now * only used for unfencing case, which tends to introduce transition * loops... */ if (!pcmk_is_set(after_wrapper->flags, pcmk__ar_first_implies_then)) { /* The order type between a group/clone and its child such as * B.start-> B_child.start is: * pcmk__ar_then_implies_first_graphed * |pcmk__ar_unrunnable_first_blocks * * Proceed through the ordering chain and build dependencies with * its children. */ if ((after->rsc == NULL) || (after->rsc->priv->variant < pcmk__rsc_variant_group) || (probe->rsc->priv->parent == after->rsc) || (after_wrapper->action->rsc == NULL)) { continue; } chained_rsc = after_wrapper->action->rsc; if ((chained_rsc->priv->variant > pcmk__rsc_variant_group) || (after->rsc != chained_rsc->priv->parent)) { continue; } /* Proceed to the children of a group or a non-interleaved clone. * For an interleaved clone, proceed only to the relevant child. */ if ((after->rsc->priv->variant > pcmk__rsc_variant_group) && interleave && ((compatible_rsc == NULL) || (compatible_rsc != chained_rsc))) { continue; } } crm_trace("Recursively adding probe restart orderings for " "'%s@%s then %s@%s' (type=%#.6x)", after->uuid, pcmk__node_name(after->node), after_wrapper->action->uuid, pcmk__node_name(after_wrapper->action->node), after_wrapper->flags); add_restart_orderings_for_probe(probe, after_wrapper->action); } } /*! * \internal * \brief Clear the tracking flag on all scheduled actions * * \param[in,out] scheduler Scheduler data */ static void clear_actions_tracking_flag(pcmk_scheduler_t *scheduler) { for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = iter->data; pcmk__clear_action_flags(action, pcmk__action_detect_loop); } } /*! * \internal * \brief Add start and restart orderings for probes scheduled for a resource * * \param[in,out] data Resource whose probes should be ordered * \param[in] user_data Unused */ static void add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; GList *probes = NULL; // For collective resources, order each instance recursively if (!pcmk__is_primitive(rsc)) { g_list_foreach(rsc->priv->children, add_start_restart_orderings_for_rsc, NULL); return; } // Find all probes for given resource probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE); // Add probe restart orderings for each probe found for (GList *iter = probes; iter != NULL; iter = iter->next) { pcmk_action_t *probe = (pcmk_action_t *) iter->data; for (GList *then_iter = probe->actions_after; then_iter != NULL; then_iter = then_iter->next) { pcmk__related_action_t *then = then_iter->data; add_start_orderings_for_probe(probe, then); add_restart_orderings_for_probe(probe, then->action); clear_actions_tracking_flag(rsc->priv->scheduler); } } g_list_free(probes); } /*! * \internal * \brief Add "A then probe B" orderings for "A then B" orderings * * \param[in,out] scheduler Scheduler data * * \note This function is currently disabled (see next comment). */ static void order_then_probes(pcmk_scheduler_t *scheduler) { #if 0 /* Given an ordering "A then B", we would prefer to wait for A to be started * before probing B. * * For example, if A is a filesystem which B can't even run without, it * would be helpful if the author of B's agent could assume that A is * running before B.monitor will be called. * * However, we can't _only_ probe after A is running, otherwise we wouldn't * detect the state of B if A could not be started. We can't even do an * opportunistic version of this, because B may be moving: * * A.stop -> A.start -> B.probe -> B.stop -> B.start * * and if we add B.stop -> A.stop here, we get a loop: * * A.stop -> A.start -> B.probe -> B.stop -> A.stop * * We could kill the "B.probe -> B.stop" dependency, but that could mean * stopping B "too" soon, because B.start must wait for the probe, and * we don't want to stop B if we can't start it. * * We could add the ordering only if A is an anonymous clone with * clone-max == node-max (since we'll never be moving it). However, we could * still be stopping one instance at the same time as starting another. * * The complexity of checking for allowed conditions combined with the ever * narrowing use case suggests that this code should remain disabled until * someone gets smarter. */ for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; pcmk_action_t *start = NULL; GList *actions = NULL; GList *probes = NULL; actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE); if (actions) { start = actions->data; g_list_free(actions); } if (start == NULL) { crm_debug("No start action for %s", rsc->id); continue; } probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE); for (actions = start->actions_before; actions != NULL; actions = actions->next) { pcmk__related_action_t *before = actions->data; pcmk_action_t *first = before->action; pcmk_resource_t *first_rsc = first->rsc; if (first->required_runnable_before) { for (GList *clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) { before = clone_actions->data; crm_trace("Testing '%s then %s' for %s", first->uuid, before->action->uuid, start->uuid); pcmk__assert(before->action->rsc != NULL); first_rsc = before->action->rsc; break; } } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START, pcmk__str_none)) { crm_trace("Not a start op %s for %s", first->uuid, start->uuid); } if (first_rsc == NULL) { continue; } else if (pe__const_top_resource(first_rsc, false) == pe__const_top_resource(start->rsc, false)) { crm_trace("Same parent %s for %s", first_rsc->id, start->uuid); continue; } else if (!pcmk__is_clone(pe__const_top_resource(first_rsc, false))) { crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid); continue; } crm_debug("Applying %s before %s", first->uuid, start->uuid); for (GList *probe_iter = probes; probe_iter != NULL; probe_iter = probe_iter->next) { pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data; crm_debug("Ordering %s before %s", first->uuid, probe->uuid); order_actions(first, probe, pcmk__ar_ordered); } } } #endif } void pcmk__order_probes(pcmk_scheduler_t *scheduler) { // Add orderings for "probe then X" g_list_foreach(scheduler->priv->resources, add_start_restart_orderings_for_rsc, NULL); add_probe_orderings_for_stops(scheduler); order_then_probes(scheduler); } /*! * \internal * \brief Schedule any probes needed * * \param[in,out] scheduler Scheduler data * * \note This may also schedule fencing of failed remote nodes. */ void pcmk__schedule_probes(pcmk_scheduler_t *scheduler) { // Schedule probes on each node in the cluster as needed for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; if (!node->details->online) { // Don't probe offline nodes if (pcmk__is_failed_remote_node(node)) { pe_fence_node(scheduler, node, "the connection is unrecoverable", FALSE); } continue; } if (node->details->unclean) { // Don't probe nodes that need fencing continue; } if (!pcmk_is_set(node->priv->flags, pcmk__node_probes_allowed)) { // The user requested that probes not be done on this node continue; } // Probe each resource in the cluster on this node, as needed pcmk__probe_resource_list(scheduler->priv->resources, node); } } diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c index 90b43a3995..f5e6182bf0 100644 --- a/lib/pacemaker/pcmk_sched_remote.c +++ b/lib/pacemaker/pcmk_sched_remote.c @@ -1,734 +1,734 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } /* We always use pcmk__ar_guest_allowed with these convenience functions to * exempt internally generated constraints from the prohibition of user * constraints involving remote connection resources. * * The start ordering additionally uses pcmk__ar_unrunnable_first_blocks so that * the specified action is not runnable if the start is not runnable. */ static inline void order_start_then_action(pcmk_resource_t *first_rsc, pcmk_action_t *then_action, uint32_t extra) { if ((first_rsc != NULL) && (then_action != NULL)) { pcmk__new_ordering(first_rsc, start_key(first_rsc), NULL, then_action->rsc, NULL, then_action, pcmk__ar_guest_allowed |pcmk__ar_unrunnable_first_blocks |extra, first_rsc->priv->scheduler); } } static inline void order_action_then_stop(pcmk_action_t *first_action, pcmk_resource_t *then_rsc, uint32_t extra) { if ((first_action != NULL) && (then_rsc != NULL)) { pcmk__new_ordering(first_action->rsc, NULL, first_action, then_rsc, stop_key(then_rsc), NULL, pcmk__ar_guest_allowed|extra, then_rsc->priv->scheduler); } } static enum remote_connection_state get_remote_node_state(const pcmk_node_t *node) { const pcmk_resource_t *remote_rsc = NULL; const pcmk_node_t *cluster_node = NULL; pcmk__assert(node != NULL); remote_rsc = node->priv->remote; pcmk__assert(remote_rsc != NULL); cluster_node = pcmk__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if ((remote_rsc->priv->next_role == pcmk_role_stopped) || (remote_rsc->priv->assigned_node == NULL)) { // The connection resource is not going to run anywhere if ((cluster_node != NULL) && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (!pcmk_is_set(remote_rsc->flags, pcmk__rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->priv->next_role == pcmk_role_stopped) && (remote_rsc->priv->remote_reconnect_ms > 0U) && pcmk_is_set(node->priv->flags, pcmk__node_remote_fenced) && !pe__shutdown_requested(node)) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. */ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, so see * if we can recover it first */ return remote_state_unknown; } else if (cluster_node->details->unclean || !(cluster_node->details->online)) { // Connection is running on a dead node, see if we can recover it first return remote_state_resting; } else if (pcmk__list_of_multiple(remote_rsc->priv->active_nodes) && (remote_rsc->priv->partial_migration_source != NULL) && (remote_rsc->priv->partial_migration_target != NULL)) { /* We're in the middle of migrating a connection resource, so wait until * after the migration completes before performing any actions. */ return remote_state_resting; } return remote_state_alive; } /*! * \internal * \brief Order actions on remote node relative to actions for the connection * * \param[in,out] action An action scheduled on a Pacemaker Remote node */ static void apply_remote_ordering(pcmk_action_t *action) { pcmk_resource_t *remote_rsc = NULL; enum pcmk__action_type task = pcmk__parse_action(action->task); enum remote_connection_state state = get_remote_node_state(action->node); uint32_t order_opts = pcmk__ar_none; if (action->rsc == NULL) { return; } pcmk__assert(pcmk__is_pacemaker_remote_node(action->node)); remote_rsc = action->node->priv->remote; pcmk__assert(remote_rsc != NULL); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pcmk__rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Migration ops map to pcmk__action_unspecified, but we need to apply * the same ordering as for stop or demote (see get_router_node()). */ task = pcmk__action_stop; } switch (task) { case pcmk__action_start: case pcmk__action_promote: order_opts = pcmk__ar_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ pcmk__set_relation_flags(order_opts, pcmk__ar_first_implies_then); } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts); break; case pcmk__action_stop: if (state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pcmk__ar_then_implies_first); } else if (state == remote_state_failed) { /* The resource is active on the node, but since we don't have a * valid connection, the only way to stop the resource is by * fencing the node. There is no need to order the stop relative * to the remote connection, since the stop will become implied * by the fencing. */ pe_fence_node(remote_rsc->priv->scheduler, action->node, "resources are active but " "connection is unrecoverable", FALSE); } else if (remote_rsc->priv->next_role == pcmk_role_stopped) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pcmk__ar_then_implies_first); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pcmk__ar_none); } break; case pcmk__action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. */ if ((state == remote_state_resting) || (state == remote_state_unknown)) { order_start_then_action(remote_rsc, action, pcmk__ar_none); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (pcmk__action_is_recurring(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pcmk__ar_first_implies_then); } else { pcmk_node_t *cluster_node = pcmk__current_node(remote_rsc); if ((task == pcmk__action_monitor) && (state == remote_state_failed)) { /* We would only be here if we do not know the state of the * resource on the remote node. Since we have no way to find * out, it is necessary to fence the node. */ pe_fence_node(remote_rsc->priv->scheduler, action->node, "resources are in unknown state " "and connection is unrecoverable", FALSE); } if ((cluster_node != NULL) && (state == remote_state_stopped)) { /* The connection is currently up, but is going down * permanently. Make sure we check services are actually * stopped _before_ we let the connection get closed. */ order_action_then_stop(action, remote_rsc, pcmk__ar_unrunnable_first_blocks); } else { order_start_then_action(remote_rsc, action, pcmk__ar_none); } } break; } } static void apply_launcher_ordering(pcmk_action_t *action) { pcmk_resource_t *remote_rsc = NULL; pcmk_resource_t *launcher = NULL; enum pcmk__action_type task = pcmk__parse_action(action->task); pcmk__assert(action->rsc != NULL); pcmk__assert(pcmk__is_pacemaker_remote_node(action->node)); remote_rsc = action->node->priv->remote; pcmk__assert(remote_rsc != NULL); launcher = remote_rsc->priv->launcher; pcmk__assert(launcher != NULL); if (pcmk_is_set(launcher->flags, pcmk__rsc_failed)) { pe_fence_node(action->rsc->priv->scheduler, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pcmk__rsc_failed)? "failed " : "", remote_rsc->id, pcmk_is_set(launcher->flags, pcmk__rsc_failed)? "failed " : "", launcher->id); if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Migration ops map to pcmk__action_unspecified, but we need to apply * the same ordering as for stop or demote (see get_router_node()). */ task = pcmk__action_stop; } switch (task) { case pcmk__action_start: case pcmk__action_promote: // Force resource recovery if the launcher is recovered order_start_then_action(launcher, action, pcmk__ar_first_implies_then); // Wait for the connection resource to be up, too order_start_then_action(remote_rsc, action, pcmk__ar_none); break; case pcmk__action_stop: case pcmk__action_demote: if (pcmk_is_set(launcher->flags, pcmk__rsc_failed)) { /* When the launcher representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the launcher stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the launcher's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pcmk__ar_none); } break; default: /* Wait for the connection resource to be up */ if (pcmk__action_is_recurring(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if (task != pcmk__action_unspecified) { order_start_then_action(remote_rsc, action, pcmk__ar_first_implies_then); } } else { order_start_then_action(remote_rsc, action, pcmk__ar_none); } break; } } /*! * \internal * \brief Order all relevant actions relative to remote connection actions * * \param[in,out] scheduler Scheduler data */ void pcmk__order_remote_connection_actions(pcmk_scheduler_t *scheduler) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_have_remote_nodes)) { return; } crm_trace("Creating remote connection orderings"); for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = iter->data; pcmk_resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (pcmk_is_set(action->rsc->flags, pcmk__rsc_is_remote_connection) && pcmk__str_eq(action->task, PCMK_ACTION_CLEAR_FAILCOUNT, pcmk__str_none)) { pcmk__new_ordering(action->rsc, NULL, action, action->rsc, pcmk__op_key(action->rsc->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_ordered, scheduler); continue; } // We are only interested in actions assigned to a node if (action->node == NULL) { continue; } if (!pcmk__is_pacemaker_remote_node(action->node)) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * pcmk__apply_orderings(). */ if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { continue; } remote = action->node->priv->remote; if (remote == NULL) { - // Orphaned + // Removed continue; } /* Another special case: if a resource is moving to a Pacemaker Remote * node, order the stop on the original node after any start of the * remote connection. This ensures that if the connection fails to * start, we leave the resource running on the original node. */ if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_none)) { for (GList *item = action->rsc->priv->actions; item != NULL; item = item->next) { pcmk_action_t *rsc_action = item->data; if (!pcmk__same_node(rsc_action->node, action->node) && pcmk__str_eq(rsc_action->task, PCMK_ACTION_STOP, pcmk__str_none)) { pcmk__new_ordering(remote, start_key(remote), NULL, action->rsc, NULL, rsc_action, pcmk__ar_ordered, scheduler); } } } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). * * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add PCMK__XA_ROUTER_NODE as part of * this logic rather than create_graph_action(). */ if (remote->priv->launcher != NULL) { crm_trace("Container ordering for %s", action->uuid); apply_launcher_ordering(action); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action); } } } /*! * \internal * \brief Check whether a node is a failed remote node * * \param[in] node Node to check * * \return true if \p node is a failed remote node, false otherwise */ bool pcmk__is_failed_remote_node(const pcmk_node_t *node) { return pcmk__is_remote_node(node) && (node->priv->remote != NULL) && (get_remote_node_state(node) == remote_state_failed); } /*! * \internal * \brief Check whether a given resource corresponds to a given node as guest * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p node is a guest node and \p rsc is its containing * resource, otherwise false */ bool pcmk__rsc_corresponds_to_guest(const pcmk_resource_t *rsc, const pcmk_node_t *node) { return (rsc != NULL) && (rsc->priv->launched != NULL) && (node != NULL) && (node->priv->remote != NULL) && (node->priv->remote->priv->launcher == rsc); } /*! * \internal * \brief Get proper connection host that a remote action must be routed through * * A remote connection resource might be starting, stopping, or migrating in the * same transition that an action needs to be executed on its Pacemaker Remote * node. Determine the proper node that the remote action should be routed * through. * * \param[in] action (Potentially remote) action to route * * \return Connection host that action should be routed through if remote, * otherwise NULL */ pcmk_node_t * pcmk__connection_host_for_action(const pcmk_action_t *action) { pcmk_node_t *began_on = NULL; pcmk_node_t *ended_on = NULL; bool partial_migration = false; const char *task = action->task; pcmk_resource_t *remote = NULL; if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_none) || !pcmk__is_pacemaker_remote_node(action->node)) { return NULL; } remote = action->node->priv->remote; pcmk__assert(remote != NULL); began_on = pcmk__current_node(remote); ended_on = remote->priv->assigned_node; if ((remote->priv->launcher == NULL) && (remote->priv->partial_migration_target != NULL)) { partial_migration = true; } if (began_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "next node %s (starting)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } if (ended_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "current node %s (stopping)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return began_on; } if (pcmk__same_node(began_on, ended_on)) { crm_trace("Routing %s for %s through remote connection's " "current node %s (not moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* If we get here, the remote connection is moving during this transition. * This means some actions for resources behind the connection will get * routed through the cluster node the connection resource is currently on, * and others are routed through the cluster node the connection will end up * on. */ if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { task = g_hash_table_lookup(action->meta, "notify_operation"); } /* * Stop, demote, and migration actions must occur before the connection can * move (these actions are required before the remote resource can stop). In * this case, we know these actions have to be routed through the initial * cluster node the connection resource lived on before the move takes * place. * * The exception is a partial migration of a (non-guest) remote connection * resource; in that case, all actions (even these) will be ordered after * the connection's pseudo-start on the migration target, so the target is * the router node. */ if (pcmk__strcase_any_of(task, PCMK_ACTION_CANCEL, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_MIGRATE_TO, NULL) && !partial_migration) { crm_trace("Routing %s for %s through remote connection's " "current node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* Everything else (start, promote, monitor, probe, refresh, * clear failcount, delete, ...) must occur after the connection starts on * the node it is moving to. */ crm_trace("Routing %s for %s through remote connection's " "next node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } /*! * \internal * \brief Replace remote connection's addr="#uname" with actual address * * REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource * with its "addr" parameter set to "#uname", pull the actual value from the * parameters evaluated without a node (which was put there earlier in * pcmk__create_graph() when the bundle's expand() method was called). * * \param[in,out] rsc Resource to check * \param[in,out] params Resource parameters evaluated per node */ void pcmk__substitute_remote_addr(pcmk_resource_t *rsc, GHashTable *params) { const char *remote_addr = g_hash_table_lookup(params, PCMK_REMOTE_RA_ADDR); if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) { GHashTable *base = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); remote_addr = g_hash_table_lookup(base, PCMK_REMOTE_RA_ADDR); if (remote_addr != NULL) { pcmk__insert_dup(params, PCMK_REMOTE_RA_ADDR, remote_addr); } } } /*! * \brief Add special guest node meta-attributes to XML * * If a given action will be executed on a guest node, add the following as XML * attributes (using meta-attribute naming): * * The resource's \c PCMK_META_CONTAINER_ATTRIBUTE_TARGET meta-attribute * (usually set only for bundles), as \c PCMK_META_CONTAINER_ATTRIBUTE_TARGET * * The guest's physical host (current host for "down" actions, next host for * "up" actions), as \c PCMK__META_PHYSICAL_HOST * * If the guest node has no physical host, then don't add either attribute. * * \param[in,out] args_xml XML to add attributes to * \param[in] action Action to check */ void pcmk__add_guest_meta_to_xml(xmlNode *args_xml, const pcmk_action_t *action) { const pcmk_node_t *guest = action->node; const pcmk_node_t *host = NULL; const pcmk_resource_t *launcher = NULL; enum pcmk__action_type task; if (!pcmk__is_guest_or_bundle_node(guest)) { return; } launcher = guest->priv->remote->priv->launcher; task = pcmk__parse_action(action->task); if ((task == pcmk__action_notify) || (task == pcmk__action_notified)) { task = pcmk__parse_action(g_hash_table_lookup(action->meta, "notify_operation")); } switch (task) { case pcmk__action_stop: case pcmk__action_stopped: case pcmk__action_demote: case pcmk__action_demoted: // "Down" actions take place on guest's current host host = pcmk__current_node(launcher); break; case pcmk__action_start: case pcmk__action_started: case pcmk__action_monitor: case pcmk__action_promote: case pcmk__action_promoted: // "Up" actions take place on guest's next host host = launcher->priv->assigned_node; break; default: break; } if (host != NULL) { gpointer target = g_hash_table_lookup(action->rsc->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET); hash2metafield((gpointer) PCMK_META_CONTAINER_ATTRIBUTE_TARGET, target, (gpointer) args_xml); hash2metafield((gpointer) PCMK__META_PHYSICAL_HOST, (gpointer) host->priv->name, (gpointer) args_xml); } } diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c index 372cfa7db0..7626acb4f9 100644 --- a/lib/pacemaker/pcmk_sched_resource.c +++ b/lib/pacemaker/pcmk_sched_resource.c @@ -1,800 +1,800 @@ /* * Copyright 2014-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include "libpacemaker_private.h" // Resource assignment methods by resource variant static pcmk__assignment_methods_t assignment_methods[] = { { pcmk__primitive_assign, pcmk__primitive_create_actions, pcmk__probe_rsc_on_node, pcmk__primitive_internal_constraints, pcmk__primitive_apply_coloc_score, pcmk__colocated_resources, pcmk__with_primitive_colocations, pcmk__primitive_with_colocations, pcmk__add_colocated_node_scores, pcmk__apply_location, pcmk__primitive_action_flags, pcmk__update_ordered_actions, pcmk__output_resource_actions, pcmk__add_rsc_actions_to_graph, pcmk__primitive_add_graph_meta, pcmk__primitive_add_utilization, pcmk__primitive_shutdown_lock, }, { pcmk__group_assign, pcmk__group_create_actions, pcmk__probe_rsc_on_node, pcmk__group_internal_constraints, pcmk__group_apply_coloc_score, pcmk__group_colocated_resources, pcmk__with_group_colocations, pcmk__group_with_colocations, pcmk__group_add_colocated_node_scores, pcmk__group_apply_location, pcmk__group_action_flags, pcmk__group_update_ordered_actions, pcmk__output_resource_actions, pcmk__add_rsc_actions_to_graph, pcmk__noop_add_graph_meta, pcmk__group_add_utilization, pcmk__group_shutdown_lock, }, { pcmk__clone_assign, pcmk__clone_create_actions, pcmk__clone_create_probe, pcmk__clone_internal_constraints, pcmk__clone_apply_coloc_score, pcmk__colocated_resources, pcmk__with_clone_colocations, pcmk__clone_with_colocations, pcmk__add_colocated_node_scores, pcmk__clone_apply_location, pcmk__clone_action_flags, pcmk__instance_update_ordered_actions, pcmk__output_resource_actions, pcmk__clone_add_actions_to_graph, pcmk__clone_add_graph_meta, pcmk__clone_add_utilization, pcmk__clone_shutdown_lock, }, { pcmk__bundle_assign, pcmk__bundle_create_actions, pcmk__bundle_create_probe, pcmk__bundle_internal_constraints, pcmk__bundle_apply_coloc_score, pcmk__colocated_resources, pcmk__with_bundle_colocations, pcmk__bundle_with_colocations, pcmk__add_colocated_node_scores, pcmk__bundle_apply_location, pcmk__bundle_action_flags, pcmk__instance_update_ordered_actions, pcmk__output_bundle_actions, pcmk__bundle_add_actions_to_graph, pcmk__noop_add_graph_meta, pcmk__bundle_add_utilization, pcmk__bundle_shutdown_lock, } }; /*! * \internal * \brief Check whether a resource's agent standard, provider, or type changed * * \param[in,out] rsc Resource to check * \param[in,out] node Node needing unfencing if agent changed * \param[in] rsc_entry XML with previously known agent information * \param[in] active_on_node Whether \p rsc is active on \p node * * \return true if agent for \p rsc changed, otherwise false */ bool pcmk__rsc_agent_changed(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *rsc_entry, bool active_on_node) { bool changed = false; const char *attr_list[] = { PCMK_XA_TYPE, PCMK_XA_CLASS, PCMK_XA_PROVIDER, }; for (int i = 0; i < PCMK__NELEM(attr_list); i++) { const char *value = pcmk__xe_get(rsc->priv->xml, attr_list[i]); const char *old_value = pcmk__xe_get(rsc_entry, attr_list[i]); if (!pcmk__str_eq(value, old_value, pcmk__str_none)) { changed = true; trigger_unfencing(rsc, node, "Device definition changed", NULL, rsc->priv->scheduler); if (active_on_node) { crm_notice("Forcing restart of %s on %s " "because %s changed from '%s' to '%s'", rsc->id, pcmk__node_name(node), attr_list[i], pcmk__s(old_value, ""), pcmk__s(value, "")); } } } if (changed && active_on_node) { // Make sure the resource is restarted custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, rsc->priv->scheduler); pcmk__set_rsc_flags(rsc, pcmk__rsc_start_pending); } return changed; } /*! * \internal * \brief Add resource (and any matching children) to list if it matches ID * * \param[in] result List to add resource to * \param[in] rsc Resource to check * \param[in] id ID to match * * \return (Possibly new) head of list */ static GList * add_rsc_if_matching(GList *result, pcmk_resource_t *rsc, const char *id) { if (pcmk__str_eq(id, rsc->id, pcmk__str_none) || pcmk__str_eq(id, rsc->priv->history_id, pcmk__str_none)) { result = g_list_prepend(result, rsc); } for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = (pcmk_resource_t *) iter->data; result = add_rsc_if_matching(result, child, id); } return result; } /*! * \internal * \brief Find all resources matching a given ID by either ID or clone name * * \param[in] id Resource ID to check * \param[in] scheduler Scheduler data * * \return List of all resources that match \p id * \note The caller is responsible for freeing the return value with * g_list_free(). */ GList * pcmk__rscs_matching_id(const char *id, const pcmk_scheduler_t *scheduler) { GList *result = NULL; CRM_CHECK((id != NULL) && (scheduler != NULL), return NULL); for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { result = add_rsc_if_matching(result, (pcmk_resource_t *) iter->data, id); } return result; } /*! * \internal * \brief Set the variant-appropriate assignment methods for a resource * * \param[in,out] data Resource to set assignment methods for * \param[in] user_data Ignored */ static void set_assignment_methods_for_rsc(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; rsc->priv->cmds = &assignment_methods[rsc->priv->variant]; g_list_foreach(rsc->priv->children, set_assignment_methods_for_rsc, NULL); } /*! * \internal * \brief Set the variant-appropriate assignment methods for all resources * * \param[in,out] scheduler Scheduler data */ void pcmk__set_assignment_methods(pcmk_scheduler_t *scheduler) { g_list_foreach(scheduler->priv->resources, set_assignment_methods_for_rsc, NULL); } /*! * \internal * \brief Wrapper for colocated_resources() method for readability * * \param[in] rsc Resource to add to colocated list * \param[in] orig_rsc Resource originally requested * \param[in,out] list Pointer to list to add to * * \return (Possibly new) head of list */ static inline void add_colocated_resources(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { *list = rsc->priv->cmds->colocated_resources(rsc, orig_rsc, *list); } // Shared implementation of pcmk__assignment_methods_t:colocated_resources() GList * pcmk__colocated_resources(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList *colocated_rscs) { const GList *iter = NULL; GList *colocations = NULL; if (orig_rsc == NULL) { orig_rsc = rsc; } if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) { return colocated_rscs; } pcmk__rsc_trace(orig_rsc, "%s is in colocation chain with %s", rsc->id, orig_rsc->id); colocated_rscs = g_list_prepend(colocated_rscs, (gpointer) rsc); // Follow colocations where this resource is the dependent resource colocations = pcmk__this_with_colocations(rsc); for (iter = colocations; iter != NULL; iter = iter->next) { const pcmk__colocation_t *constraint = iter->data; const pcmk_resource_t *primary = constraint->primary; if (primary == orig_rsc) { continue; // Break colocation loop } if ((constraint->score == PCMK_SCORE_INFINITY) && (pcmk__colocation_affects(rsc, primary, constraint, true) == pcmk__coloc_affects_location)) { add_colocated_resources(primary, orig_rsc, &colocated_rscs); } } g_list_free(colocations); // Follow colocations where this resource is the primary resource colocations = pcmk__with_this_colocations(rsc); for (iter = colocations; iter != NULL; iter = iter->next) { const pcmk__colocation_t *constraint = iter->data; const pcmk_resource_t *dependent = constraint->dependent; if (dependent == orig_rsc) { continue; // Break colocation loop } if (pcmk__is_clone(rsc) && !pcmk__is_clone(dependent)) { continue; // We can't be sure whether dependent will be colocated } if ((constraint->score == PCMK_SCORE_INFINITY) && (pcmk__colocation_affects(dependent, rsc, constraint, true) == pcmk__coloc_affects_location)) { add_colocated_resources(dependent, orig_rsc, &colocated_rscs); } } g_list_free(colocations); return colocated_rscs; } // No-op function for variants that don't need to implement add_graph_meta() void pcmk__noop_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml) { } /*! * \internal * \brief Output a summary of scheduled actions for a resource * * \param[in,out] rsc Resource to output actions for */ void pcmk__output_resource_actions(pcmk_resource_t *rsc) { pcmk_node_t *next = NULL; pcmk_node_t *current = NULL; pcmk__output_t *out = NULL; pcmk__assert(rsc != NULL); out = rsc->priv->scheduler->priv->out; if (rsc->priv->children != NULL) { for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = (pcmk_resource_t *) iter->data; child->priv->cmds->output_actions(child); } return; } next = rsc->priv->assigned_node; if (rsc->priv->active_nodes != NULL) { current = pcmk__current_node(rsc); if (rsc->priv->orig_role == pcmk_role_stopped) { /* This can occur when resources are being recovered because * the current role can change in pcmk__primitive_create_actions() */ rsc->priv->orig_role = pcmk_role_started; } } if ((current == NULL) && pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { - /* Don't log stopped orphans */ + // Don't log stopped removed resources return; } out->message(out, "rsc-action", rsc, current, next); } /*! * \internal * \brief Add a resource to a node's list of assigned resources * * \param[in,out] node Node to add resource to * \param[in] rsc Resource to add */ static inline void add_assigned_resource(pcmk_node_t *node, pcmk_resource_t *rsc) { node->priv->assigned_resources = g_list_prepend(node->priv->assigned_resources, rsc); } /*! * \internal * \brief Assign a specified resource (of any variant) to a node * * Assign a specified resource and its children (if any) to a specified node, if * the node can run the resource (or unconditionally, if \p force is true). Mark * the resources as no longer provisional. * * If a resource can't be assigned (or \p node is \c NULL), unassign any * previous assignment. If \p stop_if_fail is \c true, set next role to stopped * and update any existing actions scheduled for the resource. * * \param[in,out] rsc Resource to assign * \param[in,out] node Node to assign \p rsc to * \param[in] force If true, assign to \p node even if unavailable * \param[in] stop_if_fail If \c true and either \p rsc can't be assigned * or \p chosen is \c NULL, set next role to * stopped and update existing actions (if \p rsc * is not a primitive, this applies to its * primitive descendants instead) * * \return \c true if the assignment of \p rsc changed, or \c false otherwise * * \note Assigning a resource to the NULL node using this function is different * from calling pcmk__unassign_resource(), in that it may also update any * actions created for the resource. * \note The \c pcmk__assignment_methods_t:assign() method is preferred, unless * a resource should be assigned to the \c NULL node or every resource in * a tree should be assigned to the same node. * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ bool pcmk__assign_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool force, bool stop_if_fail) { bool changed = false; pcmk_scheduler_t *scheduler = NULL; pcmk__assert(rsc != NULL); scheduler = rsc->priv->scheduler; if (rsc->priv->children != NULL) { for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child_rsc = iter->data; changed |= pcmk__assign_resource(child_rsc, node, force, stop_if_fail); } return changed; } // Assigning a primitive if (!force && (node != NULL) && ((node->assign->score < 0) // Allow graph to assume that guest node connections will come up || (!pcmk__node_available(node, true, false) && !pcmk__is_guest_or_bundle_node(node)))) { pcmk__rsc_debug(rsc, "All nodes for resource %s are unavailable, unclean or " "shutting down (%s can%s run resources, with score %s)", rsc->id, pcmk__node_name(node), (pcmk__node_available(node, true, false)? "" : "not"), pcmk_readable_score(node->assign->score)); if (stop_if_fail) { pe__set_next_role(rsc, pcmk_role_stopped, "node availability"); } node = NULL; } if (rsc->priv->assigned_node != NULL) { changed = !pcmk__same_node(rsc->priv->assigned_node, node); } else { changed = (node != NULL); } pcmk__unassign_resource(rsc); pcmk__clear_rsc_flags(rsc, pcmk__rsc_unassigned); if (node == NULL) { char *rc_stopped = NULL; pcmk__rsc_debug(rsc, "Could not assign %s to a node", rsc->id); if (!stop_if_fail) { return changed; } pe__set_next_role(rsc, pcmk_role_stopped, "unable to assign"); for (GList *iter = rsc->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *op = (pcmk_action_t *) iter->data; pcmk__rsc_debug(rsc, "Updating %s for %s assignment failure", op->uuid, rsc->id); if (pcmk__str_eq(op->task, PCMK_ACTION_STOP, pcmk__str_none)) { pcmk__clear_action_flags(op, pcmk__action_optional); } else if (pcmk__str_eq(op->task, PCMK_ACTION_START, pcmk__str_none)) { pcmk__clear_action_flags(op, pcmk__action_runnable); } else { // Cancel recurring actions, unless for stopped state const char *interval_ms_s = NULL; const char *target_rc_s = NULL; interval_ms_s = g_hash_table_lookup(op->meta, PCMK_META_INTERVAL); target_rc_s = g_hash_table_lookup(op->meta, PCMK__META_OP_TARGET_RC); if (rc_stopped == NULL) { rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING); } if (!pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches) && !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) { pcmk__clear_action_flags(op, pcmk__action_runnable); } } } free(rc_stopped); return changed; } pcmk__rsc_debug(rsc, "Assigning %s to %s", rsc->id, pcmk__node_name(node)); rsc->priv->assigned_node = pe__copy_node(node); add_assigned_resource(node, rsc); node->priv->num_resources++; node->assign->count++; pcmk__consume_node_capacity(node->priv->utilization, rsc); if (pcmk_is_set(scheduler->flags, pcmk__sched_show_utilization)) { pcmk__output_t *out = scheduler->priv->out; out->message(out, "resource-util", rsc, node, __func__); } return changed; } /*! * \internal * \brief Remove any node assignment from a specified resource and its children * * If a specified resource has been assigned to a node, remove that assignment * and mark the resource as provisional again. * * \param[in,out] rsc Resource to unassign * * \note This function is called recursively on \p rsc and its children. */ void pcmk__unassign_resource(pcmk_resource_t *rsc) { pcmk_node_t *old = rsc->priv->assigned_node; if (old == NULL) { crm_info("Unassigning %s", rsc->id); } else { crm_info("Unassigning %s from %s", rsc->id, pcmk__node_name(old)); } pcmk__set_rsc_flags(rsc, pcmk__rsc_unassigned); if (rsc->priv->children == NULL) { if (old == NULL) { return; } rsc->priv->assigned_node = NULL; /* We're going to free the pcmk_node_t copy, but its priv member is * shared and will remain, so update that appropriately first. */ old->priv->assigned_resources = g_list_remove(old->priv->assigned_resources, rsc); old->priv->num_resources--; pcmk__release_node_capacity(old->priv->utilization, rsc); pcmk__free_node_copy(old); return; } for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk__unassign_resource((pcmk_resource_t *) iter->data); } } /*! * \internal * \brief Check whether a resource has reached its migration threshold on a node * * \param[in,out] rsc Resource to check * \param[in] node Node to check * \param[out] failed If threshold has been reached, this will be set to * resource that failed (possibly a parent of \p rsc) * * \return true if the migration threshold has been reached, false otherwise */ bool pcmk__threshold_reached(pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_resource_t **failed) { int fail_count, remaining_tries; pcmk_resource_t *rsc_to_ban = rsc; // Migration threshold of 0 means never force away if (rsc->priv->ban_after_failures == 0) { return false; } // If we're ignoring failures, also ignore the migration threshold if (pcmk_is_set(rsc->flags, pcmk__rsc_ignore_failure)) { return false; } // If there are no failures, there's no need to force away fail_count = pe_get_failcount(node, rsc, NULL, pcmk__fc_effective|pcmk__fc_launched, NULL); if (fail_count <= 0) { return false; } // If failed resource is anonymous clone instance, we'll force clone away if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) { rsc_to_ban = uber_parent(rsc); } // How many more times recovery will be tried on this node remaining_tries = rsc->priv->ban_after_failures - fail_count; if (remaining_tries <= 0) { pcmk__sched_warn(rsc->priv->scheduler, "%s cannot run on %s due to reaching migration " "threshold (clean up resource to allow again) " QB_XS " failures=%d " PCMK_META_MIGRATION_THRESHOLD "=%d", rsc_to_ban->id, pcmk__node_name(node), fail_count, rsc->priv->ban_after_failures); if (failed != NULL) { *failed = rsc_to_ban; } return true; } crm_info("%s can fail %d more time%s on " "%s before reaching migration threshold (%d)", rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries), pcmk__node_name(node), rsc->priv->ban_after_failures); return false; } /*! * \internal * \brief Get a node's score * * \param[in] node Node with ID to check * \param[in] nodes List of nodes to look for \p node score in * * \return Node's score, or -INFINITY if not found */ static int get_node_score(const pcmk_node_t *node, GHashTable *nodes) { pcmk_node_t *found_node = NULL; if ((node != NULL) && (nodes != NULL)) { found_node = g_hash_table_lookup(nodes, node->priv->id); } if (found_node == NULL) { return -PCMK_SCORE_INFINITY; } return found_node->assign->score; } /*! * \internal * \brief Compare two resources according to which should be assigned first * * \param[in] a First resource to compare * \param[in] b Second resource to compare * \param[in] data Sorted list of all nodes in cluster * * \return -1 if \p a should be assigned before \b, 0 if they are equal, * or +1 if \p a should be assigned after \b */ static gint cmp_resources(gconstpointer a, gconstpointer b, gpointer data) { /* GLib insists that this function require gconstpointer arguments, but we * make a small, temporary change to each argument (setting the * pe_rsc_merging flag) during comparison */ pcmk_resource_t *resource1 = (pcmk_resource_t *) a; pcmk_resource_t *resource2 = (pcmk_resource_t *) b; const GList *nodes = data; int rc = 0; int r1_score = -PCMK_SCORE_INFINITY; int r2_score = -PCMK_SCORE_INFINITY; pcmk_node_t *r1_node = NULL; pcmk_node_t *r2_node = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; const char *reason = NULL; // Resources with highest priority should be assigned first reason = "priority"; r1_score = resource1->priv->priority; r2_score = resource2->priv->priority; if (r1_score > r2_score) { rc = -1; goto done; } if (r1_score < r2_score) { rc = 1; goto done; } // We need nodes to make any other useful comparisons reason = "no node list"; if (nodes == NULL) { goto done; } // Calculate and log node scores resource1->priv->cmds->add_colocated_node_scores(resource1, NULL, resource1->id, &r1_nodes, NULL, 1, pcmk__coloc_select_this_with); resource2->priv->cmds->add_colocated_node_scores(resource2, NULL, resource2->id, &r2_nodes, NULL, 1, pcmk__coloc_select_this_with); pe__show_node_scores(true, NULL, resource1->id, r1_nodes, resource1->priv->scheduler); pe__show_node_scores(true, NULL, resource2->id, r2_nodes, resource2->priv->scheduler); // The resource with highest score on its current node goes first reason = "current location"; if (resource1->priv->active_nodes != NULL) { r1_node = pcmk__current_node(resource1); } if (resource2->priv->active_nodes != NULL) { r2_node = pcmk__current_node(resource2); } r1_score = get_node_score(r1_node, r1_nodes); r2_score = get_node_score(r2_node, r2_nodes); if (r1_score > r2_score) { rc = -1; goto done; } if (r1_score < r2_score) { rc = 1; goto done; } // Otherwise a higher score on any node will do reason = "score"; for (const GList *iter = nodes; iter != NULL; iter = iter->next) { const pcmk_node_t *node = (const pcmk_node_t *) iter->data; r1_score = get_node_score(node, r1_nodes); r2_score = get_node_score(node, r2_nodes); if (r1_score > r2_score) { rc = -1; goto done; } if (r1_score < r2_score) { rc = 1; goto done; } } done: crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s", resource1->id, r1_score, ((r1_node == NULL)? "" : " on "), ((r1_node == NULL)? "" : r1_node->priv->id), ((rc < 0)? '>' : ((rc > 0)? '<' : '=')), resource2->id, r2_score, ((r2_node == NULL)? "" : " on "), ((r2_node == NULL)? "" : r2_node->priv->id), reason); if (r1_nodes != NULL) { g_hash_table_destroy(r1_nodes); } if (r2_nodes != NULL) { g_hash_table_destroy(r2_nodes); } return rc; } /*! * \internal * \brief Sort resources in the order they should be assigned to nodes * * \param[in,out] scheduler Scheduler data */ void pcmk__sort_resources(pcmk_scheduler_t *scheduler) { GList *nodes = g_list_copy(scheduler->nodes); nodes = pcmk__sort_nodes(nodes, NULL); scheduler->priv->resources = g_list_sort_with_data(scheduler->priv->resources, cmp_resources, nodes); g_list_free(nodes); } diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c index 1269e22fb9..1161bfd3d2 100644 --- a/lib/pacemaker/pcmk_scheduler.c +++ b/lib/pacemaker/pcmk_scheduler.c @@ -1,848 +1,848 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" CRM_TRACE_INIT_DATA(pacemaker); /*! * \internal * \brief Do deferred action checks after assignment * * When unpacking the resource history, the scheduler checks for resource * configurations that have changed since an action was run. However, at that * time, bundles using the REMOTE_CONTAINER_HACK don't have their final * parameter information, so instead they add a deferred check to a list. This * function processes one entry in that list. * * \param[in,out] rsc Resource that action history is for * \param[in,out] node Node that action history is for * \param[in] rsc_op Action history entry * \param[in] check Type of deferred check to do */ static void check_params(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *rsc_op, enum pcmk__check_parameters check) { const char *reason = NULL; pcmk__op_digest_t *digest_data = NULL; switch (check) { case pcmk__check_active: if (pcmk__check_action_config(rsc, node, rsc_op) && pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL)) { reason = "action definition changed"; } break; case pcmk__check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, rsc->priv->scheduler); switch (digest_data->rc) { case pcmk__digest_unknown: crm_trace("Resource %s history entry %s on %s has " "no digest to compare", rsc->id, pcmk__xe_id(rsc_op), node->priv->id); break; case pcmk__digest_match: break; default: reason = "resource parameters have changed"; break; } break; } if (reason != NULL) { pe__clear_failcount(rsc, node, reason, rsc->priv->scheduler); } } /*! * \internal * \brief Check whether a resource has failcount clearing scheduled on a node * * \param[in] node Node to check * \param[in] rsc Resource to check * * \return true if \p rsc has failcount clearing scheduled on \p node, * otherwise false */ static bool failcount_clear_action_exists(const pcmk_node_t *node, const pcmk_resource_t *rsc) { GList *list = pe__resource_actions(rsc, node, PCMK_ACTION_CLEAR_FAILCOUNT, TRUE); if (list != NULL) { g_list_free(list); return true; } return false; } /*! * \internal * \brief Ban a resource from a node if it reached its failure threshold there * * \param[in,out] data Resource to check failure threshold for * \param[in] user_data Node to check resource on */ static void check_failure_threshold(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; // If this is a collective resource, apply recursively to children instead if (rsc->priv->children != NULL) { g_list_foreach(rsc->priv->children, check_failure_threshold, user_data); return; } if (!failcount_clear_action_exists(node, rsc)) { /* Don't force the resource away from this node due to a failcount * that's going to be cleared. * * @TODO Failcount clearing can be scheduled in * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in * schedule_resource_actions() via check_params(). This runs well before * then, so it cannot detect those, meaning we might check the migration * threshold when we shouldn't. Worst case, we stop or move the * resource, then move it back in the next transition. */ pcmk_resource_t *failed = NULL; if (pcmk__threshold_reached(rsc, node, &failed)) { resource_location(failed, node, -PCMK_SCORE_INFINITY, "__fail_limit__", rsc->priv->scheduler); } } } /*! * \internal * \brief If resource has exclusive discovery, ban node if not allowed * * Location constraints have a PCMK_XA_RESOURCE_DISCOVERY option that allows * users to specify where probes are done for the affected resource. If this is * set to \c exclusive, probes will only be done on nodes listed in exclusive * constraints. This function bans the resource from the node if the node is not * listed. * * \param[in,out] data Resource to check * \param[in] user_data Node to check resource on */ static void apply_exclusive_discovery(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; /* @TODO This checks rsc and the top rsc, but should probably check all * ancestors (a cloned group could have it set on the group) */ if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes) || pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pcmk__rsc_exclusive_probes)) { pcmk_node_t *match = NULL; // If this is a collective resource, apply recursively to children g_list_foreach(rsc->priv->children, apply_exclusive_discovery, user_data); match = g_hash_table_lookup(rsc->priv->allowed_nodes, node->priv->id); if ((match != NULL) && (match->assign->probe_mode != pcmk__probe_exclusive)) { match->assign->score = -PCMK_SCORE_INFINITY; } } } /*! * \internal * \brief Apply stickiness to a resource if appropriate * * \param[in,out] data Resource to check for stickiness * \param[in] user_data Ignored */ static void apply_stickiness(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; pcmk_node_t *node = NULL; // If this is a collective resource, apply recursively to children instead if (rsc->priv->children != NULL) { g_list_foreach(rsc->priv->children, apply_stickiness, NULL); return; } /* A resource is sticky if it is managed, has stickiness configured, and is * active on a single node. */ if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed) || (rsc->priv->stickiness < 1) || !pcmk__list_of_1(rsc->priv->active_nodes)) { return; } node = rsc->priv->active_nodes->data; /* In a symmetric cluster, stickiness can always be used. In an * asymmetric cluster, we have to check whether the resource is still * allowed on the node, so we don't keep the resource somewhere it is no * longer explicitly enabled. */ if (!pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_symmetric_cluster) && (g_hash_table_lookup(rsc->priv->allowed_nodes, node->priv->id) == NULL)) { pcmk__rsc_debug(rsc, "Ignoring %s stickiness because the cluster is " "asymmetric and %s is not explicitly allowed", rsc->id, pcmk__node_name(node)); return; } pcmk__rsc_debug(rsc, "Resource %s has %d stickiness on %s", rsc->id, rsc->priv->stickiness, pcmk__node_name(node)); resource_location(rsc, node, rsc->priv->stickiness, "stickiness", rsc->priv->scheduler); } /*! * \internal * \brief Apply shutdown locks for all resources as appropriate * * \param[in,out] scheduler Scheduler data */ static void apply_shutdown_locks(pcmk_scheduler_t *scheduler) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { return; } for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->shutdown_lock(rsc); } } /* * \internal * \brief Apply node-specific scheduling criteria * * After the CIB has been unpacked, process node-specific scheduling criteria * including shutdown locks, location constraints, resource stickiness, * migration thresholds, and exclusive resource discovery. */ static void apply_node_criteria(pcmk_scheduler_t *scheduler) { crm_trace("Applying node-specific scheduling criteria"); apply_shutdown_locks(scheduler); pcmk__apply_locations(scheduler); g_list_foreach(scheduler->priv->resources, apply_stickiness, NULL); for (GList *node_iter = scheduler->nodes; node_iter != NULL; node_iter = node_iter->next) { for (GList *rsc_iter = scheduler->priv->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { check_failure_threshold(rsc_iter->data, node_iter->data); apply_exclusive_discovery(rsc_iter->data, node_iter->data); } } } /*! * \internal * \brief Assign resources to nodes * * \param[in,out] scheduler Scheduler data */ static void assign_resources(pcmk_scheduler_t *scheduler) { GList *iter = NULL; crm_trace("Assigning resources to nodes"); if (!pcmk__str_eq(scheduler->priv->placement_strategy, PCMK_VALUE_DEFAULT, pcmk__str_casei)) { pcmk__sort_resources(scheduler); } pcmk__show_node_capacities("Original", scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_have_remote_nodes)) { /* Assign remote connection resources first (which will also assign any * colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; const pcmk_node_t *target = rsc->priv->partial_migration_target; if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { pcmk__rsc_trace(rsc, "Assigning remote connection resource '%s'", rsc->id); rsc->priv->cmds->assign(rsc, target, true); } } } /* now do the rest of the resources */ for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (!pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { pcmk__rsc_trace(rsc, "Assigning %s resource '%s'", rsc->priv->xml->name, rsc->id); rsc->priv->cmds->assign(rsc, NULL, true); } } pcmk__show_node_capacities("Remaining", scheduler); } /*! * \internal - * \brief Schedule fail count clearing on online nodes if resource is orphaned + * \brief Schedule fail count clearing on online nodes if resource is removed * * \param[in,out] data Resource to check * \param[in] user_data Ignored */ static void -clear_failcounts_if_orphaned(gpointer data, gpointer user_data) +clear_failcounts_if_removed(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { return; } - crm_trace("Clear fail counts for orphaned resource %s", rsc->id); + crm_trace("Clear fail counts for removed resource %s", rsc->id); /* There's no need to recurse into rsc->private->children because those * should just be unassigned clone instances. */ for (GList *iter = rsc->priv->scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; pcmk_action_t *clear_op = NULL; if (!node->details->online) { continue; } if (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) == 0) { continue; } - clear_op = pe__clear_failcount(rsc, node, "it is orphaned", + clear_op = pe__clear_failcount(rsc, node, "it is removed", rsc->priv->scheduler); /* We can't use order_action_then_stop() here because its * pcmk__ar_guest_allowed breaks things */ pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pcmk__ar_ordered, rsc->priv->scheduler); } } /*! * \internal * \brief Schedule any resource actions needed * * \param[in,out] scheduler Scheduler data */ static void schedule_resource_actions(pcmk_scheduler_t *scheduler) { // Process deferred action checks pcmk__foreach_param_check(scheduler, check_params); pcmk__free_param_checks(scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) { crm_trace("Scheduling probes"); pcmk__schedule_probes(scheduler); } if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) { - g_list_foreach(scheduler->priv->resources, clear_failcounts_if_orphaned, + g_list_foreach(scheduler->priv->resources, clear_failcounts_if_removed, NULL); } crm_trace("Scheduling resource actions"); for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->create_actions(rsc); } } /*! * \internal * \brief Check whether a resource or any of its descendants are managed * * \param[in] rsc Resource to check * * \return true if resource or any descendant is managed, otherwise false */ static bool is_managed(const pcmk_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { return true; } for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { if (is_managed((pcmk_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether any resources in the cluster are managed * * \param[in] scheduler Scheduler data * * \return true if any resource is managed, otherwise false */ static bool any_managed_resources(const pcmk_scheduler_t *scheduler) { for (const GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { if (is_managed((const pcmk_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node requires fencing * * \param[in] node Node to check * \param[in] have_managed Whether any resource in cluster is managed * * \return true if \p node should be fenced, otherwise false */ static bool needs_fencing(const pcmk_node_t *node, bool have_managed) { return have_managed && node->details->unclean && pe_can_fence(node->priv->scheduler, node); } /*! * \internal * \brief Check whether a node requires shutdown * * \param[in] node Node to check * * \return true if \p node should be shut down, otherwise false */ static bool needs_shutdown(const pcmk_node_t *node) { if (pcmk__is_pacemaker_remote_node(node)) { /* Do not send shutdown actions for Pacemaker Remote nodes. * @TODO We might come up with a good use for this in the future. */ return false; } return node->details->online && node->details->shutdown; } /*! * \internal * \brief Track and order non-DC fencing * * \param[in,out] list List of existing non-DC fencing actions * \param[in,out] action Fencing action to prepend to \p list * \param[in] scheduler Scheduler data * * \return (Possibly new) head of \p list */ static GList * add_nondc_fencing(GList *list, pcmk_action_t *action, const pcmk_scheduler_t *scheduler) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing) && (list != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pcmk_action_t *) list->data, action, pcmk__ar_ordered); } return g_list_prepend(list, action); } /*! * \internal * \brief Schedule a node for fencing * * \param[in,out] node Node that requires fencing */ static pcmk_action_t * schedule_fencing(pcmk_node_t *node) { pcmk_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, node->priv->scheduler); pcmk__sched_warn(node->priv->scheduler, "Scheduling node %s for fencing", pcmk__node_name(node)); pcmk__order_vs_fence(fencing, node->priv->scheduler); return fencing; } /*! * \internal * \brief Create and order node fencing and shutdown actions * * \param[in,out] scheduler Scheduler data */ static void schedule_fencing_and_shutdowns(pcmk_scheduler_t *scheduler) { pcmk_action_t *dc_down = NULL; bool integrity_lost = false; bool have_managed = any_managed_resources(scheduler); GList *fencing_ops = NULL; GList *shutdown_ops = NULL; crm_trace("Scheduling fencing and shutdowns as needed"); if (!have_managed) { crm_notice("No fencing will be done until there are resources " "to manage"); } // Check each node for whether it needs fencing or shutdown for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; pcmk_action_t *fencing = NULL; const bool is_dc = pcmk__same_node(node, scheduler->dc_node); /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pcmk__is_guest_or_bundle_node(node)) { if (pcmk_is_set(node->priv->flags, pcmk__node_remote_reset) && have_managed && pe_can_fence(scheduler, node)) { pcmk__fence_guest(node); } continue; } if (needs_fencing(node, have_managed)) { fencing = schedule_fencing(node); // Track DC and non-DC fence actions separately if (is_dc) { dc_down = fencing; } else { fencing_ops = add_nondc_fencing(fencing_ops, fencing, scheduler); } } else if (needs_shutdown(node)) { pcmk_action_t *down_op = pcmk__new_shutdown_action(node); // Track DC and non-DC shutdown actions separately if (is_dc) { dc_down = down_op; } else { shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if ((fencing == NULL) && node->details->unclean) { integrity_lost = true; pcmk__config_warn("Node %s is unclean but cannot be fenced", pcmk__node_name(node)); } } if (integrity_lost) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { pcmk__config_warn("Resource functionality and data integrity " "cannot be guaranteed (configure, enable, " "and test fencing to correct this)"); } else if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) { crm_notice("Unclean nodes will not be fenced until quorum is " "attained or " PCMK_OPT_NO_QUORUM_POLICY " is set to " PCMK_VALUE_IGNORE); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { pcmk__order_after_each(dc_down, shutdown_ops); } // Order any non-DC fencing before any DC fencing or shutdown if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ pcmk__order_after_each(dc_down, fencing_ops); } else if (fencing_ops != NULL) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pcmk_action_t *) fencing_ops->data, dc_down, pcmk__ar_ordered); } } g_list_free(fencing_ops); g_list_free(shutdown_ops); } static void log_resource_details(pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv->out; GList *all = NULL; /* Due to the `crm_mon --node=` feature, out->message() for all the * resource-related messages expects a list of nodes that we are allowed to * output information for. Here, we create a wildcard to match all nodes. */ all = g_list_prepend(all, (gpointer) "*"); for (GList *item = scheduler->priv->resources; item != NULL; item = item->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) item->data; - // Log all resources except inactive orphans + // Log all resources except inactive removed resources if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed) || (rsc->priv->orig_role != pcmk_role_stopped)) { out->message(out, (const char *) rsc->priv->xml->name, 0UL, rsc, all, all); } } g_list_free(all); } static void log_all_actions(pcmk_scheduler_t *scheduler) { /* This only ever outputs to the log, so ignore whatever output object was * previously set and just log instead. */ pcmk__output_t *prev_out = scheduler->priv->out; pcmk__output_t *out = NULL; if (pcmk__log_output_new(&out) != pcmk_rc_ok) { return; } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_NOTICE); scheduler->priv->out = out; out->begin_list(out, NULL, NULL, "Actions"); pcmk__output_actions(scheduler); out->end_list(out); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); scheduler->priv->out = prev_out; } /*! * \internal * \brief Log all required but unrunnable actions at trace level * * \param[in] scheduler Scheduler data */ static void log_unrunnable_actions(const pcmk_scheduler_t *scheduler) { const uint64_t flags = pcmk__action_optional |pcmk__action_runnable |pcmk__action_pseudo; crm_trace("Required but unrunnable actions:"); for (const GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { const pcmk_action_t *action = (const pcmk_action_t *) iter->data; if (!pcmk_any_flags_set(action->flags, flags)) { pcmk__log_action("\t", action, true); } } } /*! * \internal * \brief Run the scheduler for a given CIB * * \param[in,out] scheduler Scheduler data */ void pcmk__schedule_actions(pcmk_scheduler_t *scheduler) { cluster_status(scheduler); pcmk__set_assignment_methods(scheduler); pcmk__apply_node_health(scheduler); pcmk__unpack_constraints(scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_validate_only)) { return; } if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only) && pcmk__is_daemon) { log_resource_details(scheduler); } apply_node_criteria(scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { return; } pcmk__create_internal_constraints(scheduler); pcmk__handle_rsc_config_changes(scheduler); assign_resources(scheduler); schedule_resource_actions(scheduler); /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we can mark nodes as needing fencing. */ pcmk__order_remote_connection_actions(scheduler); schedule_fencing_and_shutdowns(scheduler); pcmk__apply_orderings(scheduler); log_all_actions(scheduler); pcmk__create_graph(scheduler); if (get_crm_log_level() == LOG_TRACE) { log_unrunnable_actions(scheduler); } } /*! * \internal * \brief Initialize scheduler data * * Make our own copies of the CIB XML and date/time object, if they're not * \c NULL. This way we don't have to take ownership of the objects passed via * the API. * * This function is most useful for public API functions that want the caller * to retain ownership of the CIB object * * \param[in,out] out Output object * \param[in] input The CIB XML to check (if \c NULL, use current CIB) * \param[in] date Date and time to use in the scheduler (if \c NULL, * use current date and time). This can be used for * checking whether a rule is in effect at a certa * date and time. * \param[out] scheduler Where to store initialized scheduler data * * \return Standard Pacemaker return code */ int pcmk__init_scheduler(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date, pcmk_scheduler_t **scheduler) { // Allows for cleaner syntax than dereferencing the scheduler argument pcmk_scheduler_t *new_scheduler = NULL; new_scheduler = pcmk_new_scheduler(); if (new_scheduler == NULL) { return ENOMEM; } pcmk__set_scheduler_flags(new_scheduler, pcmk__sched_no_counts); // Populate the scheduler data // Make our own copy of the given input or fetch the CIB and use that if (input != NULL) { new_scheduler->input = pcmk__xml_copy(NULL, input); if (new_scheduler->input == NULL) { out->err(out, "Failed to copy input XML"); pcmk_free_scheduler(new_scheduler); return ENOMEM; } } else { int rc = cib__signon_query(out, NULL, &(new_scheduler->input)); if (rc != pcmk_rc_ok) { pcmk_free_scheduler(new_scheduler); return rc; } } // Make our own copy of the given crm_time_t object; otherwise // cluster_status() populates with the current time if (date != NULL) { // pcmk_copy_time() guarantees non-NULL new_scheduler->priv->now = pcmk_copy_time(date); } // Unpack everything cluster_status(new_scheduler); *scheduler = new_scheduler; return pcmk_rc_ok; }