diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index da52f54e5b..7e0e15c1ab 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -1,320 +1,334 @@ /* - * Copyright 2013-2022 the Pacemaker project contributors + * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" #define SUMMARY "daemon for managing Pacemaker node attributes" +gboolean stand_alone = FALSE; + +static GOptionEntry entries[] = { + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, + "(Advanced use only) Run in stand-alone mode", NULL }, + + { NULL } +}; + static pcmk__output_t *out = NULL; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; lrmd_t *the_lrmd = NULL; crm_cluster_t *attrd_cluster = NULL; crm_trigger_t *attrd_config_read = NULL; crm_exit_t attrd_exit_status = CRM_EX_OK; static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (attrd_shutting_down()) { crm_info("Connection disconnection complete"); } else { /* eventually this should trigger a reconnect, not a shutdown */ crm_crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } return; } static void attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG), "Cleared transient attributes: %s " CRM_XS " xpath=%s rc=%d", pcmk_strerror(rc), (char *) user_data, rc); } #define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS /*! * \internal * \brief Wipe all transient attributes for this node from the CIB * * Clear any previous transient node attributes from the CIB. This is * normally done by the DC's controller when this node leaves the cluster, but * this handles the case where the node restarted so quickly that the * cluster layer didn't notice. * * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), * ideally we'd skip this and sync our attributes from the writer. * However, currently we reject any values for us that the writer has, in * attrd_peer_update(). */ static void attrd_erase_attrs(void) { int call_id; char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", xpath); call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_quorum_override | cib_xpath); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, "attrd_erase_cb", attrd_erase_cb, free); } static int attrd_cib_connect(int max_retry) { static int attempts = 0; int rc = -ENOTCONN; the_cib = cib_new(); if (the_cib == NULL) { return -ENOTCONN; } do { if(attempts > 0) { sleep(attempts); } attempts++; crm_debug("Connection attempt %d to the CIB manager", attempts); rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); } while(rc != pcmk_ok && attempts < max_retry); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); goto cleanup; } crm_debug("Connected to the CIB manager after %d attempts", attempts); rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); if(rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); goto cleanup; } return pcmk_ok; cleanup: cib__clean_up_connection(&the_cib); return -ENOTCONN; } /*! * \internal * \brief Prepare the CIB after cluster is connected */ static void attrd_cib_init(void) { // We have no attribute values in memory, wipe the CIB to match attrd_erase_attrs(); // Set a trigger for reading the CIB (for the alerts section) attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); } static bool ipc_already_running(void) { pcmk_ipc_api_t *old_instance = NULL; int rc = pcmk_rc_ok; rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_attrd); if (rc != pcmk_rc_ok) { return false; } rc = pcmk_connect_ipc(old_instance, pcmk_ipc_dispatch_sync); if (rc != pcmk_rc_ok) { pcmk_free_ipc_api(old_instance); return false; } pcmk_disconnect_ipc(old_instance); pcmk_free_ipc_api(old_instance); return true; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { - return pcmk__build_arg_context(args, "text (default), xml", group, NULL); + GOptionContext *context = NULL; + + context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); + pcmk__add_main_args(context, entries); + return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; GError *error = NULL; bool initialized = false; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, &output_group); attrd_init_mainloop(); crm_log_preinit(NULL, argc, argv); mainloop_add_signal(SIGTERM, attrd_shutdown); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { attrd_exit_status = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { attrd_exit_status = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } initialized = true; crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_notice("Starting Pacemaker node attribute manager"); + crm_notice("Starting Pacemaker node attribute manager%s", + stand_alone ? " in standalone mode" : ""); if (ipc_already_running()) { crm_err("pacemaker-attrd is already active, aborting startup"); crm_exit(CRM_EX_OK); } attributes = pcmk__strkey_table(NULL, attrd_free_attribute); /* Connect to the CIB before connecting to the cluster or listening for IPC. * This allows us to assume the CIB is connected whenever we process a * cluster or IPC message (which also avoids start-up race conditions). */ if (attrd_cib_connect(30) != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; goto done; } crm_info("CIB connection active"); if (attrd_cluster_connect() != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; goto done; } crm_info("Cluster connection active"); // Initialization that requires the cluster to be connected attrd_election_init(); attrd_cib_init(); /* Set a private attribute for ourselves with the protocol version we * support. This lets all nodes determine the minimum supported version * across all nodes. It also ensures that the writer learns our node name, * so it can send our attributes to the CIB. */ attrd_broadcast_protocol(); attrd_init_ipc(); crm_notice("Pacemaker node attribute manager successfully started and accepting connections"); attrd_run_mainloop(); done: if (initialized) { crm_info("Shutting down attribute manager"); attrd_election_fini(); attrd_ipc_fini(); attrd_lrmd_disconnect(); attrd_cib_disconnect(); attrd_free_waitlist(); pcmk_cluster_free(attrd_cluster); g_hash_table_destroy(attributes); } g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(error, out); if (out != NULL) { out->finish(out, attrd_exit_status, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(attrd_exit_status); } diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index 2803cbf9f2..329fb5ad8c 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -1,214 +1,216 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #ifndef PACEMAKER_ATTRD__H # define PACEMAKER_ATTRD__H #include #include #include #include #include #include #include /* * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used * with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned. * * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will * also set a private attribute for itself with its version, so any attrd can * determine the minimum version supported by all peers. * * Protocol Pacemaker Significant changes * -------- --------- ------------------- * 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only), * PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH, * PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC, * PCMK__ATTRD_CMD_SYNC_RESPONSE * 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_PATTERN), * PCMK__ATTRD_CMD_QUERY * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH, * PCMK__ATTRD_CMD_UPDATE_DELAY * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes * 4 2.1.5 Multiple attributes can be updated in a single IPC * message * 5 2.1.5 Peers can request confirmation of a sent message */ #define ATTRD_PROTOCOL_VERSION "5" #define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) #define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) #define attrd_send_ack(client, id, flags) \ pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) void attrd_init_mainloop(void); void attrd_run_mainloop(void); void attrd_set_requesting_shutdown(void); void attrd_clear_requesting_shutdown(void); void attrd_free_waitlist(void); bool attrd_requesting_shutdown(void); bool attrd_shutting_down(void); void attrd_shutdown(int nsig); void attrd_init_ipc(void); void attrd_ipc_fini(void); void attrd_cib_disconnect(void); bool attrd_value_needs_expansion(const char *value); int attrd_expand_value(const char *value, const char *old_value); /* regular expression to clear failures of all resources */ #define ATTRD_RE_CLEAR_ALL \ "^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-" /* regular expression to clear failure of all operations for one resource * (format takes resource name) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$" /* regular expression to clear failure of one operation for one resource * (format takes resource name, operation name, and interval) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$" int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, guint interval_ms); extern cib_t *the_cib; /* Alerts */ extern lrmd_t *the_lrmd; extern crm_trigger_t *attrd_config_read; void attrd_lrmd_disconnect(void); gboolean attrd_read_options(gpointer user_data); void attrd_cib_replaced_cb(const char *event, xmlNode * msg); void attrd_cib_updated_cb(const char *event, xmlNode *msg); int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value); // Elections void attrd_election_init(void); void attrd_election_fini(void); void attrd_start_election_if_needed(void); bool attrd_election_won(void); void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml); bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml); void attrd_declare_winner(void); void attrd_remove_voter(const crm_node_t *peer); void attrd_xml_add_writer(xmlNode *xml); typedef struct attribute_s { char *uuid; /* TODO: Remove if at all possible */ char *id; char *set_id; char *set_type; GHashTable *values; int update; int timeout_ms; /* TODO: refactor these three as a bitmask */ bool changed; /* whether attribute value has changed since last write */ bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ gboolean is_private; /* whether to keep this attribute out of the CIB */ mainloop_timer_t *timer; char *user; gboolean force_write; /* Flag for updating attribute by ignoring delay */ } attribute_t; typedef struct attribute_value_s { uint32_t nodeid; gboolean is_remote; char *nodename; char *current; char *requested; gboolean seen; } attribute_value_t; extern crm_cluster_t *attrd_cluster; extern GHashTable *attributes; extern GHashTable *peer_protocol_vers; #define CIB_OP_TIMEOUT_S 120 int attrd_cluster_connect(void); void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(const char *host, bool uncache, const char *source); void attrd_peer_clear_failure(pcmk__request_t *request); void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml); void attrd_broadcast_protocol(void); xmlNode *attrd_client_peer_remove(pcmk__request_t *request); xmlNode *attrd_client_clear_failure(pcmk__request_t *request); xmlNode *attrd_client_update(pcmk__request_t *request); xmlNode *attrd_client_refresh(pcmk__request_t *request); xmlNode *attrd_client_query(pcmk__request_t *request); gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, const attribute_value_t *v, bool force_write); void attrd_clear_value_seen(void); void attrd_free_attribute(gpointer data); void attrd_free_attribute_value(gpointer data); attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); void attrd_write_attribute(attribute_t *a, bool ignore_delay); void attrd_write_attributes(bool all, bool ignore_delay); void attrd_write_or_elect_attribute(attribute_t *a); extern int minimum_protocol_version; void attrd_remove_peer_protocol_ver(const char *host); void attrd_update_minimum_protocol_ver(const char *host, const char *value); mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); void attrd_unregister_handlers(void); void attrd_handle_request(pcmk__request_t *request); enum attrd_sync_point { attrd_sync_point_local, attrd_sync_point_cluster, }; typedef int (*attrd_confirmation_action_fn)(xmlNode *); void attrd_add_client_to_waitlist(pcmk__request_t *request); void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); int attrd_cluster_sync_point_update(xmlNode *xml); void attrd_do_not_expect_from_peer(const char *host); void attrd_do_not_wait_for_client(pcmk__client_t *client); void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); void attrd_free_confirmations(void); void attrd_handle_confirmation(int callid, const char *host); void attrd_remove_client_from_waitlist(pcmk__client_t *client); const char *attrd_request_sync_point(xmlNode *xml); bool attrd_request_has_sync_point(xmlNode *xml); void attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest); +extern gboolean stand_alone; + #endif /* PACEMAKER_ATTRD__H */