Review notes on this revision of the patch:
- log_attrd_error() now takes enum attrd_command and switches on cmd_clear,
  cmd_purge and cmd_update. It was left on the old char codes (0, 'C', 'U')
  while every caller started passing the enum, so failed purge and update
  requests matched no case: nothing was logged, and the DC-exit and
  fast-track-shutdown handling for update failures never ran.
- NOTE(review): the header names on the #include lines were missing from the
  copy under review and are reconstructed from the identifiers used; confirm
  them against the tree.
- Hunk line counts are recomputed for this reformatted text; the blob ids on
  the index line are those of the original patch.

diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
index 12bfc6845d..d0020d82b7 100644
--- a/daemons/controld/controld_attrd.c
+++ b/daemons/controld/controld_attrd.c
@@ -1,185 +1,213 @@
 /*
- * Copyright 2006-2020 the Pacemaker project contributors
+ * Copyright 2006-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 
 #include <crm/crm.h>
 #include <crm/common/attrd_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_attrd_internal.h>
 #include <crm/msg_xml.h>
 
 #include <pacemaker-controld.h>
 
-static crm_ipc_t *attrd_ipc = NULL;
+static pcmk_ipc_api_t *attrd_api = NULL;
+
+// Requests that update_attrd_helper() can send to the attribute manager
+enum attrd_command {
+    cmd_clear,
+    cmd_purge,
+    cmd_update
+};
 
 void
 controld_close_attrd_ipc()
 {
-    if (attrd_ipc) {
+    if (attrd_api != NULL) {
         crm_trace("Closing connection to pacemaker-attrd");
-        crm_ipc_close(attrd_ipc);
-        crm_ipc_destroy(attrd_ipc);
-        attrd_ipc = NULL;
+        pcmk_disconnect_ipc(attrd_api);
+        pcmk_free_ipc_api(attrd_api);
+        attrd_api = NULL;
     }
 }
 
+/* Log a failed attribute manager request.
+ *
+ * command selects the message. For cmd_clear, name is the resource (NULL is
+ * logged as "all resources"). A failed cmd_update also makes the DC exit, or
+ * fast-tracks shutdown if this node is already shutting down.
+ */
 static void
 log_attrd_error(const char *host, const char *name, const char *value,
-                gboolean is_remote, char command, int rc)
+                gboolean is_remote, enum attrd_command command, int rc)
 {
     const char *node_type = (is_remote? "Pacemaker Remote" : "cluster");
     gboolean shutting_down = pcmk_is_set(fsa_input_register, R_SHUTDOWN);
     const char *when = (shutting_down? " at shutdown" : "");
 
     switch (command) {
-        case 0:
+        case cmd_clear:
             crm_err("Could not clear failure attributes for %s on %s node %s%s: %s "
                     CRM_XS " rc=%d", (name? name : "all resources"), node_type,
                     host, when, pcmk_rc_str(rc), rc);
             break;
 
-        case 'C':
+        case cmd_purge:
             crm_err("Could not purge %s node %s in attribute manager%s: %s "
                     CRM_XS " rc=%d",
                     node_type, host, when, pcmk_rc_str(rc), rc);
             break;
 
-        case 'U':
+        case cmd_update:
             /* We weren't able to update an attribute after several retries,
              * so something is horribly wrong with the attribute manager or the
              * underlying system.
              */
             do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
                        "Could not update attribute %s=%s for %s node %s%s: %s "
                        CRM_XS " rc=%d", name, value, node_type, host, when,
                        pcmk_rc_str(rc), rc);
 
             if (AM_I_DC) {
                 /* We are unable to provide accurate information to the
                  * scheduler, so allow another node to take over DC.
                  * @TODO Should we do this unconditionally on any failure?
                  */
                 crmd_exit(CRM_EX_FATAL);
 
             } else if (shutting_down) {
                 // Fast-track shutdown since unable to request via attribute
                 register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
             }
             break;
     }
 }
 
 static void
 update_attrd_helper(const char *host, const char *name, const char *value,
                     const char *interval_spec, const char *user_name,
-                    gboolean is_remote_node, char command)
+                    gboolean is_remote_node, enum attrd_command command)
 {
     int rc;
     int attrd_opts = pcmk__node_attr_none;
 
     if (is_remote_node) {
         pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
     }
 
-    if (attrd_ipc == NULL) {
-        attrd_ipc = crm_ipc_new(T_ATTRD, 0);
+    if (attrd_api == NULL) {
+        rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
+
+        if (rc != pcmk_rc_ok) {
+            log_attrd_error(host, name, value, is_remote_node, command, rc);
+            return;
+        }
     }
 
     for (int attempt = 1; attempt <= 4; ++attempt) {
         rc = pcmk_rc_ok;
 
         // If we're not already connected, try to connect
-        if (crm_ipc_connected(attrd_ipc) == FALSE) {
+        if (!pcmk_ipc_is_connected(attrd_api)) {
             if (attempt == 1) {
                 // Start with a clean slate
-                crm_ipc_close(attrd_ipc);
-            }
-            if (crm_ipc_connect(attrd_ipc) == FALSE) {
-                rc = errno;
+                pcmk_disconnect_ipc(attrd_api);
             }
+
+            // Connect without a main loop, and with no callback either.
+            // We don't use any commands that expect a reply.
+            rc = pcmk_connect_ipc(attrd_api, pcmk_ipc_dispatch_sync);
             crm_debug("Attribute manager connection attempt %d of 4: %s (%d)",
                       attempt, pcmk_rc_str(rc), rc);
         }
 
         if (rc == pcmk_rc_ok) {
-            if (command) {
-                rc = pcmk__node_attr_request(attrd_ipc, command, host, name,
-                                             value, XML_CIB_TAG_STATUS, NULL,
-                                             NULL, user_name, attrd_opts);
-            } else {
-                /* No command means clear fail count (name/value is really
-                 * resource/operation)
-                 */
-                rc = pcmk__node_attr_request_clear(attrd_ipc, host, name,
-                                                   value, interval_spec,
-                                                   user_name, attrd_opts);
+            switch (command) {
+                case cmd_clear:
+                    /* name/value is really resource/operation */
+                    rc = pcmk__attrd_api_clear_failures(attrd_api, host, name,
+                                                        value, interval_spec,
+                                                        user_name, attrd_opts);
+                    break;
+
+                case cmd_update:
+                    rc = pcmk__attrd_api_update(attrd_api, host, name, value,
+                                                NULL, NULL, user_name,
+                                                attrd_opts | pcmk__node_attr_value);
+                    break;
+
+                case cmd_purge:
+                    rc = pcmk__attrd_api_purge(attrd_api, host);
+                    break;
             }
+
             crm_debug("Attribute manager request attempt %d of 4: %s (%d)",
                       attempt, pcmk_rc_str(rc), rc);
         }
 
         if (rc == pcmk_rc_ok) {
             // Success, we're done
             break;
 
         } else if ((rc != EAGAIN) && (rc != EALREADY)) {
             /* EAGAIN or EALREADY indicates a temporary block, so just try
              * again. Otherwise, close the connection for a clean slate.
              */
-            crm_ipc_close(attrd_ipc);
+            pcmk_disconnect_ipc(attrd_api);
         }
 
         /* @TODO If the attribute manager remains unavailable the entire time,
          * this function takes more than 6 seconds. Maybe set a timer for
          * retries, to let the main loop do other work.
          */
         if (attempt < 4) {
             sleep(attempt);
         }
     }
 
     if (rc != pcmk_rc_ok) {
         log_attrd_error(host, name, value, is_remote_node, command, rc);
     }
 }
 
 void
 update_attrd(const char *host, const char *name, const char *value,
              const char *user_name, gboolean is_remote_node)
 {
     update_attrd_helper(host, name, value, NULL, user_name, is_remote_node,
-                        'U');
+                        cmd_update);
 }
 
 void
 update_attrd_remote_node_removed(const char *host, const char *user_name)
 {
     crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
               host);
-    update_attrd_helper(host, NULL, NULL, NULL, user_name, TRUE, 'C');
+    update_attrd_helper(host, NULL, NULL, NULL, user_name, TRUE, cmd_purge);
 }
 
 void
 update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
                             const char *interval_spec, gboolean is_remote_node)
 {
     const char *op_desc = NULL;
     const char *interval_desc = NULL;
     const char *node_type = is_remote_node? "Pacemaker Remote" : "cluster";
 
     if (op) {
         interval_desc = interval_spec? interval_spec : "nonrecurring";
         op_desc = op;
     } else {
         interval_desc = "all";
         op_desc = "operations";
     }
     crm_info("Asking pacemaker-attrd to clear failure of %s %s for %s on %s node %s",
              interval_desc, op_desc, rsc, node_type, host);
-    update_attrd_helper(host, rsc, op, interval_spec, NULL, is_remote_node, 0);
+    update_attrd_helper(host, rsc, op, interval_spec, NULL, is_remote_node, cmd_clear);
 }