diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am index 776e4a7ee1..f29d1058ce 100644 --- a/include/crm/common/Makefile.am +++ b/include/crm/common/Makefile.am @@ -1,19 +1,19 @@ # # Copyright 2004-2020 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # MAINTAINERCLEANFILES = Makefile.in headerdir=$(pkgincludedir)/crm/common header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \ - nvpair.h acl.h + nvpair.h acl.h ipc_controld.h noinst_HEADERS = internal.h alerts_internal.h \ iso8601_internal.h remote_internal.h xml_internal.h \ ipc_internal.h output.h cmdline_internal.h curses_internal.h \ attrd_internal.h options_internal.h diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index 23613a1000..f5ec562a6e 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -1,302 +1,310 @@ /* * Copyright 2015-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_COMMON_INTERNAL__H #define CRM_COMMON_INTERNAL__H #include // getpid() #include // bool #include // strcmp() #include // uid_t, gid_t, pid_t #include // guint, GList, GHashTable #include // xmlNode #include // crm_strdup_printf() +#include // mainloop_io_t, struct ipc_client_callbacks // Internal ACL-related utilities (from acl.c) char *pcmk__uid2username(uid_t uid); const char *pcmk__update_acl_user(xmlNode *request, const char *field, const char *peer_user); #if ENABLE_ACL # include static inline bool pcmk__is_privileged(const char *user) { return user && (!strcmp(user, CRM_DAEMON_USER) || !strcmp(user, "root")); } #endif #if SUPPORT_CIBSECRETS // Internal CIB utilities (from cib_secrets.c) */ int pcmk__substitute_secrets(const char *rsc_id, GHashTable *params); #endif /* internal digest-related utilities (from digest.c) */ bool pcmk__verify_digest(xmlNode *input, const char *expected); /* internal I/O utilities (from io.c) */ int pcmk__real_path(const char *path, char **resolved_path); char *pcmk__series_filename(const char *directory, const char *series, int sequence, bool bzip); int pcmk__read_series_sequence(const char *directory, const char *series, unsigned int *seq); void pcmk__write_series_sequence(const char *directory, const char *series, unsigned int sequence, int max); int pcmk__chown_series_sequence(const char *directory, const char *series, uid_t uid, gid_t gid); int pcmk__build_path(const char *path_c, mode_t mode); bool pcmk__daemon_can_write(const char *dir, const char *file); void pcmk__sync_directory(const char *name); int pcmk__file_contents(const char *filename, char **contents); int pcmk__write_sync(int fd, const char *contents); int pcmk__set_nonblocking(int fd); const char *pcmk__get_tmpdir(void); void pcmk__close_fds_in_child(bool); /*! * \internal * \brief Open /dev/null to consume next available file descriptor * * Open /dev/null, disregarding the result. This is intended when daemonizing to * be able to null stdin, stdout, and stderr. * * \param[in] flags O_RDONLY (stdin) or O_WRONLY (stdout and stderr) */ static inline void pcmk__open_devnull(int flags) { // Static analysis clutter // cppcheck-suppress leakReturnValNotUsed (void) open("/dev/null", flags); } /* internal logging utilities */ # define pcmk__config_err(fmt...) do { \ crm_config_error = TRUE; \ crm_err(fmt); \ } while (0) # define pcmk__config_warn(fmt...) do { \ crm_config_warning = TRUE; \ crm_warn(fmt); \ } while (0) /*! * \internal * \brief Execute code depending on whether message would be logged * * This is similar to do_crm_log_unlikely() except instead of logging, it either * continues past this statement or executes else_action depending on whether a * message of the given severity would be logged or not. This allows whole * blocks of code to be skipped if tracing or debugging is turned off. * * \param[in] level Severity at which to continue past this statement * \param[in] else_action Code block to execute if severity would not be logged * * \note else_action must not contain a break or continue statement */ # define pcmk__log_else(level, else_action) do { \ static struct qb_log_callsite *trace_cs = NULL; \ \ if (trace_cs == NULL) { \ trace_cs = qb_log_callsite_get(__func__, __FILE__, "log_else", \ level, __LINE__, 0); \ } \ if (!crm_is_callsite_active(trace_cs, level, 0)) { \ else_action; \ } \ } while(0) +/* internal main loop utilities (from mainloop.c) */ + +int pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, + struct ipc_client_callbacks *callbacks, + mainloop_io_t **source); + + /* internal procfs utilities (from procfs.c) */ pid_t pcmk__procfs_pid_of(const char *name); unsigned int pcmk__procfs_num_cores(void); /* internal XML schema functions (from xml.c) */ void crm_schema_init(void); void crm_schema_cleanup(void); /* internal functions related to process IDs (from pid.c) */ /*! * \internal * \brief Check whether process exists (by PID and optionally executable path) * * \param[in] pid PID of process to check * \param[in] daemon If not NULL, path component to match with procfs entry * * \return Standard Pacemaker return code * \note Particular return codes of interest include pcmk_rc_ok for alive, * ESRCH for process is not alive (verified by kill and/or executable path * match), EACCES for caller unable or not allowed to check. A result of * "alive" is less reliable when \p daemon is not provided or procfs is * not available, since there is no guarantee that the PID has not been * recycled for another process. * \note This function cannot be used to verify \e authenticity of the process. */ int pcmk__pid_active(pid_t pid, const char *daemon); int pcmk__read_pidfile(const char *filename, pid_t *pid); int pcmk__pidfile_matches(const char *filename, pid_t expected_pid, const char *expected_name, pid_t *pid); int pcmk__lock_pidfile(const char *filename, const char *name); /* interal functions related to resource operations (from operations.c) */ // printf-style format to create operation ID from resource, action, interval #define PCMK__OP_FMT "%s_%s_%u" char *pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms); char *pcmk__notify_key(const char *rsc_id, const char *notify_type, const char *op_type); char *pcmk__transition_key(int transition_id, int action_id, int target_rc, const char *node); void pcmk__filter_op_for_digest(xmlNode *param_set); // miscellaneous utilities (from utils.c) const char *pcmk_message_name(const char *name); extern int pcmk__score_red; extern int pcmk__score_green; extern int pcmk__score_yellow; /* internal generic string functions (from strings.c) */ int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val, guint *result); bool pcmk__starts_with(const char *str, const char *prefix); bool pcmk__ends_with(const char *s, const char *match); bool pcmk__ends_with_ext(const char *s, const char *match); char *pcmk__add_word(char *list, const char *word); int pcmk__compress(const char *data, unsigned int length, unsigned int max, char **result, unsigned int *result_len); int pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end); gboolean pcmk__str_in_list(GList *lst, const gchar *s); bool pcmk__str_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED; bool pcmk__str_none_of(const char *s, ...) G_GNUC_NULL_TERMINATED; /* Correctly displaying singular or plural is complicated; consider "1 node has" * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g. * * if (a == 1) { * crm_info("singular message"): * } else { * crm_info("plural message"); * } * * though even that's not sufficient for all languages besides English (if we * ever desire to do translations of output and log messages). But the following * convenience macros are "good enough" and more concise for many cases. */ /* Example: * crm_info("Found %d %s", nentries, * pcmk__plural_alt(nentries, "entry", "entries")); */ #define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2)) // Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes)); #define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s") static inline int pcmk__str_empty(const char *s) { return (s == NULL) || (s[0] == '\0'); } static inline char * pcmk__getpid_s(void) { return crm_strdup_printf("%lu", (unsigned long) getpid()); } // More efficient than g_list_length(list) == 1 static inline bool pcmk__list_of_1(GList *list) { return list && (list->next == NULL); } // More efficient than g_list_length(list) > 1 static inline bool pcmk__list_of_multiple(GList *list) { return list && (list->next != NULL); } /* convenience functions for failure-related node attributes */ #define PCMK__FAIL_COUNT_PREFIX "fail-count" #define PCMK__LAST_FAILURE_PREFIX "last-failure" /*! * \internal * \brief Generate a failure-related node attribute name for a resource * * \param[in] prefix Start of attribute name * \param[in] rsc_id Resource name * \param[in] op Operation name * \param[in] interval_ms Operation interval * * \return Newly allocated string with attribute name * * \note Failure attributes are named like PREFIX-RSC#OP_INTERVAL (for example, * "fail-count-myrsc#monitor_30000"). The '#' is used because it is not * a valid character in a resource ID, to reliably distinguish where the * operation name begins. The '_' is used simply to be more comparable to * action labels like "myrsc_monitor_30000". */ static inline char * pcmk__fail_attr_name(const char *prefix, const char *rsc_id, const char *op, guint interval_ms) { CRM_CHECK(prefix && rsc_id && op, return NULL); return crm_strdup_printf("%s-%s#%s_%u", prefix, rsc_id, op, interval_ms); } static inline char * pcmk__failcount_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__FAIL_COUNT_PREFIX, rsc_id, op, interval_ms); } static inline char * pcmk__lastfailure_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__LAST_FAILURE_PREFIX, rsc_id, op, interval_ms); } #endif /* CRM_COMMON_INTERNAL__H */ diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h index a0df956524..c67aaea4d4 100644 --- a/include/crm/common/ipc.h +++ b/include/crm/common/ipc.h @@ -1,137 +1,230 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_COMMON_IPC__H # define CRM_COMMON_IPC__H #ifdef __cplusplus extern "C" { #endif /** * \file - * \brief Wrappers for and extensions to libqb IPC + * \brief IPC interface to Pacemaker daemons + * * \ingroup core */ #include #include #include /* * Message creation utilities * * These are used for both IPC messages and cluster layer messages. However, * since this is public API, they stay in this header for backward * compatibility. */ #define create_reply(request, xml_response_data) \ create_reply_adv(request, xml_response_data, __FUNCTION__) xmlNode *create_reply_adv(xmlNode *request, xmlNode *xml_response_data, const char *origin); #define create_request(task, xml_data, host_to, sys_to, sys_from, uuid_from) \ create_request_adv(task, xml_data, host_to, sys_to, sys_from, uuid_from, \ __FUNCTION__) xmlNode *create_request_adv(const char *task, xmlNode *xml_data, const char *host_to, const char *sys_to, const char *sys_from, const char *uuid_from, const char *origin); +/* + * The library supports two methods of creating IPC connections. The older code + * allows connecting to any arbitrary IPC name. The newer code only allows + * connecting to one of the Pacemaker daemons. + * + * As daemons are converted to use the new model, the old functions should be + * considered deprecated for use with those daemons. Once all daemons are + * converted, the old functions should be officially deprecated as public API + * and eventually made internal API. + */ + +/* + * Pacemaker daemon IPC + */ + +//! Available IPC interfaces +enum pcmk_ipc_server { + pcmk_ipc_attrd, //!< Attribute manager + pcmk_ipc_based, //!< CIB manager + pcmk_ipc_controld, //!< Controller + pcmk_ipc_execd, //!< Executor + pcmk_ipc_fenced, //!< Fencer + pcmk_ipc_pacemakerd, //!< Launcher + pcmk_ipc_schedulerd, //!< Scheduler +}; + +//! Possible event types that an IPC event callback can be called for +enum pcmk_ipc_event { + pcmk_ipc_event_connect, //!< Result of asynchronous connection attempt + pcmk_ipc_event_disconnect, //!< Termination of IPC connection + pcmk_ipc_event_reply, //!< Daemon's reply to client IPC request + pcmk_ipc_event_notify, //!< Notification from daemon +}; + +//! How IPC replies should be dispatched +enum pcmk_ipc_dispatch { + pcmk_ipc_dispatch_main, //!< Attach IPC to GMainLoop for dispatch + pcmk_ipc_dispatch_poll, //!< Caller will poll and dispatch IPC + pcmk_ipc_dispatch_sync, //!< Sending a command will wait for any reply +}; + +//! Client connection to Pacemaker IPC +typedef struct pcmk_ipc_api_s pcmk_ipc_api_t; + +/*! + * \brief Callback function type for Pacemaker daemon IPC APIs + * + * \param[in] api IPC API connection + * \param[in] event_type The type of event that occurred + * \param[in] status Event status + * \param[in] event_data Event-specific data + * \param[in] user_data Caller data provided when callback was registered + * + * \note For connection and disconnection events, event_data may be NULL (for + * local IPC) or the name of the connected node (for remote IPC, for + * daemons that support that). For reply and notify events, event_data is + * defined by the specific daemon API. + */ +typedef void (*pcmk_ipc_callback_t)(pcmk_ipc_api_t *api, + enum pcmk_ipc_event event_type, + crm_exit_t status, + void *event_data, void *user_data); + +int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server); + +void pcmk_free_ipc_api(pcmk_ipc_api_t *api); + +int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type); + +void pcmk_disconnect_ipc(pcmk_ipc_api_t *api); + +int pcmk_poll_ipc(pcmk_ipc_api_t *api, int timeout_ms); + +void pcmk_dispatch_ipc(pcmk_ipc_api_t *api); + +void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, + void *user_data); + +const char *pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log); + +bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api); + +int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, + uint32_t nodeid); + + +/* + * Generic IPC API (to eventually be deprecated as public API and made internal) + */ + /* *INDENT-OFF* */ enum crm_ipc_flags { crm_ipc_flags_none = 0x00000000, crm_ipc_compressed = 0x00000001, /* Message has been compressed */ crm_ipc_proxied = 0x00000100, /* _ALL_ replies to proxied connections need to be sent as events */ crm_ipc_client_response = 0x00000200, /* A Response is expected in reply */ - // These are options only for pcmk__ipc_send_iov() + // These are options for Pacemaker's internal use only (pcmk__ipc_send_*()) crm_ipc_server_event = 0x00010000, /* Send an Event instead of a Response */ crm_ipc_server_free = 0x00020000, /* Free the iovec after sending */ crm_ipc_proxied_relay_response = 0x00040000, /* all replies to proxied connections are sent as events, this flag preserves whether the event should be treated as an actual event, or a response.*/ crm_ipc_server_info = 0x00100000, /* Log failures as LOG_INFO */ crm_ipc_server_error = 0x00200000, /* Log failures as LOG_ERR */ }; /* *INDENT-ON* */ typedef struct crm_ipc_s crm_ipc_t; crm_ipc_t *crm_ipc_new(const char *name, size_t max_size); bool crm_ipc_connect(crm_ipc_t * client); void crm_ipc_close(crm_ipc_t * client); void crm_ipc_destroy(crm_ipc_t * client); void pcmk_free_ipc_event(struct iovec *event); int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode ** reply); int crm_ipc_get_fd(crm_ipc_t * client); bool crm_ipc_connected(crm_ipc_t * client); int crm_ipc_ready(crm_ipc_t * client); long crm_ipc_read(crm_ipc_t * client); const char *crm_ipc_buffer(crm_ipc_t * client); uint32_t crm_ipc_buffer_flags(crm_ipc_t * client); const char *crm_ipc_name(crm_ipc_t * client); unsigned int crm_ipc_default_buffer_size(void); /*! * \brief Check the authenticity of the IPC socket peer process * * If everything goes well, peer's authenticity is verified by the means * of comparing against provided referential UID and GID (either satisfies), * and the result of this check can be deduced from the return value. * As an exception, detected UID of 0 ("root") satisfies arbitrary * provided referential daemon's credentials. * * \param[in] sock IPC related, connected Unix socket to check peer of * \param[in] refuid referential UID to check against * \param[in] refgid referential GID to check against * \param[out] gotpid to optionally store obtained PID of the peer * (not available on FreeBSD, special value of 1 * used instead, and the caller is required to * special case this value respectively) * \param[out] gotuid to optionally store obtained UID of the peer * \param[out] gotgid to optionally store obtained GID of the peer * * \return 0 if IPC related socket's peer is not authentic given the * referential credentials (see above), 1 if it is, * negative value on error (generally expressing -errno unless * it was zero even on nonhappy path, -pcmk_err_generic is * returned then; no message is directly emitted) * * \note While this function is tolerant on what constitutes authorized * IPC daemon process (its effective user matches UID=0 or \p refuid, * or at least its group matches \p refgid), either or both (in case * of UID=0) mismatches on the expected credentials of such peer * process \e shall be investigated at the caller when value of 1 * gets returned there, since higher-than-expected privileges in * respect to the expected/intended credentials possibly violate * the least privilege principle and may pose an additional risk * (i.e. such accidental inconsistency shall be eventually fixed). */ int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); -/* Utils */ +/* This is controller-specific but is declared in this header for C API + * backward compatibility. + */ xmlNode *create_hello_message(const char *uuid, const char *client_name, const char *major_version, const char *minor_version); #ifdef __cplusplus } #endif #endif diff --git a/include/crm/common/ipc_controld.h b/include/crm/common/ipc_controld.h new file mode 100644 index 0000000000..0ebabfcfb0 --- /dev/null +++ b/include/crm/common/ipc_controld.h @@ -0,0 +1,99 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef PCMK__IPC_CONTROLD__H +# define PCMK__IPC_CONTROLD__H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \file + * \brief IPC commands for Pacemaker controller + * + * \ingroup core + */ + +#include // bool +#include // xmlNode +#include // pcmk_ipc_api_t + +//! Possible types of controller replies +enum pcmk_controld_api_reply { + pcmk_controld_reply_unknown, + pcmk_controld_reply_reprobe, + pcmk_controld_reply_info, + pcmk_controld_reply_resource, + pcmk_controld_reply_ping, +}; + +/*! + * Controller reply passed to event callback + * + * \note Shutdown and election calls have no reply. Reprobe calls are + * acknowledged but contain no data (reply_type will be the only item + * set). Node info and ping calls have their own reply data. Fail and + * refresh calls use the resource reply type and reply data. + * \note The pointers in the reply are only guaranteed to be meaningful for the + * execution of the callback; if the values are needed for later, the + * callback should copy them. + */ +typedef struct { + enum pcmk_controld_api_reply reply_type; + const char *feature_set; //!< CRM feature set advertised by controller + const char *host_from; //!< Name of node that sent reply + + union { + // pcmk_controld_reply_info + struct { + bool have_quorum; + bool is_remote; + int id; + const char *uuid; + const char *uname; + const char *state; + } node_info; + + // pcmk_controld_reply_resource + struct { + xmlNode *node_state; // // uint8_t, uint32_t #include // bool #include // size_t #include // GList #include // xmlNode, xmlAttr #include // struct qb_ipc_response_header /* * XML and ACLs */ enum xml_private_flags { xpf_none = 0x0000, xpf_dirty = 0x0001, xpf_deleted = 0x0002, xpf_created = 0x0004, xpf_modified = 0x0008, xpf_tracking = 0x0010, xpf_processed = 0x0020, xpf_skip = 0x0040, xpf_moved = 0x0080, xpf_acl_enabled = 0x0100, xpf_acl_read = 0x0200, xpf_acl_write = 0x0400, xpf_acl_deny = 0x0800, xpf_acl_create = 0x1000, xpf_acl_denied = 0x2000, xpf_lazy = 0x4000, }; typedef struct xml_private_s { long check; uint32_t flags; char *user; GList *acls; GList *deleted_objs; } xml_private_t; G_GNUC_INTERNAL void pcmk__set_xml_flag(xmlNode *xml, enum xml_private_flags flag); G_GNUC_INTERNAL bool pcmk__tracking_xml_changes(xmlNode *xml, bool lazy); G_GNUC_INTERNAL int pcmk__element_xpath(const char *prefix, xmlNode *xml, char *buffer, int offset, size_t buffer_size); G_GNUC_INTERNAL void pcmk__free_acls(GList *acls); G_GNUC_INTERNAL void pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user); G_GNUC_INTERNAL bool pcmk__check_acl(xmlNode *xml, const char *name, enum xml_private_flags mode); G_GNUC_INTERNAL void pcmk__apply_acl(xmlNode *xml); G_GNUC_INTERNAL void pcmk__apply_creation_acl(xmlNode *xml, bool check_top); G_GNUC_INTERNAL void pcmk__mark_xml_attr_dirty(xmlAttr *a); static inline xmlAttr * pcmk__first_xml_attr(const xmlNode *xml) { return xml? xml->properties : NULL; } static inline const char * pcmk__xml_attr_value(const xmlAttr *attr) { return ((attr == NULL) || (attr->children == NULL))? NULL : (const char *) attr->children->content; } /* * IPC */ #define PCMK__IPC_VERSION 1 +#define PCMK__CONTROLD_API_MAJOR "1" +#define PCMK__CONTROLD_API_MINOR "0" + +// IPC behavior that varies by daemon +typedef struct pcmk__ipc_methods_s { + /*! + * \internal + * \brief Allocate any private data needed by daemon IPC + * + * \param[in] api IPC API connection + * + * \return Standard Pacemaker return code + */ + int (*new_data)(pcmk_ipc_api_t *api); + + /*! + * \internal + * \brief Free any private data used by daemon IPC + * + * \param[in] api_data Data allocated by new_data() method + */ + void (*free_data)(void *api_data); + + /*! + * \internal + * \brief Perform daemon-specific handling after successful connection + * + * Some daemons require clients to register before sending any other + * commands. The controller requires a CRM_OP_HELLO (with no reply), and + * the CIB manager, executor, and fencer require a CRM_OP_REGISTER (with a + * reply). Ideally this would be consistent across all daemons, but for now + * this allows each to do its own authorization. + * + * \param[in] api IPC API connection + * + * \return Standard Pacemaker return code + */ + int (*post_connect)(pcmk_ipc_api_t *api); + + /*! + * \internal + * \brief Check whether an IPC request results in a reply + * + * \parma[in] api IPC API connection + * \param[in] request IPC request XML + * + * \return true if request would result in an IPC reply, false otherwise + */ + bool (*reply_expected)(pcmk_ipc_api_t *api, xmlNode *request); + + /*! + * \internal + * \brief Perform daemon-specific handling of an IPC message + * + * \param[in] api IPC API connection + * \param[in] msg Message read from IPC connection + */ + void (*dispatch)(pcmk_ipc_api_t *api, xmlNode *msg); + + /*! + * \internal + * \brief Perform daemon-specific handling of an IPC disconnect + * + * \param[in] api IPC API connection + */ + void (*post_disconnect)(pcmk_ipc_api_t *api); +} pcmk__ipc_methods_t; + +// Implementation of pcmk_ipc_api_t +struct pcmk_ipc_api_s { + enum pcmk_ipc_server server; // Daemon this IPC API instance is for + enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched + crm_ipc_t *ipc; // IPC connection + mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC + bool free_on_disconnect; // Whether disconnect should free object + pcmk_ipc_callback_t cb; // Caller-registered callback (if any) + void *user_data; // Caller-registered data (if any) + void *api_data; // For daemon-specific use + pcmk__ipc_methods_t *cmds; // Behavior that varies by daemon +}; + typedef struct pcmk__ipc_header_s { struct qb_ipc_response_header qb; uint32_t size_uncompressed; uint32_t size_compressed; uint32_t flags; uint8_t version; } pcmk__ipc_header_t; +G_GNUC_INTERNAL +int pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request); + +G_GNUC_INTERNAL +void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, + enum pcmk_ipc_event event_type, + crm_exit_t status, void *event_data); + G_GNUC_INTERNAL unsigned int pcmk__ipc_buffer_size(unsigned int max); G_GNUC_INTERNAL bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header); +G_GNUC_INTERNAL +pcmk__ipc_methods_t *pcmk__controld_api_methods(void); + #endif // CRMCOMMON_PRIVATE__H diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index 773758865c..4077d615fd 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -1,755 +1,1416 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED) # ifdef US_AUTH_PEERCRED_UCRED # ifndef _GNU_SOURCE # define _GNU_SOURCE # endif # endif # include #elif defined(US_AUTH_GETPEERUCRED) # include #endif #include #include #include #include #include /* indirectly: pcmk_err_generic */ #include #include #include #include "crmcommon_private.h" +/*! + * \brief Create a new object for using Pacemaker daemon IPC + * + * \param[out] api Where to store new IPC object + * \param[in] server Which Pacemaker daemon the object is for + * + * \return Standard Pacemaker result code + * + * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). + * \note This is intended to supersede crm_ipc_new() but currently only + * supports the controller IPC API. + */ +int +pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) +{ + size_t max_size = 0; + + if (api == NULL) { + return EINVAL; + } + + *api = calloc(1, sizeof(pcmk_ipc_api_t)); + if (*api == NULL) { + return errno; + } + + (*api)->server = server; + if (pcmk_ipc_name(*api, false) == NULL) { + pcmk_free_ipc_api(*api); + *api = NULL; + return EOPNOTSUPP; + } + + // Set server methods and max_size (if not default) + switch (server) { + case pcmk_ipc_attrd: + break; + + case pcmk_ipc_based: + max_size = 512 * 1024; // 512KB + break; + + case pcmk_ipc_controld: + (*api)->cmds = pcmk__controld_api_methods(); + break; + + case pcmk_ipc_execd: + break; + + case pcmk_ipc_fenced: + break; + + case pcmk_ipc_pacemakerd: + break; + + case pcmk_ipc_schedulerd: + // @TODO max_size could vary by client, maybe take as argument? + max_size = 5 * 1024 * 1024; // 5MB + break; + } + if ((*api)->cmds == NULL) { + pcmk_free_ipc_api(*api); + *api = NULL; + return ENOMEM; + } + + (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), max_size); + if ((*api)->ipc == NULL) { + pcmk_free_ipc_api(*api); + *api = NULL; + return ENOMEM; + } + + // If daemon API has its own data to track, allocate it + if ((*api)->cmds->new_data != NULL) { + if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) { + pcmk_free_ipc_api(*api); + *api = NULL; + return ENOMEM; + } + } + crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true)); + return pcmk_rc_ok; +} + +static void +free_daemon_specific_data(pcmk_ipc_api_t *api) +{ + if ((api != NULL) && (api->cmds != NULL)) { + if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) { + api->cmds->free_data(api->api_data); + api->api_data = NULL; + } + free(api->cmds); + api->cmds = NULL; + } +} + +/*! + * \internal + * \brief Call an IPC API event callback, if one is registed + * + * \param[in] api IPC API connection + * \param[in] event_type The type of event that occurred + * \param[in] status Event status + * \param[in] event_data Event-specific data + */ +void +pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, + crm_exit_t status, void *event_data) +{ + if ((api != NULL) && (api->cb != NULL)) { + api->cb(api, event_type, status, event_data, api->user_data); + } +} + +/*! + * \internal + * \brief Clean up after an IPC disconnect + * + * \param[in] user_data IPC API connection that disconnected + * + * \note This function can be used as a main loop IPC destroy callback. + */ +static void +ipc_post_disconnect(gpointer user_data) +{ + pcmk_ipc_api_t *api = user_data; + + crm_info("Disconnected from %s IPC API", pcmk_ipc_name(api, true)); + + // Perform any daemon-specific handling needed + if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) { + api->cmds->post_disconnect(api); + } + + // Call client's registered event callback + pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT, + NULL); + + /* If this is being called from a running main loop, mainloop_gio_destroy() + * will free ipc and mainloop_io immediately after calling this function. + * If this is called from a stopped main loop, these will leak, so the best + * practice is to close the connection before stopping the main loop. + */ + api->ipc = NULL; + api->mainloop_io = NULL; + + if (api->free_on_disconnect) { + /* pcmk_free_ipc_api() has already been called, but did not free api + * or api->cmds because this function needed them. Do that now. + */ + free_daemon_specific_data(api); + crm_trace("Freeing IPC API object after disconnect"); + free(api); + } +} + +/*! + * \brief Free the contents of an IPC API object + * + * \param[in] api IPC API object to free + */ +void +pcmk_free_ipc_api(pcmk_ipc_api_t *api) +{ + bool free_on_disconnect = false; + + if (api == NULL) { + return; + } + crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true)); + + if (api->ipc != NULL) { + if (api->mainloop_io != NULL) { + /* We need to keep the api pointer itself around, because it is the + * user data for the IPC client destroy callback. That will be + * triggered by the pcmk_disconnect_ipc() call below, but it might + * happen later in the main loop (if still running). + * + * This flag tells the destroy callback to free the object. It can't + * do that unconditionally, because the application might call this + * function after a disconnect that happened by other means. + */ + free_on_disconnect = api->free_on_disconnect = true; + } + pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true + } + if (!free_on_disconnect) { + free_daemon_specific_data(api); + crm_trace("Freeing IPC API object"); + free(api); + } +} + +/*! + * \brief Get the IPC name used with an IPC API connection + * + * \param[in] api IPC API connection + * \param[in] for_log If true, return human-friendly name instead of IPC name + * + * \return IPC API's human-friendly or connection name, or if none is available, + * "Pacemaker" if for_log is true and NULL if for_log is false + */ +const char * +pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log) +{ + if (api == NULL) { + return for_log? "Pacemaker" : NULL; + } + switch (api->server) { + case pcmk_ipc_attrd: + return for_log? "attribute manager" : NULL /* T_ATTRD */; + + case pcmk_ipc_based: + return for_log? "CIB manager" : NULL /* PCMK__SERVER_BASED_RW */; + + case pcmk_ipc_controld: + return for_log? "controller" : CRM_SYSTEM_CRMD; + + case pcmk_ipc_execd: + return for_log? "executor" : NULL /* CRM_SYSTEM_LRMD */; + + case pcmk_ipc_fenced: + return for_log? "fencer" : NULL /* "stonith-ng" */; + + case pcmk_ipc_pacemakerd: + return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */; + + case pcmk_ipc_schedulerd: + return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */; + + default: + return for_log? "Pacemaker" : NULL; + } +} + +/*! + * \brief Check whether an IPC API connection is active + * + * \param[in] api IPC API connection + * + * \return true if IPC is connected, false otherwise + */ +bool +pcmk_ipc_is_connected(pcmk_ipc_api_t *api) +{ + return (api != NULL) && crm_ipc_connected(api->ipc); +} + +/*! + * \internal + * \brief Call the daemon-specific API's dispatch function + * + * Perform daemon-specific handling of IPC reply dispatch. It is the daemon + * method's responsibility to call the client's registered event callback, as + * well as allocate and free any event data. + * + * \param[in] api IPC API connection + */ +static void +call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message) +{ + crm_log_xml_trace(message, "ipc-received"); + if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) { + api->cmds->dispatch(api, message); + } +} + +/*! + * \internal + * \brief Dispatch data read from IPC source + * + * \param[in] buffer Data read from IPC + * \param[in] length Number of bytes of data in buffer (ignored) + * \param[in] user_data IPC object + * + * \return Always 0 (meaning connection is still required) + * + * \note This function can be used as a main loop IPC dispatch callback. + */ +static int +dispatch_ipc_data(const char *buffer, ssize_t length, gpointer user_data) +{ + pcmk_ipc_api_t *api = user_data; + xmlNode *msg; + + CRM_CHECK(api != NULL, return 0); + + if (buffer == NULL) { + crm_warn("Empty message received from %s IPC", + pcmk_ipc_name(api, true)); + return 0; + } + + msg = string2xml(buffer); + if (msg == NULL) { + crm_warn("Malformed message received from %s IPC", + pcmk_ipc_name(api, true)); + return 0; + } + call_api_dispatch(api, msg); + free_xml(msg); + return 0; +} + +/*! + * \brief Check whether an IPC connection has data available (without main loop) + * + * \param[in] api IPC API connection + * \param[in] timeout_ms If less than 0, poll indefinitely; if 0, poll once + * and return immediately; otherwise, poll for up to + * this many milliseconds + * + * \return Standard Pacemaker return code + * + * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call + * this function to check whether IPC data is available. Return values of + * interest include pcmk_rc_ok meaning data is available, and EAGAIN + * meaning no data is available; all other values indicate errors. + * \todo This does not allow the caller to poll multiple file descriptors at + * once. If there is demand for that, we could add a wrapper for + * crm_ipc_get_fd(api->ipc), so the caller can call poll() themselves. + */ +int +pcmk_poll_ipc(pcmk_ipc_api_t *api, int timeout_ms) +{ + int rc; + struct pollfd pollfd = { 0, }; + + if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) { + return EINVAL; + } + pollfd.fd = crm_ipc_get_fd(api->ipc); + pollfd.events = POLLIN; + rc = poll(&pollfd, 1, timeout_ms); + if (rc < 0) { + return errno; + } else if (rc == 0) { + return EAGAIN; + } + return pcmk_rc_ok; +} + +/*! + * \brief Dispatch available messages on an IPC connection (without main loop) + * + * \param[in] api IPC API connection + * + * \return Standard Pacemaker return code + * + * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call + * this function when IPC data is available. + */ +void +pcmk_dispatch_ipc(pcmk_ipc_api_t *api) +{ + if (api == NULL) { + return; + } + while (crm_ipc_ready(api->ipc)) { + if (crm_ipc_read(api->ipc) > 0) { + dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api); + } + } +} + +// \return Standard Pacemaker return code +static int +connect_with_main_loop(pcmk_ipc_api_t *api) +{ + int rc; + + struct ipc_client_callbacks callbacks = { + .dispatch = dispatch_ipc_data, + .destroy = ipc_post_disconnect, + }; + + rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api, + &callbacks, &(api->mainloop_io)); + if (rc != pcmk_rc_ok) { + return rc; + } + crm_debug("Connected to %s IPC (attached to main loop)", + pcmk_ipc_name(api, true)); + /* After this point, api->mainloop_io owns api->ipc, so api->ipc + * should not be explicitly freed. + */ + return pcmk_rc_ok; +} + +// \return Standard Pacemaker return code +static int +connect_without_main_loop(pcmk_ipc_api_t *api) +{ + int rc; + + if (!crm_ipc_connect(api->ipc)) { + rc = errno; + crm_ipc_close(api->ipc); + return rc; + } + crm_debug("Connected to %s IPC (without main loop)", + pcmk_ipc_name(api, true)); + return pcmk_rc_ok; +} + +/*! + * \brief Connect to a Pacemaker daemon via IPC + * + * \param[in] api IPC API instance + * \param[out] dispatch_type How IPC replies should be dispatched + * + * \return Standard Pacemaker return code + */ +int +pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) +{ + int rc = pcmk_rc_ok; + + if ((api == NULL) || (api->ipc == NULL)) { + crm_err("Cannot connect to uninitialized API object"); + return EINVAL; + } + + if (crm_ipc_connected(api->ipc)) { + crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true)); + return pcmk_rc_ok; + } + + api->dispatch_type = dispatch_type; + switch (dispatch_type) { + case pcmk_ipc_dispatch_main: + rc = connect_with_main_loop(api); + break; + + case pcmk_ipc_dispatch_sync: + case pcmk_ipc_dispatch_poll: + rc = connect_without_main_loop(api); + break; + } + if (rc != pcmk_rc_ok) { + return rc; + } + + if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) { + rc = api->cmds->post_connect(api); + if (rc != pcmk_rc_ok) { + crm_ipc_close(api->ipc); + } + } + return rc; +} + +/*! + * \brief Disconnect an IPC API instance + * + * \param[in] api IPC API connection + * + * \return Standard Pacemaker return code + * + * \note If the connection is attached to a main loop, this function should be + * called before quitting the main loop, to ensure that all memory is + * freed. + */ +void +pcmk_disconnect_ipc(pcmk_ipc_api_t *api) +{ + if ((api == NULL) || (api->ipc == NULL)) { + return; + } + switch (api->dispatch_type) { + case pcmk_ipc_dispatch_main: + { + mainloop_io_t *mainloop_io = api->mainloop_io; + + // Make sure no code with access to api can use these again + api->mainloop_io = NULL; + api->ipc = NULL; + + mainloop_del_ipc_client(mainloop_io); + // After this point api might have already been freed + } + break; + + case pcmk_ipc_dispatch_poll: + case pcmk_ipc_dispatch_sync: + { + crm_ipc_t *ipc = api->ipc; + + // Make sure no code with access to api can use ipc again + api->ipc = NULL; + + // This should always be the case already, but to be safe + api->free_on_disconnect = false; + + crm_ipc_destroy(ipc); + ipc_post_disconnect(api); + } + break; + } +} + +/*! + * \brief Register a callback for IPC API events + * + * \param[in] api IPC API connection + * \param[in] callback Callback to register + * \param[in] userdata Caller data to pass to callback + * + * \note This function may be called multiple times to update the callback + * and/or user data. The caller remains responsible for freeing + * userdata in any case (after the IPC is disconnected, if the + * user data is still registered with the IPC). + */ +void +pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, + void *user_data) +{ + if (api == NULL) { + return; + } + api->cb = cb; + api->user_data = user_data; +} + +/*! + * \internal + * \brief Send an XML request across an IPC API connection + * + * \param[in] api IPC API connection + * \param[in] request XML request to send + * + * \return Standard Pacemaker return code + * + * \note Daemon-specific IPC API functions should call this function to send + * requests, because it handles different dispatch types appropriately. + */ +int +pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request) +{ + int rc; + xmlNode *reply = NULL; + enum crm_ipc_flags flags = crm_ipc_flags_none; + + if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) { + return EINVAL; + } + crm_log_xml_trace(request, "ipc-sent"); + + // Synchronous dispatch requires waiting for a reply + if ((api->dispatch_type == pcmk_ipc_dispatch_sync) + && (api->cmds != NULL) + && (api->cmds->reply_expected != NULL) + && (api->cmds->reply_expected(api, request))) { + flags = crm_ipc_client_response; + } + + // The 0 here means a default timeout of 5 seconds + rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); + + if (rc < 0) { + return pcmk_legacy2rc(rc); + } else if (rc == 0) { + return ENODATA; + } + + // With synchronous dispatch, we dispatch any reply now + if (reply != NULL) { + call_api_dispatch(api, reply); + free_xml(reply); + } + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Create the XML for an IPC request to purge a node from the peer cache + * + * \param[in] api IPC API connection + * \param[in] node_name If not NULL, name of node to purge + * \param[in] nodeid If not 0, node ID of node to purge + * + * \return Newly allocated IPC request XML + * + * \note The controller, fencer, and pacemakerd use the same request syntax, but + * the attribute manager uses a different one. The CIB manager doesn't + * have any syntax for it. The executor and scheduler don't connect to the + * cluster layer and thus don't have or need any syntax for it. + * + * \todo Modify the attribute manager to accept the common syntax (as well + * as its current one, for compatibility with older clients). Modify + * the CIB manager to accept and honor the common syntax. Modify the + * executor and scheduler to accept the syntax (immediately returning + * success), just for consistency. Modify this function to use the + * common syntax with all daemons if their version supports it. + */ +static xmlNode * +create_purge_node_request(pcmk_ipc_api_t *api, const char *node_name, + uint32_t nodeid) +{ + xmlNode *request = NULL; + const char *client = crm_system_name? crm_system_name : "client"; + + switch (api->server) { + case pcmk_ipc_attrd: + request = create_xml_node(NULL, __FUNCTION__); + crm_xml_add(request, F_TYPE, T_ATTRD); + crm_xml_add(request, F_ORIG, crm_system_name); + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); + crm_xml_add(request, PCMK__XA_ATTR_NODE_NAME, node_name); + if (nodeid > 0) { + crm_xml_add_int(request, PCMK__XA_ATTR_NODE_ID, (int) nodeid); + } + break; + + case pcmk_ipc_controld: + case pcmk_ipc_fenced: + case pcmk_ipc_pacemakerd: + request = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, + pcmk_ipc_name(api, false), client, NULL); + if (nodeid > 0) { + crm_xml_set_id(request, "%lu", (unsigned long) nodeid); + } + crm_xml_add(request, XML_ATTR_UNAME, node_name); + break; + + case pcmk_ipc_based: + case pcmk_ipc_execd: + case pcmk_ipc_schedulerd: + break; + } + return request; +} + +/*! + * \brief Ask a Pacemaker daemon to purge a node from its peer cache + * + * \param[in] api IPC API connection + * \param[in] node_name If not NULL, name of node to purge + * \param[in] nodeid If not 0, node ID of node to purge + * + * \return Standard Pacemaker return code + * + * \note At least one of node_name or nodeid must be specified. + */ +int +pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) +{ + int rc = 0; + xmlNode *request = NULL; + + if (api == NULL) { + return EINVAL; + } + if ((node_name == NULL) && (nodeid == 0)) { + return EINVAL; + } + + request = create_purge_node_request(api, node_name, nodeid); + if (request == NULL) { + return EOPNOTSUPP; + } + rc = pcmk__send_ipc_request(api, request); + free_xml(request); + + crm_debug("%s peer cache purge of node %s[%lu]: rc=%d", + pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc); + return rc; +} + +/* + * Generic IPC API (to eventually be deprecated as public API and made internal) + */ + struct crm_ipc_s { struct pollfd pfd; unsigned int max_buf_size; // maximum bytes we can send or receive over IPC unsigned int buf_size; // size of allocated buffer int msg_size; int need_reply; char *buffer; char *name; qb_ipcc_connection_t *ipc; }; +/*! + * \brief Create a new (legacy) object for using Pacemaker daemon IPC + * + * \param[in] name IPC system name to connect to + * \param[in] max_size Use a maximum IPC buffer size of at least this size + * + * \return Newly allocated IPC object on success, NULL otherwise + * + * \note The caller is responsible for freeing the result using + * crm_ipc_destroy(). + * \note This should be considered deprecated for use with daemons supported by + * pcmk_new_ipc_api(). + */ crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); + if (client == NULL) { + crm_err("Could not create IPC connection: %s", strerror(errno)); + return NULL; + } client->name = strdup(name); + if (client->name == NULL) { + crm_err("Could not create IPC connection: %s", strerror(errno)); + free(client); + return NULL; + } client->buf_size = pcmk__ipc_buffer_size(max_size); client->buffer = malloc(client->buf_size); + if (client->buffer == NULL) { + crm_err("Could not create IPC connection: %s", strerror(errno)); + free(client->name); + free(client); + return NULL; + } /* Clients initiating connection pick the max buf size */ client->max_buf_size = client->buf_size; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \brief Establish an IPC connection to a Pacemaker component * * \param[in] client Connection instance obtained from crm_ipc_new() * * \return TRUE on success, FALSE otherwise (in which case errno will be set; * specifically, in case of discovering the remote side is not * authentic, its value is set to ECONNABORTED). */ bool crm_ipc_connect(crm_ipc_t * client) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rv; client->need_reply = FALSE; client->ipc = qb_ipcc_connect(client->name, client->buf_size); if (client->ipc == NULL) { crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); return FALSE; } client->pfd.fd = crm_ipc_get_fd(client); if (client->pfd.fd < 0) { rv = errno; /* message already omitted */ crm_ipc_close(client); errno = rv; return FALSE; } rv = pcmk_daemon_user(&cl_uid, &cl_gid); if (rv < 0) { /* message already omitted */ crm_ipc_close(client); errno = -rv; return FALSE; } if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid))) { crm_err("Daemon (IPC %s) is not authentic:" " process %lld (uid: %lld, gid: %lld)", client->name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); crm_ipc_close(client); errno = ECONNABORTED; return FALSE; } else if (rv < 0) { errno = -rv; crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)", client->name); crm_ipc_close(client); errno = -rv; return FALSE; } qb_ipcc_context_set(client->ipc, client); #ifdef HAVE_IPCS_GET_BUFFER_SIZE client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc); if (client->max_buf_size > client->buf_size) { free(client->buffer); client->buffer = calloc(1, client->max_buf_size); client->buf_size = client->max_buf_size; } #endif return TRUE; } void crm_ipc_close(crm_ipc_t * client) { if (client) { - crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc); - if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying an active IPC connection to %s", client->name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } crm_trace("Destroying IPC connection to %s: %p", client->name, client); free(client->buffer); free(client->name); free(client); } } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = 0; if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) { return fd; } errno = EINVAL; crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s", (client? client->name : "unspecified client")); return -errno; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; CRM_ASSERT(client != NULL); if (crm_ipc_connected(client) == FALSE) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } // \return Standard Pacemaker return code static int crm_ipc_decompress(crm_ipc_t * client) { pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer; if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; /* never let buf size fall below our max size required for ipc reads. */ unsigned int new_buf_size = QB_MAX((sizeof(pcmk__ipc_header_t) + size_u), client->max_buf_size); char *uncompressed = calloc(1, new_buf_size); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed + sizeof(pcmk__ipc_header_t), &size_u, client->buffer + sizeof(pcmk__ipc_header_t), header->size_compressed, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); return EILSEQ; } /* * This assert no longer holds true. For an identical msg, some clients may * require compression, and others may not. If that same msg (event) is sent * to multiple clients, it could result in some clients receiving a compressed * msg even though compression was not explicitly required for them. * * CRM_ASSERT((header->size_uncompressed + sizeof(pcmk__ipc_header_t)) >= ipc_buffer_max); */ CRM_ASSERT(size_u == header->size_uncompressed); memcpy(uncompressed, client->buffer, sizeof(pcmk__ipc_header_t)); /* Preserve the header */ header = (pcmk__ipc_header_t *)(void*)uncompressed; free(client->buffer); client->buf_size = new_buf_size; client->buffer = uncompressed; } CRM_ASSERT(client->buffer[sizeof(pcmk__ipc_header_t) + header->size_uncompressed - 1] == 0); return pcmk_rc_ok; } long crm_ipc_read(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; CRM_ASSERT(client != NULL); CRM_ASSERT(client->ipc != NULL); CRM_ASSERT(client->buffer != NULL); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size, 0); if (client->msg_size >= 0) { int rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } header = (pcmk__ipc_header_t *)(void*)client->buffer; if (!pcmk__valid_ipc_header(header)) { return -EBADMSG; } crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s", client->name, header->qb.id, header->qb.size, client->msg_size, client->buffer + sizeof(pcmk__ipc_header_t)); } else { crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size)); } if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) { crm_err("Connection to %s failed", client->name); } if (header) { /* Data excluding the header */ return header->size_uncompressed; } return -ENOMSG; } const char * crm_ipc_buffer(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->buffer + sizeof(pcmk__ipc_header_t); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; CRM_ASSERT(client != NULL); if (client->buffer == NULL) { return 0; } header = (pcmk__ipc_header_t *)(void*)client->buffer; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); int rc = pcmk_rc_ok; /* get the reply */ crm_trace("client %s waiting on reply to msg id %d", client->name, request_id); do { *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000); if (*bytes > 0) { pcmk__ipc_header_t *hdr = NULL; rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return rc; } hdr = (pcmk__ipc_header_t *)(void*)client->buffer; if (hdr->qb.id == request_id) { /* Got it */ break; } else if (hdr->qb.id < request_id) { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "OldIpcReply"); } else { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "ImpossibleReply"); CRM_ASSERT(hdr->qb.id <= request_id); } } else if (crm_ipc_connected(client) == FALSE) { crm_err("Server disconnected client %s while waiting for msg id %d", client->name, request_id); break; } } while (time(NULL) < timeout); if (*bytes < 0) { rc = (int) -*bytes; // System errno } return rc; } /*! * \brief Send an IPC XML message * * \param[in] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode ** reply) { int rc = 0; ssize_t qb_rc = 0; ssize_t bytes = 0; struct iovec *iov; static uint32_t id = 0; static int factor = 8; pcmk__ipc_header_t *header; if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (crm_ipc_connected(client) == FALSE) { /* Don't even bother */ crm_notice("Can't send IPC request to %s: Connection closed", client->name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } if (client->need_reply) { qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (qb_rc < 0) { crm_warn("Sending IPC to %s disabled until pending reply received", client->name); return -EALREADY; } else { crm_notice("Sending IPC to %s re-enabled after pending reply received", client->name); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); if (rc != pcmk_rc_ok) { crm_warn("Couldn't prepare IPC request to %s: %s " CRM_XS " rc=%d", client->name, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; header->flags |= flags; if(is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ clear_bit(flags, crm_ipc_client_response); } if(header->size_compressed) { if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { crm_notice("Compressed message exceeds %d0%% of configured IPC " "limit (%u bytes); consider setting PCMK_ipc_buffer to " "%u or higher", factor, client->max_buf_size, 2 * client->max_buf_size); factor++; } } crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->name, header->qb.id, header->qb.size, ms_timeout); if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); do { /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return * an error if it's not connected? */ if (!crm_ipc_connected(client)) { goto send_cleanup; } qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout)); rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { goto send_cleanup; } else if (is_not_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes); if (rc != pcmk_rc_ok) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } rc = (int) bytes; // Negative system errno, or size of reply received } else { // No timeout, and client response needed do { qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1); } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client)); rc = (int) qb_rc; // Negative system errno, or size of reply received } if (rc > 0) { pcmk__ipc_header_t *hdr = (pcmk__ipc_header_t *)(void*)client->buffer; crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s", rc, hdr->qb.id, client->name, header->qb.id, crm_ipc_buffer(client)); if (reply) { *reply = string2xml(crm_ipc_buffer(client)); } } else { crm_trace("No reply to %s IPC %d: rc=%d", client->name, header->qb.id, rc); } send_cleanup: if (crm_ipc_connected(client) == FALSE) { crm_notice("Couldn't send %s IPC request %d: Connection closed " CRM_XS " rc=%d", client->name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " CRM_XS " rc=%d", client->name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " CRM_XS " rc=%d", client->name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } pcmk_free_ipc_event(iov); return rc; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #if defined(US_AUTH_PEERCRED_UCRED) struct ucred ucred; socklen_t ucred_len = sizeof(ucred); if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) && ucred_len == sizeof(ucred)) { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; #elif defined(US_AUTH_PEERCRED_SOCKPEERCRED) struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) && sockpeercred_len == sizeof(sockpeercred_len)) { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; #elif defined(US_AUTH_GETPEEREID) if (!getpeereid(sock, &found_uid, &found_gid)) { found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */ #elif defined(US_AUTH_GETPEERUCRED) ucred_t *ucred; if (!getpeerucred(sock, &ucred)) { errno = 0; found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ret = -errno; ucred_free(ucred); if (ret) { return (ret < 0) ? ret : -pcmk_err_generic; } #else # error "No way to authenticate a Unix socket peer" errno = 0; if (0) { #endif if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid); } else { ret = (errno > 0) ? -errno : -pcmk_err_generic; } return ret; } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; c = qb_ipcc_connect(name, 0); if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = crm_ipc_is_authentic_process(fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc < 0) { rc = pcmk_legacy2rc(auth_rc); crm_err("Could not get peer credentials from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } if (auth_rc == 0) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } - -xmlNode * -create_hello_message(const char *uuid, - const char *client_name, const char *major_version, const char *minor_version) -{ - xmlNode *hello_node = NULL; - xmlNode *hello = NULL; - - if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) - || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { - crm_err("Could not create IPC hello message from %s (UUID %s): " - "missing information", - client_name? client_name : "unknown client", - uuid? uuid : "unknown"); - return NULL; - } - - hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); - if (hello_node == NULL) { - crm_err("Could not create IPC hello message from %s (UUID %s): " - "Message data creation failed", client_name, uuid); - return NULL; - } - - crm_xml_add(hello_node, "major_version", major_version); - crm_xml_add(hello_node, "minor_version", minor_version); - crm_xml_add(hello_node, "client_name", client_name); - crm_xml_add(hello_node, "client_uuid", uuid); - - hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); - if (hello == NULL) { - crm_err("Could not create IPC hello message from %s (UUID %s): " - "Request creation failed", client_name, uuid); - return NULL; - } - free_xml(hello_node); - - crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); - return hello; -} diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c new file mode 100644 index 0000000000..22bb7338b6 --- /dev/null +++ b/lib/common/ipc_controld.c @@ -0,0 +1,609 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "crmcommon_private.h" + +struct controld_api_private_s { + char *client_uuid; + unsigned int replies_expected; +}; + +// \return Standard Pacemaker return code +static int +new_data(pcmk_ipc_api_t *api) +{ + struct controld_api_private_s *private = NULL; + + api->api_data = calloc(1, sizeof(struct controld_api_private_s)); + + if (api->api_data == NULL) { + return errno; + } + + private = api->api_data; + + /* This is set to the PID because that's how it was always done, but PIDs + * are not unique because clients can be remote. The value appears to be + * unused other than as part of F_CRM_SYS_FROM in IPC requests, which is + * only compared against the internal system names (CRM_SYSTEM_TENGINE, + * etc.), so it shouldn't be a problem. + */ + private->client_uuid = pcmk__getpid_s(); + + /* @TODO Implement a call ID model similar to the CIB, executor, and fencer + * IPC APIs, so that requests and replies can be matched, and + * duplicate replies can be discarded. + */ + return pcmk_rc_ok; +} + +static void +free_data(void *data) +{ + free(((struct controld_api_private_s *) data)->client_uuid); + free(data); +} + +// \return Standard Pacemaker return code +static int +post_connect(pcmk_ipc_api_t *api) +{ + /* The controller currently requires clients to register via a hello + * request, but does not reply back. + */ + struct controld_api_private_s *private = api->api_data; + const char *client_name = crm_system_name? crm_system_name : "client"; + xmlNode *hello; + int rc; + + hello = create_hello_message(private->client_uuid, client_name, + PCMK__CONTROLD_API_MAJOR, + PCMK__CONTROLD_API_MINOR); + rc = pcmk__send_ipc_request(api, hello); + free_xml(hello); + if (rc != pcmk_rc_ok) { + crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s", + pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc); + } else { + crm_debug("Sent IPC hello to %s", pcmk_ipc_name(api, true)); + } + return rc; +} + +#define xml_true(xml, field) crm_is_true(crm_element_value(xml, field)) + +static void +set_node_info_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) +{ + data->reply_type = pcmk_controld_reply_info; + if (msg_data == NULL) { + return; + } + data->data.node_info.have_quorum = xml_true(msg_data, XML_ATTR_HAVE_QUORUM); + data->data.node_info.is_remote = xml_true(msg_data, XML_NODE_IS_REMOTE); + crm_element_value_int(msg_data, XML_ATTR_ID, &(data->data.node_info.id)); + data->data.node_info.uuid = crm_element_value(msg_data, XML_ATTR_UUID); + data->data.node_info.uname = crm_element_value(msg_data, XML_ATTR_UNAME); + data->data.node_info.state = crm_element_value(msg_data, XML_NODE_IS_PEER); +} + +static void +set_ping_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) +{ + data->reply_type = pcmk_controld_reply_ping; + if (msg_data == NULL) { + return; + } + data->data.ping.sys_from = crm_element_value(msg_data, + XML_PING_ATTR_SYSFROM); + data->data.ping.fsa_state = crm_element_value(msg_data, + XML_PING_ATTR_CRMDSTATE); + data->data.ping.result = crm_element_value(msg_data, XML_PING_ATTR_STATUS); +} + +static bool +reply_expected(pcmk_ipc_api_t *api, xmlNode *request) +{ + const char *command = crm_element_value(request, F_CRM_TASK); + + if (command == NULL) { + return false; + } + + // We only need to handle commands that functions in this file can send + return !strcmp(command, CRM_OP_REPROBE) + || !strcmp(command, CRM_OP_NODE_INFO) + || !strcmp(command, CRM_OP_PING) + || !strcmp(command, CRM_OP_LRM_FAIL) + || !strcmp(command, CRM_OP_LRM_DELETE); +} + +static void +dispatch(pcmk_ipc_api_t *api, xmlNode *reply) +{ + struct controld_api_private_s *private = api->api_data; + crm_exit_t status = CRM_EX_OK; + xmlNode *msg_data = NULL; + const char *value = NULL; + pcmk_controld_api_reply_t reply_data = { + pcmk_controld_reply_unknown, NULL, NULL, + }; + + if (private->replies_expected > 0) { + private->replies_expected--; + } + + // Do some basic validation of the reply + + /* @TODO We should be able to verify that value is always a response, but + * currently the controller doesn't always properly set the type. Even + * if we fix the controller, we'll still need to handle replies from + * old versions (feature set could be used to differentiate). + */ + value = crm_element_value(reply, F_CRM_MSG_TYPE); + if ((value == NULL) || (strcmp(value, XML_ATTR_REQUEST) + && strcmp(value, XML_ATTR_RESPONSE))) { + crm_debug("Unrecognizable controller message: invalid message type '%s'", + crm_str(value)); + status = CRM_EX_PROTOCOL; + reply = NULL; + } + + if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { + crm_debug("Unrecognizable controller message: no reference"); + status = CRM_EX_PROTOCOL; + reply = NULL; + } + + value = crm_element_value(reply, F_CRM_TASK); + if (value == NULL) { + crm_debug("Unrecognizable controller message: no command name"); + status = CRM_EX_PROTOCOL; + reply = NULL; + } + + // Parse useful info from reply + + if (reply != NULL) { + reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION); + reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM); + msg_data = get_message_xml(reply, F_CRM_DATA); + + if (!strcmp(value, CRM_OP_REPROBE)) { + reply_data.reply_type = pcmk_controld_reply_reprobe; + + } else if (!strcmp(value, CRM_OP_NODE_INFO)) { + set_node_info_data(&reply_data, msg_data); + + } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) { + reply_data.reply_type = pcmk_controld_reply_resource; + reply_data.data.resource.node_state = msg_data; + + } else if (!strcmp(value, CRM_OP_PING)) { + set_ping_data(&reply_data, msg_data); + + } else { + crm_debug("Unrecognizable controller message: unknown command '%s'", + value); + status = CRM_EX_PROTOCOL; + reply = NULL; + } + } + + pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); +} + +pcmk__ipc_methods_t * +pcmk__controld_api_methods() +{ + pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); + + if (cmds != NULL) { + cmds->new_data = new_data; + cmds->free_data = free_data; + cmds->post_connect = post_connect; + cmds->reply_expected = reply_expected; + cmds->dispatch = dispatch; + } + return cmds; +} + +/*! + * \internal + * \brief Create XML for a controller IPC request + * + * \param[in] api Controller connection + * \param[in] op Controller IPC command name + * \param[in] node Node name to set as destination host + * \param[in] msg_data XML to attach to request as message data + * + * \return Newly allocated XML for request + */ +static xmlNode * +create_controller_request(pcmk_ipc_api_t *api, const char *op, + const char *node, xmlNode *msg_data) +{ + struct controld_api_private_s *private = api->api_data; + const char *sys_to = NULL; + + if ((node == NULL) && !strcmp(op, CRM_OP_PING)) { + sys_to = CRM_SYSTEM_DC; + } else { + sys_to = CRM_SYSTEM_CRMD; + } + return create_request(op, msg_data, node, sys_to, + (crm_system_name? crm_system_name : "client"), + private->client_uuid); +} + +// \return Standard Pacemaker return code +static int +send_controller_request(pcmk_ipc_api_t *api, xmlNode *request, + bool reply_is_expected) +{ + int rc; + + if (crm_element_value(request, XML_ATTR_REFERENCE) == NULL) { + return EINVAL; + } + rc = pcmk__send_ipc_request(api, request); + if ((rc == pcmk_rc_ok) && reply_is_expected) { + struct controld_api_private_s *private = api->api_data; + + private->replies_expected++; + } + return rc; +} + +static xmlNode * +create_reprobe_message_data(const char *target_node, const char *router_node) +{ + xmlNode *msg_data; + + msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE); + crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); + if ((router_node != NULL) && safe_str_neq(router_node, target_node)) { + crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); + } + return msg_data; +} + +/*! + * \brief Send a reprobe controller operation + * + * \param[in] api Controller connection + * \param[in] target_node Name of node to reprobe + * \param[in] router_node Router node for host + * + * \return Standard Pacemaker return code + * \note Event callback will get a reply of type pcmk_controld_reply_reprobe. + */ +int +pcmk_controld_api_reprobe(pcmk_ipc_api_t *api, const char *target_node, + const char *router_node) +{ + xmlNode *request; + xmlNode *msg_data; + int rc = pcmk_rc_ok; + + if (api == NULL) { + return EINVAL; + } + if (router_node == NULL) { + router_node = target_node; + } + crm_debug("Sending %s IPC request to reprobe %s via %s", + pcmk_ipc_name(api, true), crm_str(target_node), + crm_str(router_node)); + msg_data = create_reprobe_message_data(target_node, router_node); + request = create_controller_request(api, CRM_OP_REPROBE, router_node, + msg_data); + rc = send_controller_request(api, request, true); + free_xml(msg_data); + free_xml(request); + return rc; +} + +/*! + * \brief Send a "node info" controller operation + * + * \param[in] api Controller connection + * \param[in] nodeid ID of node to get info for (or 0 for local node) + * + * \return Standard Pacemaker return code + * \note Event callback will get a reply of type pcmk_controld_reply_info. + */ +int +pcmk_controld_api_node_info(pcmk_ipc_api_t *api, uint32_t nodeid) +{ + xmlNode *request; + int rc = pcmk_rc_ok; + + request = create_controller_request(api, CRM_OP_NODE_INFO, NULL, NULL); + if (request == NULL) { + return EINVAL; + } + if (nodeid > 0) { + crm_xml_set_id(request, "%lu", (unsigned long) nodeid); + } + + rc = send_controller_request(api, request, true); + free_xml(request); + return rc; +} + +/*! + * \brief Ask the controller for status + * + * \param[in] api Controller connection + * \param[in] node_name Name of node whose status is desired (or NULL for DC) + * + * \return Standard Pacemaker return code + * \note Event callback will get a reply of type pcmk_controld_reply_ping. + */ +int +pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name) +{ + xmlNode *request; + int rc = pcmk_rc_ok; + + request = create_controller_request(api, CRM_OP_PING, node_name, NULL); + if (request == NULL) { + return EINVAL; + } + rc = send_controller_request(api, request, true); + free_xml(request); + return rc; +} + +/*! + * \internal + * \brief Ask the controller to shut down + * + * \param[in] api Controller connection + * \param[in] node_name Name of node whose controller should shut down + * + * \return Standard Pacemaker return code + * + * \note This capability currently does not work, so the function is considered + * internal. It will likely be removed. + * \note Event callback will not get a reply. + */ +int +pcmk_controld_api_shutdown(pcmk_ipc_api_t *api, const char *node_name) +{ + xmlNode *request; + int rc = pcmk_rc_ok; + + request = create_controller_request(api, CRM_OP_SHUTDOWN, NULL, NULL); + if (request == NULL) { + return EINVAL; + } + rc = send_controller_request(api, request, false); + free_xml(request); + return rc; +} + +/*! + * \internal + * \brief Ask the controller to start a DC election + * + * \param[in] api Controller connection + * + * \return Standard Pacemaker return code + * + * \note This capability currently does not work, so the function is considered + * internal. It will likely be removed. + * \note Event callback will not get a reply. + */ +int +pcmk_controld_api_start_election(pcmk_ipc_api_t *api) +{ + xmlNode *request; + int rc = pcmk_rc_ok; + + request = create_controller_request(api, CRM_OP_VOTE, NULL, NULL); + if (request == NULL) { + return EINVAL; + } + rc = send_controller_request(api, request, false); + free_xml(request); + return rc; +} + +// \return Standard Pacemaker return code +static int +controller_resource_op(pcmk_ipc_api_t *api, const char *op, + const char *target_node, const char *router_node, + bool cib_only, const char *rsc_id, + const char *rsc_long_id, const char *standard, + const char *provider, const char *type) +{ + int rc = pcmk_rc_ok; + char *key; + xmlNode *request, *msg_data, *xml_rsc, *params; + + if (api == NULL) { + return EINVAL; + } + if (router_node == NULL) { + router_node = target_node; + } + + msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); + + /* The controller logs the transition key from resource op requests, so we + * need to have *something* for it. + * @TODO don't use "crm-resource" + */ + key = pcmk__transition_key(0, getpid(), 0, + "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); + crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); + free(key); + + crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); + if (safe_str_neq(router_node, target_node)) { + crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); + } + + if (cib_only) { + // Indicate that only the CIB needs to be cleaned + crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); + } + + xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); + crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); + crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc_long_id); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, standard); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, provider); + crm_xml_add(xml_rsc, XML_ATTR_TYPE, type); + + params = create_xml_node(msg_data, XML_TAG_ATTRS); + crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + + // The controller parses the timeout from the request + key = crm_meta_name(XML_ATTR_TIMEOUT); + crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg + free(key); + + request = create_controller_request(api, op, router_node, msg_data); + rc = send_controller_request(api, request, true); + free_xml(msg_data); + free_xml(request); + return rc; +} + +/*! + * \brief Ask the controller to fail a resource + * + * \param[in] api Controller connection + * \param[in] target_node Name of node resource is on + * \param[in] router_node Router node for target + * \param[in] rsc_id ID of resource to fail + * \param[in] rsc_long_id Long ID of resource (if any) + * \param[in] standard Standard of resource + * \param[in] provider Provider of resource (if any) + * \param[in] type Type of resource to fail + * + * \return Standard Pacemaker return code + * \note Event callback will get a reply of type pcmk_controld_reply_resource. + */ +int +pcmk_controld_api_fail(pcmk_ipc_api_t *api, + const char *target_node, const char *router_node, + const char *rsc_id, const char *rsc_long_id, + const char *standard, const char *provider, + const char *type) +{ + crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s", + pcmk_ipc_name(api, true), crm_str(rsc_id), crm_str(rsc_long_id), + crm_str(target_node), crm_str(router_node)); + return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node, + router_node, false, rsc_id, rsc_long_id, + standard, provider, type); +} + +/*! + * \brief Ask the controller to refresh a resource + * + * \param[in] api Controller connection + * \param[in] target_node Name of node resource is on + * \param[in] router_node Router node for target + * \param[in] rsc_id ID of resource to refresh + * \param[in] rsc_long_id Long ID of resource (if any) + * \param[in] standard Standard of resource + * \param[in] provider Provider of resource (if any) + * \param[in] type Type of resource + * \param[in] cib_only If true, clean resource from CIB only + * + * \return Standard Pacemaker return code + * \note Event callback will get a reply of type pcmk_controld_reply_resource. + */ +int +pcmk_controld_api_refresh(pcmk_ipc_api_t *api, const char *target_node, + const char *router_node, + const char *rsc_id, const char *rsc_long_id, + const char *standard, const char *provider, + const char *type, bool cib_only) +{ + crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s", + pcmk_ipc_name(api, true), crm_str(rsc_id), crm_str(rsc_long_id), + crm_str(target_node), crm_str(router_node)); + return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node, + router_node, cib_only, rsc_id, rsc_long_id, + standard, provider, type); +} + +/*! + * \brief Get the number of IPC replies currently expected from the controller + * + * \param[in] api Controller IPC API connection + * + * \return Number of replies expected + */ +unsigned int +pcmk_controld_api_replies_expected(pcmk_ipc_api_t *api) +{ + struct controld_api_private_s *private = api->api_data; + + return private->replies_expected; +} + +xmlNode * +create_hello_message(const char *uuid, const char *client_name, + const char *major_version, const char *minor_version) +{ + xmlNode *hello_node = NULL; + xmlNode *hello = NULL; + + if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) + || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { + crm_err("Could not create IPC hello message from %s (UUID %s): " + "missing information", + client_name? client_name : "unknown client", + uuid? uuid : "unknown"); + return NULL; + } + + hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); + if (hello_node == NULL) { + crm_err("Could not create IPC hello message from %s (UUID %s): " + "Message data creation failed", client_name, uuid); + return NULL; + } + + crm_xml_add(hello_node, "major_version", major_version); + crm_xml_add(hello_node, "minor_version", minor_version); + crm_xml_add(hello_node, "client_name", client_name); + crm_xml_add(hello_node, "client_uuid", uuid); + + hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); + if (hello == NULL) { + crm_err("Could not create IPC hello message from %s (UUID %s): " + "Request creation failed", client_name, uuid); + return NULL; + } + free_xml(hello_node); + + crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); + return hello; +} diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c index 634eead71b..e942e578c0 100644 --- a/lib/common/mainloop.c +++ b/lib/common/mainloop.c @@ -1,1417 +1,1451 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include struct mainloop_child_s { pid_t pid; char *desc; unsigned timerid; gboolean timeout; void *privatedata; enum mainloop_child_flags flags; /* Called when a process dies */ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); }; struct trigger_s { GSource source; gboolean running; gboolean trigger; void *user_data; guint id; }; static gboolean crm_trigger_prepare(GSource * source, gint * timeout) { crm_trigger_t *trig = (crm_trigger_t *) source; /* cluster-glue's FD and IPC related sources make use of * g_source_add_poll() but do not set a timeout in their prepare * functions * * This means mainloop's poll() will block until an event for one * of these sources occurs - any /other/ type of source, such as * this one or g_idle_*, that doesn't use g_source_add_poll() is * S-O-L and won't be processed until there is something fd-based * happens. * * Luckily the timeout we can set here affects all sources and * puts an upper limit on how long poll() can take. * * So unconditionally set a small-ish timeout, not too small that * we're in constant motion, which will act as an upper bound on * how long the signal handling might be delayed for. */ *timeout = 500; /* Timeout in ms */ return trig->trigger; } static gboolean crm_trigger_check(GSource * source) { crm_trigger_t *trig = (crm_trigger_t *) source; return trig->trigger; } static gboolean crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { int rc = TRUE; crm_trigger_t *trig = (crm_trigger_t *) source; if (trig->running) { /* Wait until the existing job is complete before starting the next one */ return TRUE; } trig->trigger = FALSE; if (callback) { rc = callback(trig->user_data); if (rc < 0) { crm_trace("Trigger handler %p not yet complete", trig); trig->running = TRUE; rc = TRUE; } } return rc; } static void crm_trigger_finalize(GSource * source) { crm_trace("Trigger %p destroyed", source); } static GSourceFuncs crm_trigger_funcs = { crm_trigger_prepare, crm_trigger_check, crm_trigger_dispatch, crm_trigger_finalize, }; static crm_trigger_t * mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { crm_trigger_t *trigger = NULL; trigger = (crm_trigger_t *) source; trigger->id = 0; trigger->trigger = FALSE; trigger->user_data = userdata; if (dispatch) { g_source_set_callback(source, dispatch, trigger, NULL); } g_source_set_priority(source, priority); g_source_set_can_recurse(source, FALSE); trigger->id = g_source_attach(source, NULL); return trigger; } void mainloop_trigger_complete(crm_trigger_t * trig) { crm_trace("Trigger handler %p complete", trig); trig->running = FALSE; } /* If dispatch returns: * -1: Job running but not complete * 0: Remove the trigger from mainloop * 1: Leave the trigger in mainloop */ crm_trigger_t * mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { GSource *source = NULL; CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource)); source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t)); CRM_ASSERT(source != NULL); return mainloop_setup_trigger(source, priority, dispatch, userdata); } void mainloop_set_trigger(crm_trigger_t * source) { if(source) { source->trigger = TRUE; } } gboolean mainloop_destroy_trigger(crm_trigger_t * source) { GSource *gs = NULL; if(source == NULL) { return TRUE; } gs = (GSource *)source; g_source_destroy(gs); /* Remove from mainloop, ref_count-- */ g_source_unref(gs); /* The caller no longer carries a reference to source * * At this point the source should be free'd, * unless we're currently processing said * source, in which case mainloop holds an * additional reference and it will be free'd * once our processing completes */ return TRUE; } // Define a custom glib source for signal handling // Data structure for custom glib source typedef struct signal_s { crm_trigger_t trigger; // trigger that invoked source (must be first) void (*handler) (int sig); // signal handler int signal; // signal that was received } crm_signal_t; // Table to associate signal handlers with signal numbers static crm_signal_t *crm_signals[NSIG]; /*! * \internal * \brief Dispatch an event from custom glib source for signals * * Given an signal event, clear the event trigger and call any registered * signal handler. * * \param[in] source glib source that triggered this dispatch * \param[in] callback (ignored) * \param[in] userdata (ignored) */ static gboolean crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { crm_signal_t *sig = (crm_signal_t *) source; if(sig->signal != SIGCHLD) { crm_notice("Caught '%s' signal "CRM_XS" %d (%s handler)", strsignal(sig->signal), sig->signal, (sig->handler? "invoking" : "no")); } sig->trigger.trigger = FALSE; if (sig->handler) { sig->handler(sig->signal); } return TRUE; } /*! * \internal * \brief Handle a signal by setting a trigger for signal source * * \param[in] sig Signal number that was received * * \note This is the true signal handler for the mainloop signal source, and * must be async-safe. */ static void mainloop_signal_handler(int sig) { if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) { mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]); } } // Functions implementing our custom glib source for signal handling static GSourceFuncs crm_signal_funcs = { crm_trigger_prepare, crm_trigger_check, crm_signal_dispatch, crm_trigger_finalize, }; /*! * \internal * \brief Set a true signal handler * * signal()-like interface to sigaction() * * \param[in] sig Signal number to register handler for * \param[in] dispatch Signal handler * * \return The previous value of the signal handler, or SIG_ERR on error * \note The dispatch function must be async-safe. */ sighandler_t crm_signal_handler(int sig, sighandler_t dispatch) { sigset_t mask; struct sigaction sa; struct sigaction old; if (sigemptyset(&mask) < 0) { crm_err("Could not set handler for signal %d: %s", sig, pcmk_strerror(errno)); return SIG_ERR; } memset(&sa, 0, sizeof(struct sigaction)); sa.sa_handler = dispatch; sa.sa_flags = SA_RESTART; sa.sa_mask = mask; if (sigaction(sig, &sa, &old) < 0) { crm_err("Could not set handler for signal %d: %s", sig, pcmk_strerror(errno)); return SIG_ERR; } return old.sa_handler; } static void mainloop_destroy_signal_entry(int sig) { crm_signal_t *tmp = crm_signals[sig]; crm_signals[sig] = NULL; crm_trace("Destroying signal %d", sig); mainloop_destroy_trigger((crm_trigger_t *) tmp); } /*! * \internal * \brief Add a signal handler to a mainloop * * \param[in] sig Signal number to handle * \param[in] dispatch Signal handler function * * \note The true signal handler merely sets a mainloop trigger to call this * dispatch function via the mainloop. Therefore, the dispatch function * does not need to be async-safe. */ gboolean mainloop_add_signal(int sig, void (*dispatch) (int sig)) { GSource *source = NULL; int priority = G_PRIORITY_HIGH - 1; if (sig == SIGTERM) { /* TERM is higher priority than other signals, * signals are higher priority than other ipc. * Yes, minus: smaller is "higher" */ priority--; } if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) { crm_trace("Signal handler for %d is already installed", sig); return TRUE; } else if (crm_signals[sig] != NULL) { crm_err("Different signal handler for %d is already installed", sig); return FALSE; } CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource)); source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t)); crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL); CRM_ASSERT(crm_signals[sig] != NULL); crm_signals[sig]->handler = dispatch; crm_signals[sig]->signal = sig; if (crm_signal_handler(sig, mainloop_signal_handler) == SIG_ERR) { mainloop_destroy_signal_entry(sig); return FALSE; } #if 0 /* If we want signals to interrupt mainloop's poll(), instead of waiting for * the timeout, then we should call siginterrupt() below * * For now, just enforce a low timeout */ if (siginterrupt(sig, 1) < 0) { crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig); } #endif return TRUE; } gboolean mainloop_destroy_signal(int sig) { if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signal_handler(sig, NULL) == SIG_ERR) { crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig); return FALSE; } else if (crm_signals[sig] == NULL) { return TRUE; } mainloop_destroy_signal_entry(sig); return TRUE; } static qb_array_t *gio_map = NULL; void mainloop_cleanup(void) { if (gio_map) { qb_array_free(gio_map); } for (int sig = 0; sig < NSIG; ++sig) { mainloop_destroy_signal_entry(sig); } } /* * libqb... */ struct gio_to_qb_poll { int32_t is_used; guint source; int32_t events; void *data; qb_ipcs_dispatch_fn_t fn; enum qb_loop_priority p; }; static gboolean gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; gint fd = g_io_channel_unix_get_fd(gio); crm_trace("%p.%d %d", data, fd, condition); /* if this assert get's hit, then there is a race condition between * when we destroy a fd and when mainloop actually gives it up */ CRM_ASSERT(adaptor->is_used > 0); return (adaptor->fn(fd, condition, adaptor->data) == 0); } static void gio_poll_destroy(gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; adaptor->is_used--; CRM_ASSERT(adaptor->is_used >= 0); if (adaptor->is_used == 0) { crm_trace("Marking adaptor %p unused", adaptor); adaptor->source = 0; } } /*! * \internal * \brief Convert libqb's poll priority into GLib's one * * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback) * * \return best matching GLib's priority */ static gint conv_prio_libqb2glib(enum qb_loop_priority prio) { gint ret = G_PRIORITY_DEFAULT; switch (prio) { case QB_LOOP_LOW: ret = G_PRIORITY_LOW; break; case QB_LOOP_HIGH: ret = G_PRIORITY_HIGH; break; default: crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED", prio); /* fall-through */ case QB_LOOP_MED: break; } return ret; } /*! * \internal * \brief Convert libqb's poll priority to rate limiting spec * * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback) * * \return best matching rate limiting spec */ static enum qb_ipcs_rate_limit conv_libqb_prio2ratelimit(enum qb_loop_priority prio) { /* this is an inversion of what libqb's qb_ipcs_request_rate_limit does */ enum qb_ipcs_rate_limit ret = QB_IPCS_RATE_NORMAL; switch (prio) { case QB_LOOP_LOW: ret = QB_IPCS_RATE_SLOW; break; case QB_LOOP_HIGH: ret = QB_IPCS_RATE_FAST; break; default: crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED", prio); /* fall-through */ case QB_LOOP_MED: break; } return ret; } static int32_t gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn, int32_t add) { struct gio_to_qb_poll *adaptor; GIOChannel *channel; int32_t res = 0; res = qb_array_index(gio_map, fd, (void **)&adaptor); if (res < 0) { crm_err("Array lookup failed for fd=%d: %d", fd, res); return res; } crm_trace("Adding fd=%d to mainloop as adaptor %p", fd, adaptor); if (add && adaptor->source) { crm_err("Adaptor for descriptor %d is still in-use", fd); return -EEXIST; } if (!add && !adaptor->is_used) { crm_err("Adaptor for descriptor %d is not in-use", fd); return -ENOENT; } /* channel is created with ref_count = 1 */ channel = g_io_channel_unix_new(fd); if (!channel) { crm_err("No memory left to add fd=%d", fd); return -ENOMEM; } if (adaptor->source) { g_source_remove(adaptor->source); adaptor->source = 0; } /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */ evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR); adaptor->fn = fn; adaptor->events = evts; adaptor->data = data; adaptor->p = p; adaptor->is_used++; adaptor->source = g_io_add_watch_full(channel, conv_prio_libqb2glib(p), evts, gio_read_socket, adaptor, gio_poll_destroy); /* Now that mainloop now holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that channel will be free'd by: * g_main_context_dispatch() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after gio_poll_destroy() completes */ g_io_channel_unref(channel); crm_trace("Added to mainloop with gsource id=%d", adaptor->source); if (adaptor->source > 0) { return 0; } return -EINVAL; } static int32_t gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_TRUE); } static int32_t gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_FALSE); } static int32_t gio_poll_dispatch_del(int32_t fd) { struct gio_to_qb_poll *adaptor; crm_trace("Looking for fd=%d", fd); if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) { if (adaptor->source) { g_source_remove(adaptor->source); adaptor->source = 0; } } return 0; } struct qb_ipcs_poll_handlers gio_poll_funcs = { .job_add = NULL, .dispatch_add = gio_poll_dispatch_add, .dispatch_mod = gio_poll_dispatch_mod, .dispatch_del = gio_poll_dispatch_del, }; static enum qb_ipc_type pick_ipc_type(enum qb_ipc_type requested) { const char *env = getenv("PCMK_ipc_type"); if (env && strcmp("shared-mem", env) == 0) { return QB_IPC_SHM; } else if (env && strcmp("socket", env) == 0) { return QB_IPC_SOCKET; } else if (env && strcmp("posix", env) == 0) { return QB_IPC_POSIX_MQ; } else if (env && strcmp("sysv", env) == 0) { return QB_IPC_SYSV_MQ; } else if (requested == QB_IPC_NATIVE) { /* We prefer shared memory because the server never blocks on * send. If part of a message fits into the socket, libqb * needs to block until the remainder can be sent also. * Otherwise the client will wait forever for the remaining * bytes. */ return QB_IPC_SHM; } return requested; } qb_ipcs_service_t * mainloop_add_ipc_server(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks) { return mainloop_add_ipc_server_with_prio(name, type, callbacks, QB_LOOP_MED); } qb_ipcs_service_t * mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks, enum qb_loop_priority prio) { int rc = 0; qb_ipcs_service_t *server = NULL; if (gio_map == NULL) { gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1); } server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks); if (server == NULL) { crm_err("Could not create %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc); return NULL; } if (prio != QB_LOOP_MED) { qb_ipcs_request_rate_limit(server, conv_libqb_prio2ratelimit(prio)); } #ifdef HAVE_IPCS_GET_BUFFER_SIZE /* All clients should use at least ipc_buffer_max as their buffer size */ qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size()); #endif qb_ipcs_poll_handlers_set(server, &gio_poll_funcs); rc = qb_ipcs_run(server); if (rc < 0) { crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc); return NULL; } return server; } void mainloop_del_ipc_server(qb_ipcs_service_t * server) { if (server) { qb_ipcs_destroy(server); } } struct mainloop_io_s { char *name; void *userdata; int fd; guint source; crm_ipc_t *ipc; GIOChannel *channel; int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata); int (*dispatch_fn_io) (gpointer userdata); void (*destroy_fn) (gpointer userdata); }; static gboolean mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data) { gboolean keep = TRUE; mainloop_io_t *client = data; CRM_ASSERT(client->fd == g_io_channel_unix_get_fd(gio)); if (condition & G_IO_IN) { if (client->ipc) { long rc = 0; int max = 10; do { rc = crm_ipc_read(client->ipc); if (rc <= 0) { crm_trace("Message acquisition from %s[%p] failed: %s (%ld)", client->name, client, pcmk_strerror(rc), rc); } else if (client->dispatch_fn_ipc) { const char *buffer = crm_ipc_buffer(client->ipc); crm_trace("New message from %s[%p] = %ld (I/O condition=%d)", client->name, client, rc, condition); if (client->dispatch_fn_ipc(buffer, rc, client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } while (keep && rc > 0 && --max > 0); } else { crm_trace("New message from %s[%p] %u", client->name, client, condition); if (client->dispatch_fn_io) { if (client->dispatch_fn_io(client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } } if (client->ipc && crm_ipc_connected(client->ipc) == FALSE) { crm_err("Connection to %s closed " CRM_XS "client=%p condition=%d", client->name, client, condition); keep = FALSE; } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) { crm_trace("The connection %s[%p] has been closed (I/O condition=%d)", client->name, client, condition); keep = FALSE; } else if ((condition & G_IO_IN) == 0) { /* #define GLIB_SYSDEF_POLLIN =1 #define GLIB_SYSDEF_POLLPRI =2 #define GLIB_SYSDEF_POLLOUT =4 #define GLIB_SYSDEF_POLLERR =8 #define GLIB_SYSDEF_POLLHUP =16 #define GLIB_SYSDEF_POLLNVAL =32 typedef enum { G_IO_IN GLIB_SYSDEF_POLLIN, G_IO_OUT GLIB_SYSDEF_POLLOUT, G_IO_PRI GLIB_SYSDEF_POLLPRI, G_IO_ERR GLIB_SYSDEF_POLLERR, G_IO_HUP GLIB_SYSDEF_POLLHUP, G_IO_NVAL GLIB_SYSDEF_POLLNVAL } GIOCondition; A bitwise combination representing a condition to watch for on an event source. G_IO_IN There is data to read. G_IO_OUT Data can be written (without blocking). G_IO_PRI There is urgent data to read. G_IO_ERR Error condition. G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets). G_IO_NVAL Invalid request. The file descriptor is not open. */ crm_err("Strange condition: %d", condition); } /* keep == FALSE results in mainloop_gio_destroy() being called * just before the source is removed from mainloop */ return keep; } static void mainloop_gio_destroy(gpointer c) { mainloop_io_t *client = c; char *c_name = strdup(client->name); /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c * client->channel will still have ref_count > 0... should be == 1 */ crm_trace("Destroying client %s[%p]", c_name, c); if (client->ipc) { crm_ipc_close(client->ipc); } if (client->destroy_fn) { void (*destroy_fn) (gpointer userdata) = client->destroy_fn; client->destroy_fn = NULL; destroy_fn(client->userdata); } if (client->ipc) { crm_ipc_t *ipc = client->ipc; client->ipc = NULL; crm_ipc_destroy(ipc); } crm_trace("Destroyed client %s[%p]", c_name, c); free(client->name); client->name = NULL; free(client); free(c_name); } -mainloop_io_t * -mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, - struct ipc_client_callbacks *callbacks) +/*! + * \brief Connect to IPC and add it as a main loop source + * + * \param[in] ipc IPC connection to add + * \param[in] priority Event source priority to use for connection + * \param[in] userdata Data to register with callbacks + * \param[in] callbacks Dispatch and destroy callbacks for connection + * \param[out] source Newly allocated event source + * + * \return Standard Pacemaker return code + * + * \note On failure, the caller is still responsible for ipc. On success, the + * caller should call mainloop_del_ipc_client() when source is no longer + * needed, which will lead to the disconnection of the IPC later in the + * main loop if it is connected. However the IPC disconnects, + * mainloop_gio_destroy() will free ipc and source after calling the + * destroy callback. + */ +int +pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, + struct ipc_client_callbacks *callbacks, + mainloop_io_t **source) { - mainloop_io_t *client = NULL; - crm_ipc_t *conn = crm_ipc_new(name, max_size); + CRM_CHECK((ipc != NULL) && (callbacks != NULL), return EINVAL); - if (conn && crm_ipc_connect(conn)) { - int32_t fd = crm_ipc_get_fd(conn); + if (!crm_ipc_connect(ipc)) { + return ENOTCONN; + } + *source = mainloop_add_fd(crm_ipc_name(ipc), priority, crm_ipc_get_fd(ipc), + userdata, NULL); + if (*source == NULL) { + int rc = errno; - client = mainloop_add_fd(name, priority, fd, userdata, NULL); + crm_ipc_close(ipc); + return rc; } + (*source)->ipc = ipc; + (*source)->destroy_fn = callbacks->destroy; + (*source)->dispatch_fn_ipc = callbacks->dispatch; + return pcmk_rc_ok; +} - if (client == NULL) { - crm_perror(LOG_TRACE, "Connection to %s failed", name); - if (conn) { - crm_ipc_close(conn); - crm_ipc_destroy(conn); +mainloop_io_t * +mainloop_add_ipc_client(const char *name, int priority, size_t max_size, + void *userdata, struct ipc_client_callbacks *callbacks) +{ + crm_ipc_t *ipc = crm_ipc_new(name, max_size); + mainloop_io_t *source = NULL; + int rc = pcmk__add_mainloop_ipc(ipc, priority, userdata, callbacks, + &source); + + if (rc != pcmk_rc_ok) { + if (crm_log_level == LOG_STDOUT) { + fprintf(stderr, "Connection to %s failed: %s", + name, pcmk_rc_str(rc)); } + crm_ipc_destroy(ipc); return NULL; } - - client->ipc = conn; - client->destroy_fn = callbacks->destroy; - client->dispatch_fn_ipc = callbacks->dispatch; - return client; + return source; } void mainloop_del_ipc_client(mainloop_io_t * client) { mainloop_del_fd(client); } crm_ipc_t * mainloop_get_ipc_client(mainloop_io_t * client) { if (client) { return client->ipc; } return NULL; } mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks * callbacks) { mainloop_io_t *client = NULL; if (fd >= 0) { client = calloc(1, sizeof(mainloop_io_t)); if (client == NULL) { return NULL; } client->name = strdup(name); client->userdata = userdata; if (callbacks) { client->destroy_fn = callbacks->destroy; client->dispatch_fn_io = callbacks->dispatch; } client->fd = fd; client->channel = g_io_channel_unix_new(fd); client->source = g_io_add_watch_full(client->channel, priority, (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback, client, mainloop_gio_destroy); /* Now that mainloop now holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that channel will be free'd by: * g_main_context_dispatch() or g_source_remove() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after mainloop_gio_destroy() completes */ g_io_channel_unref(client->channel); crm_trace("Added connection %d for %s[%p].%d", client->source, client->name, client, fd); } else { errno = EINVAL; } return client; } void mainloop_del_fd(mainloop_io_t * client) { if (client != NULL) { crm_trace("Removing client %s[%p]", client->name, client); if (client->source) { /* Results in mainloop_gio_destroy() being called just * before the source is removed from mainloop */ g_source_remove(client->source); } } } static GListPtr child_list = NULL; pid_t mainloop_child_pid(mainloop_child_t * child) { return child->pid; } const char * mainloop_child_name(mainloop_child_t * child) { return child->desc; } int mainloop_child_timeout(mainloop_child_t * child) { return child->timeout; } void * mainloop_child_userdata(mainloop_child_t * child) { return child->privatedata; } void mainloop_clear_child_userdata(mainloop_child_t * child) { child->privatedata = NULL; } /* good function name */ static void child_free(mainloop_child_t *child) { if (child->timerid != 0) { crm_trace("Removing timer %d", child->timerid); g_source_remove(child->timerid); child->timerid = 0; } free(child->desc); free(child); } /* terrible function name */ static int child_kill_helper(mainloop_child_t *child) { int rc; if (child->flags & mainloop_leave_pid_group) { crm_debug("Kill pid %d only. leave group intact.", child->pid); rc = kill(child->pid, SIGKILL); } else { crm_debug("Kill pid %d's group", child->pid); rc = kill(-child->pid, SIGKILL); } if (rc < 0) { if (errno != ESRCH) { crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid); } return -errno; } return 0; } static gboolean child_timeout_callback(gpointer p) { mainloop_child_t *child = p; int rc = 0; child->timerid = 0; if (child->timeout) { crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid); return FALSE; } rc = child_kill_helper(child); if (rc == -ESRCH) { /* Nothing left to do. pid doesn't exist */ return FALSE; } child->timeout = TRUE; crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid); child->timerid = g_timeout_add(5000, child_timeout_callback, child); return FALSE; } static bool child_waitpid(mainloop_child_t *child, int flags) { int rc = 0; int core = 0; int signo = 0; int status = 0; int exitcode = 0; bool callback_needed = true; rc = waitpid(child->pid, &status, flags); if (rc == 0) { // WNOHANG in flags, and child status is not available crm_trace("Child process %d (%s) still active", child->pid, child->desc); callback_needed = false; } else if (rc != child->pid) { /* According to POSIX, possible conditions: * - child->pid was non-positive (process group or any child), * and rc is specific child * - errno ECHILD (pid does not exist or is not child) * - errno EINVAL (invalid flags) * - errno EINTR (caller interrupted by signal) * * @TODO Handle these cases more specifically. */ signo = SIGCHLD; exitcode = 1; crm_notice("Wait for child process %d (%s) interrupted: %s", child->pid, child->desc, pcmk_strerror(errno)); } else if (WIFEXITED(status)) { exitcode = WEXITSTATUS(status); crm_trace("Child process %d (%s) exited with status %d", child->pid, child->desc, exitcode); } else if (WIFSIGNALED(status)) { signo = WTERMSIG(status); crm_trace("Child process %d (%s) exited with signal %d (%s)", child->pid, child->desc, signo, strsignal(signo)); #ifdef WCOREDUMP // AIX, SunOS, maybe others } else if (WCOREDUMP(status)) { core = 1; crm_err("Child process %d (%s) dumped core", child->pid, child->desc); #endif } else { // flags must contain WUNTRACED and/or WCONTINUED to reach this crm_trace("Child process %d (%s) stopped or continued", child->pid, child->desc); callback_needed = false; } if (callback_needed && child->callback) { child->callback(child, child->pid, core, signo, exitcode); } return callback_needed; } static void child_death_dispatch(int signal) { for (GList *iter = child_list; iter; ) { GList *saved = iter; mainloop_child_t *child = iter->data; iter = iter->next; if (child_waitpid(child, WNOHANG)) { crm_trace("Removing completed process %d from child list", child->pid); child_list = g_list_remove_link(child_list, saved); g_list_free(saved); child_free(child); } } } static gboolean child_signal_init(gpointer p) { crm_trace("Installed SIGCHLD handler"); /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ mainloop_add_signal(SIGCHLD, child_death_dispatch); /* In case they terminated before the signal handler was installed */ child_death_dispatch(SIGCHLD); return FALSE; } gboolean mainloop_child_kill(pid_t pid) { GListPtr iter; mainloop_child_t *child = NULL; mainloop_child_t *match = NULL; /* It is impossible to block SIGKILL, this allows us to * call waitpid without WNOHANG flag.*/ int waitflags = 0, rc = 0; for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) { child = iter->data; if (pid == child->pid) { match = child; } } if (match == NULL) { return FALSE; } rc = child_kill_helper(match); if(rc == -ESRCH) { /* It's gone, but hasn't shown up in waitpid() yet. Wait until we get * SIGCHLD and let handler clean it up as normal (so we get the correct * return code/status). The blocking alternative would be to call * child_waitpid(match, 0). */ crm_trace("Waiting for signal that child process %d completed", match->pid); return TRUE; } else if(rc != 0) { /* If KILL for some other reason set the WNOHANG flag since we * can't be certain what happened. */ waitflags = WNOHANG; } if (!child_waitpid(match, waitflags)) { /* not much we can do if this occurs */ return FALSE; } child_list = g_list_remove(child_list, match); child_free(match); return TRUE; } /* Create/Log a new tracked process * To track a process group, use -pid * * @TODO Using a non-positive pid (i.e. any child, or process group) would * likely not be useful since we will free the child after the first * completed process. */ void mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *privatedata, enum mainloop_child_flags flags, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { static bool need_init = TRUE; mainloop_child_t *child = g_new(mainloop_child_t, 1); child->pid = pid; child->timerid = 0; child->timeout = FALSE; child->privatedata = privatedata; child->callback = callback; child->flags = flags; if(desc) { child->desc = strdup(desc); } if (timeout) { child->timerid = g_timeout_add(timeout, child_timeout_callback, child); } child_list = g_list_append(child_list, child); if(need_init) { need_init = FALSE; /* SIGCHLD processing has to be invoked from mainloop. * We do not want it to be possible to both add a child pid * to mainloop, and have the pid's exit callback invoked within * the same callstack. */ g_timeout_add(1, child_signal_init, NULL); } } void mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { mainloop_child_add_with_flags(pid, timeout, desc, privatedata, 0, callback); } struct mainloop_timer_s { guint id; guint period_ms; bool repeat; char *name; GSourceFunc cb; void *userdata; }; static gboolean mainloop_timer_cb(gpointer user_data) { int id = 0; bool repeat = FALSE; struct mainloop_timer_s *t = user_data; CRM_ASSERT(t != NULL); id = t->id; t->id = 0; /* Ensure it's unset during callbacks so that * mainloop_timer_running() works as expected */ if(t->cb) { crm_trace("Invoking callbacks for timer %s", t->name); repeat = t->repeat; if(t->cb(t->userdata) == FALSE) { crm_trace("Timer %s complete", t->name); repeat = FALSE; } } if(repeat) { /* Restore if repeating */ t->id = id; } return repeat; } bool mainloop_timer_running(mainloop_timer_t *t) { if(t && t->id != 0) { return TRUE; } return FALSE; } void mainloop_timer_start(mainloop_timer_t *t) { mainloop_timer_stop(t); if(t && t->period_ms > 0) { crm_trace("Starting timer %s", t->name); t->id = g_timeout_add(t->period_ms, mainloop_timer_cb, t); } } void mainloop_timer_stop(mainloop_timer_t *t) { if(t && t->id != 0) { crm_trace("Stopping timer %s", t->name); g_source_remove(t->id); t->id = 0; } } guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms) { guint last = 0; if(t) { last = t->period_ms; t->period_ms = period_ms; } if(t && t->id != 0 && last != t->period_ms) { mainloop_timer_start(t); } return last; } mainloop_timer_t * mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata) { mainloop_timer_t *t = calloc(1, sizeof(mainloop_timer_t)); if(t) { if(name) { t->name = crm_strdup_printf("%s-%u-%d", name, period_ms, repeat); } else { t->name = crm_strdup_printf("%p-%u-%d", t, period_ms, repeat); } t->id = 0; t->period_ms = period_ms; t->repeat = repeat; t->cb = cb; t->userdata = userdata; crm_trace("Created timer %s with %p %p", t->name, userdata, t->userdata); } return t; } void mainloop_timer_del(mainloop_timer_t *t) { if(t) { crm_trace("Destroying timer %s", t->name); mainloop_timer_stop(t); free(t->name); free(t); } } /* * Helpers to make sure certain events aren't lost at shutdown */ static gboolean drain_timeout_cb(gpointer user_data) { bool *timeout_popped = (bool*) user_data; *timeout_popped = TRUE; return FALSE; } /*! * \brief Drain some remaining main loop events then quit it * * \param[in] mloop Main loop to drain and quit * \param[in] n Drain up to this many pending events */ void pcmk_quit_main_loop(GMainLoop *mloop, unsigned int n) { if ((mloop != NULL) && g_main_loop_is_running(mloop)) { GMainContext *ctx = g_main_loop_get_context(mloop); /* Drain up to n events in case some memory clean-up is pending * (helpful to reduce noise in valgrind output). */ for (int i = 0; (i < n) && g_main_context_pending(ctx); ++i) { g_main_context_dispatch(ctx); } g_main_loop_quit(mloop); } } /*! * \brief Process main loop events while a certain condition is met * * \param[in] mloop Main loop to process * \param[in] timer_ms Don't process longer than this amount of time * \param[in] check Function that returns TRUE if events should be processed * * \note This function is intended to be called at shutdown if certain important * events should not be missed. The caller would likely quit the main loop * or exit after calling this function. The check() function will be * passed the remaining timeout in milliseconds. */ void pcmk_drain_main_loop(GMainLoop *mloop, guint timer_ms, bool (*check)(guint)) { bool timeout_popped = FALSE; guint timer = 0; GMainContext *ctx = NULL; CRM_CHECK(mloop && check, return); ctx = g_main_loop_get_context(mloop); if (ctx) { time_t start_time = time(NULL); timer = g_timeout_add(timer_ms, drain_timeout_cb, &timeout_popped); while (!timeout_popped && check(timer_ms - (time(NULL) - start_time) * 1000)) { g_main_context_iteration(ctx, TRUE); } } if (!timeout_popped && (timer > 0)) { g_source_remove(timer); } } // Deprecated functions kept only for backward API compatibility gboolean crm_signal(int sig, void (*dispatch) (int sig)); /* * \brief Use crm_signal_handler() instead * \deprecated */ gboolean crm_signal(int sig, void (*dispatch) (int sig)) { return crm_signal_handler(sig, dispatch) != SIG_ERR; } diff --git a/tools/Makefile.am b/tools/Makefile.am index c822a8c287..4609b0f581 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,168 +1,167 @@ # # Copyright 2004-2019 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk if BUILD_SYSTEMD systemdsystemunit_DATA = crm_mon.service endif -noinst_HEADERS = crm_mon.h crm_resource.h crm_resource_controller.h +noinst_HEADERS = crm_mon.h crm_resource.h pcmkdir = $(datadir)/$(PACKAGE) pcmk_DATA = report.common report.collector sbin_SCRIPTS = crm_report crm_standby crm_master crm_failcount if BUILD_CIBSECRETS sbin_SCRIPTS += cibsecret endif noinst_SCRIPTS = pcmk_simtimes EXTRA_DIST = crm_diff.8.inc \ crm_error.8.inc \ crm_mon.sysconfig \ crm_mon.8.inc \ crm_node.8.inc \ crm_rule.8.inc \ crm_simulate.8.inc \ fix-manpages \ stonith_admin.8.inc sbin_PROGRAMS = attrd_updater \ cibadmin \ crmadmin \ crm_simulate \ crm_attribute \ crm_diff \ crm_error \ crm_mon \ crm_node \ crm_resource \ crm_rule \ crm_shadow \ crm_verify \ crm_ticket \ iso8601 \ stonith_admin if BUILD_SERVICELOG sbin_PROGRAMS += notifyServicelogEvent endif if BUILD_OPENIPMI_SERVICELOG sbin_PROGRAMS += ipmiservicelogd endif ## SOURCES # A few tools are just thin wrappers around crm_attribute. # This makes their help get updated when crm_attribute changes # (see mk/common.mk). MAN8DEPS = crm_attribute crmadmin_SOURCES = crmadmin.c crmadmin_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_error_SOURCES = crm_error.c crm_error_LDADD = $(top_builddir)/lib/common/libcrmcommon.la cibadmin_SOURCES = cibadmin.c cibadmin_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_shadow_SOURCES = crm_shadow.c crm_shadow_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_node_SOURCES = crm_node.c crm_node_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_simulate_SOURCES = crm_simulate.c crm_simulate_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_diff_SOURCES = crm_diff.c crm_diff_LDADD = $(top_builddir)/lib/common/libcrmcommon.la crm_mon_SOURCES = crm_mon.c crm_mon_curses.c crm_mon_print.c crm_mon_runtime.c crm_mon_xml.c crm_mon_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(CURSESLIBS) crm_verify_SOURCES = crm_verify.c crm_verify_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_attribute_SOURCES = crm_attribute.c crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_resource_SOURCES = crm_resource.c \ crm_resource_ban.c \ - crm_resource_controller.c \ crm_resource_print.c \ crm_resource_runtime.c crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_rule_SOURCES = crm_rule.c crm_rule_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/common/libcrmcommon.la iso8601_SOURCES = iso8601.c iso8601_LDADD = $(top_builddir)/lib/common/libcrmcommon.la attrd_updater_SOURCES = attrd_updater.c attrd_updater_LDADD = $(top_builddir)/lib/common/libcrmcommon.la crm_ticket_SOURCES = crm_ticket.c crm_ticket_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la stonith_admin_SOURCES = stonith_admin.c stonith_admin_LDADD = $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/common/libcrmcommon.la if BUILD_SERVICELOG notifyServicelogEvent_SOURCES = notifyServicelogEvent.c notifyServicelogEvent_CFLAGS = $(SERVICELOG_CFLAGS) notifyServicelogEvent_LDADD = $(top_builddir)/lib/common/libcrmcommon.la $(SERVICELOG_LIBS) endif if BUILD_OPENIPMI_SERVICELOG ipmiservicelogd_SOURCES = ipmiservicelogd.c ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) ipmiservicelogd_LDFLAGS = $(top_builddir)/lib/common/libcrmcommon.la $(OPENIPMI_SERVICELOG_LIBS) $(SERVICELOG_LIBS) endif CLEANFILES = $(man8_MANS) diff --git a/tools/crm_node.c b/tools/crm_node.c index db31f2009a..57c2ee598f 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -1,666 +1,645 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include +#include #include #define SUMMARY "crm_node - Tool for displaying low-level node information" struct { gboolean corosync; gboolean dangerous_cmd; gboolean force_flag; char command; int nodeid; char *target_uname; } options = { .command = '\0', .force_flag = FALSE }; gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); -static char *pid_s = NULL; static GMainLoop *mainloop = NULL; static crm_exit_t exit_code = CRM_EX_OK; #define INDENT " " static GOptionEntry command_entries[] = { { "cluster-id", 'i', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display this node's cluster id", NULL }, { "list", 'l', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display all known members (past and present) of this cluster", NULL }, { "name", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the name used by the cluster for this node", NULL }, { "partition", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the members of this partition", NULL }, { "quorum", 'q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display a 1 if our partition has quorum, 0 if not", NULL }, { "name-for-id", 'N', 0, G_OPTION_ARG_CALLBACK, name_cb, "Display the name used by the cluster for the node with the specified ID", "ID" }, { "remove", 'R', 0, G_OPTION_ARG_CALLBACK, remove_cb, "(Advanced) Remove the (stopped) node with the specified name from Pacemaker's\n" INDENT "configuration and caches (the node must already have been removed from\n" INDENT "the underlying cluster stack configuration", "NAME" }, { NULL } }; static GOptionEntry addl_entries[] = { { "force", 'f', 0, G_OPTION_ARG_NONE, &options.force_flag, NULL, NULL }, #if SUPPORT_COROSYNC /* Unused and deprecated */ { "corosync", 'C', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.corosync, NULL, NULL }, #endif // @TODO add timeout option for when IPC replies are needed { NULL } }; gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (safe_str_eq("-i", option_name) || safe_str_eq("--cluster-id", option_name)) { options.command = 'i'; } else if (safe_str_eq("-l", option_name) || safe_str_eq("--list", option_name)) { options.command = 'l'; } else if (safe_str_eq("-n", option_name) || safe_str_eq("--name", option_name)) { options.command = 'n'; } else if (safe_str_eq("-p", option_name) || safe_str_eq("--partition", option_name)) { options.command = 'p'; } else if (safe_str_eq("-q", option_name) || safe_str_eq("--quorum", option_name)) { options.command = 'q'; } else { g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "Unknown param passed to command_cb: %s\n", option_name); return FALSE; } return TRUE; } gboolean name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.command = 'N'; options.nodeid = crm_parse_int(optarg, NULL); return TRUE; } gboolean remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (optarg == NULL) { crm_err("-R option requires an argument"); g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "-R option requires an argument"); return FALSE; } options.command = 'R'; options.dangerous_cmd = TRUE; options.target_uname = strdup(optarg); return TRUE; } -/*! - * \internal - * \brief Exit crm_node - * Clean up memory, and either quit mainloop (if running) or exit - * - * \param[in] value Exit status - */ static void -crm_node_exit(crm_exit_t value) -{ - if (pid_s) { - free(pid_s); - pid_s = NULL; - } - - exit_code = value; - - if (mainloop && g_main_loop_is_running(mainloop)) { - g_main_loop_quit(mainloop); - } else { - crm_exit(exit_code); - } -} - -static void -exit_disconnect(gpointer user_data) -{ - fprintf(stderr, "error: Lost connection to cluster\n"); - crm_node_exit(CRM_EX_DISCONNECT); -} - -typedef int (*ipc_dispatch_fn) (const char *buffer, ssize_t length, - gpointer userdata); - -static crm_ipc_t * -new_mainloop_for_ipc(const char *system, ipc_dispatch_fn dispatch) -{ - mainloop_io_t *source = NULL; - crm_ipc_t *ipc = NULL; - - struct ipc_client_callbacks ipc_callbacks = { - .dispatch = dispatch, - .destroy = exit_disconnect - }; - - mainloop = g_main_loop_new(NULL, FALSE); - source = mainloop_add_ipc_client(system, G_PRIORITY_DEFAULT, 0, - NULL, &ipc_callbacks); - ipc = mainloop_get_ipc_client(source); - if (ipc == NULL) { - fprintf(stderr, - "error: Could not connect to cluster (is it running?)\n"); - crm_node_exit(CRM_EX_DISCONNECT); - } - return ipc; -} - -static int -send_controller_hello(crm_ipc_t *controller) -{ - xmlNode *hello = NULL; - int rc; - - pid_s = pcmk__getpid_s(); - hello = create_hello_message(pid_s, crm_system_name, "1", "0"); - rc = crm_ipc_send(controller, hello, 0, 0, NULL); - free_xml(hello); - return (rc < 0)? rc : 0; -} - -static int -send_node_info_request(crm_ipc_t *controller, uint32_t nodeid) +controller_event_cb(pcmk_ipc_api_t *controld_api, + enum pcmk_ipc_event event_type, crm_exit_t status, + void *event_data, void *user_data) { - xmlNode *ping = NULL; - int rc; + pcmk_controld_api_reply_t *reply = event_data; - ping = create_request(CRM_OP_NODE_INFO, NULL, NULL, CRM_SYSTEM_CRMD, - crm_system_name, pid_s); - if (nodeid > 0) { - crm_xml_add_int(ping, XML_ATTR_ID, nodeid); - } - rc = crm_ipc_send(controller, ping, 0, 0, NULL); - free_xml(ping); - return (rc < 0)? rc : 0; -} + switch (event_type) { + case pcmk_ipc_event_disconnect: + if (exit_code == CRM_EX_DISCONNECT) { // Unexpected + fprintf(stderr, "error: Lost connection to controller\n"); + } + goto done; + break; -static int -dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) -{ - xmlNode *message = string2xml(buffer); - xmlNode *data = NULL; - const char *value = NULL; + case pcmk_ipc_event_reply: + break; - if (message == NULL) { - fprintf(stderr, "error: Could not understand reply from controller\n"); - crm_node_exit(CRM_EX_PROTOCOL); - return 0; + default: + return; } - crm_log_xml_trace(message, "controller reply"); - - exit_code = CRM_EX_PROTOCOL; - // Validate reply - value = crm_element_value(message, F_CRM_MSG_TYPE); - if (safe_str_neq(value, XML_ATTR_RESPONSE)) { - fprintf(stderr, "error: Message from controller was not a reply\n"); + if (status != CRM_EX_OK) { + fprintf(stderr, "error: Bad reply from controller: %s\n", + crm_exit_str(status)); goto done; } - value = crm_element_value(message, XML_ATTR_REFERENCE); - if (value == NULL) { - fprintf(stderr, "error: Controller reply did not specify original message\n"); - goto done; - } - data = get_message_xml(message, F_CRM_DATA); - if (data == NULL) { - fprintf(stderr, "error: Controller reply did not contain any data\n"); + if (reply->reply_type != pcmk_controld_reply_info) { + fprintf(stderr, "error: Unknown reply type %d from controller\n", + reply->reply_type); goto done; } + // Parse desired info from reply and display to user switch (options.command) { case 'i': - value = crm_element_value(data, XML_ATTR_ID); - if (value == NULL) { - fprintf(stderr, "error: Controller reply did not contain node ID\n"); - } else { - printf("%s\n", value); - exit_code = CRM_EX_OK; + if (reply->data.node_info.id == 0) { + fprintf(stderr, + "error: Controller reply did not contain node ID\n"); + exit_code = CRM_EX_PROTOCOL; + goto done; } + printf("%d\n", reply->data.node_info.id); break; case 'n': case 'N': - value = crm_element_value(data, XML_ATTR_UNAME); - if (value == NULL) { + if (reply->data.node_info.uname == NULL) { fprintf(stderr, "Node is not known to cluster\n"); exit_code = CRM_EX_NOHOST; - } else { - printf("%s\n", value); - exit_code = CRM_EX_OK; + goto done; } + printf("%s\n", reply->data.node_info.uname); break; case 'q': - value = crm_element_value(data, XML_ATTR_HAVE_QUORUM); - if (value == NULL) { - fprintf(stderr, "error: Controller reply did not contain quorum status\n"); - } else { - bool quorum = crm_is_true(value); - - printf("%d\n", quorum); - exit_code = quorum? CRM_EX_OK : CRM_EX_QUORUM; + printf("%d\n", reply->data.node_info.have_quorum); + if (!(reply->data.node_info.have_quorum)) { + exit_code = CRM_EX_QUORUM; + goto done; } break; default: fprintf(stderr, "internal error: Controller reply not expected\n"); exit_code = CRM_EX_SOFTWARE; - break; + goto done; } + // Success + exit_code = CRM_EX_OK; done: - free_xml(message); - crm_node_exit(exit_code); - return 0; + pcmk_disconnect_ipc(controld_api); + pcmk_quit_main_loop(mainloop, 10); } static void run_controller_mainloop(uint32_t nodeid) { - crm_ipc_t *controller = NULL; + pcmk_ipc_api_t *controld_api = NULL; int rc; - controller = new_mainloop_for_ipc(CRM_SYSTEM_CRMD, dispatch_controller); + // Set disconnect exit code to handle unexpected disconnects + exit_code = CRM_EX_DISCONNECT; - rc = send_controller_hello(controller); - if (rc < 0) { - fprintf(stderr, "error: Could not register with controller: %s\n", - pcmk_strerror(rc)); - crm_node_exit(crm_errno2exit(rc)); + // Create controller IPC object + rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + return; } + pcmk_register_ipc_callback(controld_api, controller_event_cb, NULL); - rc = send_node_info_request(controller, nodeid); - if (rc < 0) { + // Connect to controller + rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + return; + } + + rc = pcmk_controld_api_node_info(controld_api, nodeid); + if (rc != pcmk_rc_ok) { fprintf(stderr, "error: Could not ping controller: %s\n", - pcmk_strerror(rc)); - crm_node_exit(crm_errno2exit(rc)); + pcmk_rc_str(rc)); + pcmk_disconnect_ipc(controld_api); + exit_code = pcmk_rc2exitc(rc); + return; } - // Run main loop to get controller reply via dispatch_controller() + // Run main loop to get controller reply via controller_event_cb() + mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); mainloop = NULL; + pcmk_free_ipc_api(controld_api); } static void print_node_name(void) { // Check environment first (i.e. when called by resource agent) const char *name = getenv("OCF_RESKEY_" CRM_META "_" XML_LRM_ATTR_TARGET); if (name != NULL) { printf("%s\n", name); exit_code = CRM_EX_OK; return; } else { // Otherwise ask the controller run_controller_mainloop(0); } } static int cib_remove_node(long id, const char *name) { int rc; cib_t *cib = NULL; xmlNode *node = NULL; xmlNode *node_state = NULL; crm_trace("Removing %s from the CIB", name); if(name == NULL && id == 0) { return -ENOTUNIQ; } node = create_xml_node(NULL, XML_CIB_TAG_NODE); node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(node, XML_ATTR_UNAME, name); crm_xml_add(node_state, XML_ATTR_UNAME, name); if (id > 0) { crm_xml_set_id(node, "%ld", id); crm_xml_add(node_state, XML_ATTR_ID, ID(node)); } cib = cib_new(); cib->cmds->signon(cib, crm_system_name, cib_command); rc = cib->cmds->remove(cib, XML_CIB_TAG_NODES, node, cib_sync_call); if (rc != pcmk_ok) { printf("Could not remove %s[%ld] from " XML_CIB_TAG_NODES ": %s", name, id, pcmk_strerror(rc)); } rc = cib->cmds->remove(cib, XML_CIB_TAG_STATUS, node_state, cib_sync_call); if (rc != pcmk_ok) { printf("Could not remove %s[%ld] from " XML_CIB_TAG_STATUS ": %s", name, id, pcmk_strerror(rc)); } cib->cmds->signoff(cib); cib_delete(cib); return rc; } +static int +controller_remove_node(const char *node_name, long nodeid) +{ + pcmk_ipc_api_t *controld_api = NULL; + int rc; + + // Create controller IPC object + rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + return ENOTCONN; + } + + // Connect to controller (without main loop) + rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_sync); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + pcmk_free_ipc_api(controld_api); + return rc; + } + + rc = pcmk_ipc_purge_node(controld_api, node_name, nodeid); + if (rc != pcmk_rc_ok) { + fprintf(stderr, + "error: Could not clear node from controller's cache: %s\n", + pcmk_rc_str(rc)); + } + + pcmk_free_ipc_api(controld_api); + return pcmk_rc_ok; +} + static int tools_remove_node_cache(const char *node_name, long nodeid, const char *target) { int rc = -1; - crm_ipc_t *conn = crm_ipc_new(target, 0); + crm_ipc_t *conn = NULL; xmlNode *cmd = NULL; + conn = crm_ipc_new(target, 0); if (!conn) { return -ENOTCONN; } - if (!crm_ipc_connect(conn)) { crm_perror(LOG_ERR, "Connection to %s failed", target); crm_ipc_destroy(conn); return -ENOTCONN; } - if(safe_str_eq(target, CRM_SYSTEM_CRMD)) { - // The controller requires a hello message before sending a request - rc = send_controller_hello(conn); - if (rc < 0) { - fprintf(stderr, "error: Could not register with controller: %s\n", - pcmk_strerror(rc)); - return rc; - } - } - crm_trace("Removing %s[%ld] from the %s membership cache", node_name, nodeid, target); if(safe_str_eq(target, T_ATTRD)) { cmd = create_xml_node(NULL, __FUNCTION__); crm_xml_add(cmd, F_TYPE, T_ATTRD); crm_xml_add(cmd, F_ORIG, crm_system_name); crm_xml_add(cmd, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); crm_xml_add(cmd, PCMK__XA_ATTR_NODE_NAME, node_name); if (nodeid > 0) { crm_xml_add_int(cmd, PCMK__XA_ATTR_NODE_ID, (int) nodeid); } - } else { - cmd = create_request(CRM_OP_RM_NODE_CACHE, - NULL, NULL, target, crm_system_name, pid_s); + } else { // Fencer or pacemakerd + cmd = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, target, + crm_system_name, NULL); if (nodeid > 0) { crm_xml_set_id(cmd, "%ld", nodeid); } crm_xml_add(cmd, XML_ATTR_UNAME, node_name); } rc = crm_ipc_send(conn, cmd, 0, 0, NULL); crm_debug("%s peer cache cleanup for %s (%ld): %d", target, node_name, nodeid, rc); if (rc > 0) { + // @TODO Should this be done just once after all the rest? rc = cib_remove_node(nodeid, node_name); } if (conn) { crm_ipc_close(conn); crm_ipc_destroy(conn); } free_xml(cmd); return rc > 0 ? 0 : rc; } static void remove_node(const char *target_uname) { + int rc; int d = 0; long nodeid = 0; const char *node_name = NULL; char *endptr = NULL; const char *daemons[] = { - CRM_SYSTEM_CRMD, "stonith-ng", T_ATTRD, CRM_SYSTEM_MCP, }; // Check whether node was specified by name or numeric ID errno = 0; nodeid = strtol(target_uname, &endptr, 10); if ((errno != 0) || (endptr == target_uname) || (*endptr != '\0') || (nodeid <= 0)) { // It's not a positive integer, so assume it's a node name nodeid = 0; node_name = target_uname; } + rc = controller_remove_node(node_name, nodeid); + if (rc != pcmk_rc_ok) { + exit_code = pcmk_rc2exitc(rc); + return; + } + for (d = 0; d < DIMOF(daemons); d++) { if (tools_remove_node_cache(node_name, nodeid, daemons[d])) { crm_err("Failed to connect to %s to remove node '%s'", daemons[d], target_uname); exit_code = CRM_EX_ERROR; return; } } exit_code = CRM_EX_OK; } static gint compare_node_xml(gconstpointer a, gconstpointer b) { const char *a_name = crm_element_value((xmlNode*) a, "uname"); const char *b_name = crm_element_value((xmlNode*) b, "uname"); return strcmp((a_name? a_name : ""), (b_name? b_name : "")); } static int node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) { GList *nodes = NULL; xmlNode *node = NULL; xmlNode *msg = string2xml(buffer); const char *uname; const char *state; if (msg == NULL) { fprintf(stderr, "error: Could not understand pacemakerd response\n"); exit_code = CRM_EX_PROTOCOL; g_main_loop_quit(mainloop); return 0; } crm_log_xml_trace(msg, "message"); for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { nodes = g_list_insert_sorted(nodes, node, compare_node_xml); } for (GList *iter = nodes; iter; iter = iter->next) { node = (xmlNode*) iter->data; uname = crm_element_value(node, "uname"); state = crm_element_value(node, "state"); if (options.command == 'l') { int id = 0; crm_element_value_int(node, "id", &id); printf("%d %s %s\n", id, (uname? uname : ""), (state? state : "")); // This is CRM_NODE_MEMBER but we don't want to include cluster header } else if ((options.command == 'p') && safe_str_eq(state, "member")) { printf("%s ", (uname? uname : "")); } } if (options.command == 'p') { fprintf(stdout, "\n"); } free_xml(msg); exit_code = CRM_EX_OK; g_main_loop_quit(mainloop); return 0; } +static void +lost_pacemakerd(gpointer user_data) +{ + fprintf(stderr, "error: Lost connection to cluster\n"); + exit_code = CRM_EX_DISCONNECT; + g_main_loop_quit(mainloop); +} + static void run_pacemakerd_mainloop(void) { crm_ipc_t *ipc = NULL; xmlNode *poke = NULL; + mainloop_io_t *source = NULL; + + struct ipc_client_callbacks ipc_callbacks = { + .dispatch = node_mcp_dispatch, + .destroy = lost_pacemakerd + }; - ipc = new_mainloop_for_ipc(CRM_SYSTEM_MCP, node_mcp_dispatch); + source = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, + NULL, &ipc_callbacks); + ipc = mainloop_get_ipc_client(source); + if (ipc == NULL) { + fprintf(stderr, + "error: Could not connect to cluster (is it running?)\n"); + exit_code = CRM_EX_DISCONNECT; + return; + } // Sending anything will get us a list of nodes poke = create_xml_node(NULL, "poke"); crm_ipc_send(ipc, poke, 0, 0, NULL); free_xml(poke); // Handle reply via node_mcp_dispatch() + mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); mainloop = NULL; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup *group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; context = pcmk__build_arg_context(args, NULL, &group, NULL); /* Add the -q option, which cannot be part of the globally supported options * because some tools use that flag for something else. */ pcmk__add_main_args(context, extra_prog_entries); pcmk__add_arg_group(context, "commands", "Commands:", "Show command help", command_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); return context; } int main(int argc, char **argv) { pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); GError *error = NULL; GOptionContext *context = NULL; GOptionGroup *output_group = NULL; gchar **processed_args = NULL; context = build_arg_context(args, output_group); crm_log_cli_init("crm_node"); processed_args = pcmk__cmdline_preproc(argv, "NR"); if (!g_option_context_parse_strv(context, &processed_args, &error)) { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); exit_code = CRM_EX_USAGE; goto done; } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (args->version) { /* FIXME: When crm_node is converted to use formatted output, this can go. */ pcmk__cli_help('v', CRM_EX_USAGE); } if (optind > argc || options.command == 0) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); exit_code = CRM_EX_USAGE; goto done; } if (options.dangerous_cmd && options.force_flag == FALSE) { fprintf(stderr, "The supplied command is considered dangerous." " To prevent accidental destruction of the cluster," " the --force flag is required in order to proceed.\n"); exit_code = CRM_EX_USAGE; goto done; } switch (options.command) { case 'n': print_node_name(); break; case 'R': remove_node(options.target_uname); break; case 'i': case 'q': case 'N': run_controller_mainloop(options.nodeid); break; case 'l': case 'p': run_pacemakerd_mainloop(); break; default: break; } done: g_strfreev(processed_args); g_clear_error(&error); pcmk__free_arg_context(context); - crm_node_exit(exit_code); - return exit_code; + return crm_exit(exit_code); } diff --git a/tools/crm_resource.c b/tools/crm_resource.c index d8144ad1c8..a573d96411 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1,1563 +1,1579 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include - -#include -#include - #include #include #include - #include #include #include #include #include +#include +#include +#include + bool BE_QUIET = FALSE; bool scope_master = FALSE; int cib_options = cib_sync_call; - -static GMainLoop *mainloop = NULL; +static crm_exit_t exit_code = CRM_EX_OK; // Things that should be cleaned up on exit +static GMainLoop *mainloop = NULL; static cib_t *cib_conn = NULL; -static pcmk_controld_api_t *controld_api = NULL; +static pcmk_ipc_api_t *controld_api = NULL; static pe_working_set_t *data_set = NULL; #define MESSAGE_TIMEOUT_S 60 // Clean up and exit static crm_exit_t -bye(crm_exit_t exit_code) +bye(crm_exit_t ec) { - static bool crm_resource_shutdown_flag = false; - - if (crm_resource_shutdown_flag) { - // Allow usage like "return bye(...);" - return exit_code; - } - crm_resource_shutdown_flag = true; - if (cib_conn != NULL) { cib_t *save_cib_conn = cib_conn; - cib_conn = NULL; + cib_conn = NULL; // Ensure we can't free this twice save_cib_conn->cmds->signoff(save_cib_conn); cib_delete(save_cib_conn); } if (controld_api != NULL) { - pcmk_controld_api_t *save_controld_api = controld_api; + pcmk_ipc_api_t *save_controld_api = controld_api; - controld_api = NULL; - pcmk_free_controld_api(save_controld_api); + controld_api = NULL; // Ensure we can't free this twice + pcmk_free_ipc_api(save_controld_api); + } + if (mainloop != NULL) { + g_main_loop_unref(mainloop); + mainloop = NULL; } pe_free_working_set(data_set); data_set = NULL; - crm_exit(exit_code); - return exit_code; + crm_exit(ec); + return ec; +} + +static void +quit_main_loop(crm_exit_t ec) +{ + exit_code = ec; + if (mainloop != NULL) { + GMainLoop *mloop = mainloop; + + mainloop = NULL; // Don't re-enter this block + pcmk_quit_main_loop(mloop, 10); + g_main_loop_unref(mloop); + } } static gboolean resource_ipc_timeout(gpointer data) { // Start with newline because "Waiting for ..." message doesn't have one fprintf(stderr, "\nAborting because no messages received in %d seconds\n", MESSAGE_TIMEOUT_S); crm_err("No messages received in %d seconds", MESSAGE_TIMEOUT_S); - bye(CRM_EX_TIMEOUT); + quit_main_loop(CRM_EX_TIMEOUT); return FALSE; } static void -handle_controller_reply(pcmk_controld_api_t *capi, void *api_data, - void *user_data) +controller_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, + crm_exit_t status, void *event_data, void *user_data) { - fprintf(stderr, "."); - if ((capi->replies_expected(capi) == 0) - && mainloop && g_main_loop_is_running(mainloop)) { - fprintf(stderr, " OK\n"); - crm_debug("Got all the replies we expected"); - bye(CRM_EX_OK); - } -} + switch (event_type) { + case pcmk_ipc_event_disconnect: + if (exit_code == CRM_EX_DISCONNECT) { // Unexpected + crm_info("Connection to controller was terminated"); + } + quit_main_loop(exit_code); + break; -static void -handle_controller_drop(pcmk_controld_api_t *capi, void *api_data, - void *user_data) -{ - crm_info("Connection to controller was terminated"); - bye(CRM_EX_DISCONNECT); + case pcmk_ipc_event_reply: + if (status != CRM_EX_OK) { + fprintf(stderr, "\nError: bad reply from controller: %s\n", + crm_exit_str(status)); + pcmk_disconnect_ipc(api); + quit_main_loop(status); + } else { + fprintf(stderr, "."); + if ((pcmk_controld_api_replies_expected(api) == 0) + && mainloop && g_main_loop_is_running(mainloop)) { + fprintf(stderr, " OK\n"); + crm_debug("Got all the replies we expected"); + pcmk_disconnect_ipc(api); + quit_main_loop(CRM_EX_OK); + } + } + break; + + default: + break; + } } static void -start_mainloop(pcmk_controld_api_t *capi) +start_mainloop(pcmk_ipc_api_t *capi) { - if (capi->replies_expected(capi) > 0) { - unsigned int count = capi->replies_expected(capi); + unsigned int count = pcmk_controld_api_replies_expected(capi); + if (count > 0) { fprintf(stderr, "Waiting for %d %s from the controller", count, pcmk__plural_alt(count, "reply", "replies")); + exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects mainloop = g_main_loop_new(NULL, FALSE); g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); g_main_loop_run(mainloop); } } static int compare_id(gconstpointer a, gconstpointer b) { return strcmp((const char *)a, (const char *)b); } static GListPtr build_constraint_list(xmlNode *root) { GListPtr retval = NULL; xmlNode *cib_constraints = NULL; xmlXPathObjectPtr xpathObj = NULL; int ndx = 0; cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, root); xpathObj = xpath_search(cib_constraints, "//" XML_CONS_TAG_RSC_LOCATION); for (ndx = 0; ndx < numXpathResults(xpathObj); ndx++) { xmlNode *match = getXpathResult(xpathObj, ndx); retval = g_list_insert_sorted(retval, (gpointer) ID(match), compare_id); } freeXpathObject(xpathObj); return retval; } /* short option letters still available: eEJkKXyYZ */ static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "help", no_argument, NULL, '?', "\t\tDisplay this text and exit", pcmk__option_default }, { "version", no_argument, NULL, '$', "\t\tDisplay version information and exit", pcmk__option_default }, { "verbose", no_argument, NULL, 'V', "\t\tIncrease debug output (may be specified multiple times)", pcmk__option_default }, { "quiet", no_argument, NULL, 'Q', "\t\tBe less descriptive in results", pcmk__option_default }, { "resource", required_argument, NULL, 'r', "\tResource ID", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nQueries:", pcmk__option_default }, { "list", no_argument, NULL, 'L', "\t\tList all cluster resources with status", pcmk__option_default }, { "list-raw", no_argument, NULL, 'l', "\t\tList IDs of all instantiated resources (individual members rather " "than groups etc.)", pcmk__option_default }, { "list-cts", no_argument, NULL, 'c', NULL, pcmk__option_hidden }, { "list-operations", no_argument, NULL, 'O', "\tList active resource operations, optionally filtered by --resource " "and/or --node", pcmk__option_default }, { "list-all-operations", no_argument, NULL, 'o', "List all resource operations, optionally filtered by --resource " "and/or --node", pcmk__option_default }, { "list-standards", no_argument, NULL, 0, "\tList supported standards", pcmk__option_default }, { "list-ocf-providers", no_argument, NULL, 0, "List all available OCF providers", pcmk__option_default }, { "list-agents", required_argument, NULL, 0, "List all agents available for the named standard and/or provider", pcmk__option_default }, { "list-ocf-alternatives", required_argument, NULL, 0, "List all available providers for the named OCF agent", pcmk__option_default }, { "show-metadata", required_argument, NULL, 0, "Show the metadata for the named class:provider:agent", pcmk__option_default }, { "query-xml", no_argument, NULL, 'q', "\tShow XML configuration of resource (after any template expansion)", pcmk__option_default }, { "query-xml-raw", no_argument, NULL, 'w', "\tShow XML configuration of resource (before any template expansion)", pcmk__option_default }, { "get-parameter", required_argument, NULL, 'g', "Display named parameter for resource (use instance attribute unless " "--meta or --utilization is specified)", pcmk__option_default }, { "get-property", required_argument, NULL, 'G', "Display named property of resource ('class', 'type', or 'provider') " "(requires --resource)", pcmk__option_hidden }, { "locate", no_argument, NULL, 'W', "\t\tShow node(s) currently running resource", pcmk__option_default }, { "stack", no_argument, NULL, 'A', "\t\tDisplay the prerequisites and dependents of a resource", pcmk__option_default }, { "constraints", no_argument, NULL, 'a', "\tDisplay the (co)location constraints that apply to a resource", pcmk__option_default }, { "why", no_argument, NULL, 'Y', "\t\tShow why resources are not running, optionally filtered by " "--resource and/or --node", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nCommands:", pcmk__option_default }, { "validate", no_argument, NULL, 0, "\t\tValidate resource configuration by calling agent's validate-all " "action. The configuration may be specified either by giving an " "existing resource name with -r, or by specifying --class, " "--agent, and --provider arguments, along with any number of " "--option arguments.", pcmk__option_default }, { "cleanup", no_argument, NULL, 'C', "\t\tIf resource has any past failures, clear its history and " "fail count. Optionally filtered by --resource, --node, " "--operation, and --interval (otherwise all). --operation and " "--interval apply to fail counts, but entire history is always " "cleared, to allow current state to be rechecked. If the named " "resource is part of a group, or one numbered instance of a clone " "or bundled resource, the clean-up applies to the whole collective " "resource unless --force is given.", pcmk__option_default }, { "refresh", no_argument, NULL, 'R', "\t\tDelete resource's history (including failures) so its current " "state is rechecked. Optionally filtered by --resource and --node " "(otherwise all). If the named resource is part of a group, or one " "numbered instance of a clone or bundled resource, the refresh " "applies to the whole collective resource unless --force is given.", pcmk__option_default }, { "set-parameter", required_argument, NULL, 'p', "Set named parameter for resource (requires -v). Use instance " "attribute unless --meta or --utilization is specified.", pcmk__option_default }, { "delete-parameter", required_argument, NULL, 'd', "Delete named parameter for resource. Use instance attribute unless " "--meta or --utilization is specified.", pcmk__option_default }, { "set-property", required_argument, NULL, 'S', "Set named property of resource ('class', 'type', or 'provider') " "(requires -r, -t, -v)", pcmk__option_hidden }, { "-spacer-", no_argument, NULL, '-', "\nResource location:", pcmk__option_default }, { "move", no_argument, NULL, 'M', "\t\tCreate a constraint to move resource. If --node is specified, the " "constraint will be to move to that node, otherwise it will be to " "ban the current node. Unless --force is specified, this will " "return an error if the resource is already running on the " "specified node. If --force is specified, this will always ban the " "current node. Optional: --lifetime, --master. NOTE: This may " "prevent the resource from running on its previous location until " "the implicit constraint expires or is removed with --clear.", pcmk__option_default }, { "ban", no_argument, NULL, 'B', "\t\tCreate a constraint to keep resource off a node. Optional: " "--node, --lifetime, --master. NOTE: This will prevent the " "resource from running on the affected node until the implicit " "constraint expires or is removed with --clear. If --node is not " "specified, it defaults to the node currently running the resource " "for primitives and groups, or the master for promotable clones " "with promoted-max=1 (all other situations result in an error as " "there is no sane default).", pcmk__option_default }, { "clear", no_argument, NULL, 'U', "\t\tRemove all constraints created by the --ban and/or --move " "commands. Requires: --resource. Optional: --node, --master, " "--expired. If --node is not specified, all constraints created " "by --ban and --move will be removed for the named resource. If " "--node and --force are specified, any constraint created by " "--move will be cleared, even if it is not for the specified node. " "If --expired is specified, only those constraints whose lifetimes " "have expired will be removed.", pcmk__option_default }, { "expired", no_argument, NULL, 'e', "\t\tModifies the --clear argument to remove constraints with " "expired lifetimes.", pcmk__option_default }, { "lifetime", required_argument, NULL, 'u', "\tLifespan (as ISO 8601 duration) of created constraints (with -B, " "-M) (see https://en.wikipedia.org/wiki/ISO_8601#Durations)", pcmk__option_default }, { "master", no_argument, NULL, 0, "\t\tLimit scope of command to Master role (with -B, -M, -U). For -B " "and -M, the previous master may remain active in the Slave role.", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nAdvanced Commands:", pcmk__option_default }, { "delete", no_argument, NULL, 'D', "\t\t(Advanced) Delete a resource from the CIB. Required: -t", pcmk__option_default }, { "fail", no_argument, NULL, 'F', "\t\t(Advanced) Tell the cluster this resource has failed", pcmk__option_default }, { "restart", no_argument, NULL, 0, "\t\t(Advanced) Tell the cluster to restart this resource and " "anything that depends on it", pcmk__option_default }, { "wait", no_argument, NULL, 0, "\t\t(Advanced) Wait until the cluster settles into a stable state", pcmk__option_default }, { "force-demote", no_argument, NULL, 0, "\t(Advanced) Bypass the cluster and demote a resource on the local " "node. Unless --force is specified, this will refuse to do so if " "the cluster believes the resource is a clone instance already " "running on the local node.", pcmk__option_default }, { "force-stop", no_argument, NULL, 0, "\t(Advanced) Bypass the cluster and stop a resource on the local node", pcmk__option_default }, { "force-start", no_argument, NULL, 0, "\t(Advanced) Bypass the cluster and start a resource on the local " "node. Unless --force is specified, this will refuse to do so if " "the cluster believes the resource is a clone instance already " "running on the local node.", pcmk__option_default }, { "force-promote", no_argument, NULL, 0, "\t(Advanced) Bypass the cluster and promote a resource on the local " "node. Unless --force is specified, this will refuse to do so if " "the cluster believes the resource is a clone instance already " "running on the local node.", pcmk__option_default }, { "force-check", no_argument, NULL, 0, "\t(Advanced) Bypass the cluster and check the state of a resource on " "the local node", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nValidate Options:", pcmk__option_default }, { "class", required_argument, NULL, 0, "\tThe standard the resource agent confirms to (for example, ocf). " "Use with --agent, --provider, --option, and --validate.", pcmk__option_default }, { "agent", required_argument, NULL, 0, "\tThe agent to use (for example, IPaddr). Use with --class, " "--provider, --option, and --validate.", pcmk__option_default }, { "provider", required_argument, NULL, 0, "\tThe vendor that supplies the resource agent (for example, " "heartbeat). Use with --class, --agent, --option, and --validate.", pcmk__option_default }, { "option", required_argument, NULL, 0, "\tSpecify a device configuration parameter as NAME=VALUE (may be " "specified multiple times). Use with --validate and without the " "-r option.", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nAdditional Options:", pcmk__option_default }, { "node", required_argument, NULL, 'N', "\tNode name", pcmk__option_default }, { "recursive", no_argument, NULL, 0, "\tFollow colocation chains when using --set-parameter", pcmk__option_default }, { "resource-type", required_argument, NULL, 't', "Resource XML element (primitive, group, etc.) (with -D)", pcmk__option_default }, { "parameter-value", required_argument, NULL, 'v', "Value to use with -p", pcmk__option_default }, { "meta", no_argument, NULL, 'm', "\t\tUse resource meta-attribute instead of instance attribute " "(with -p, -g, -d)", pcmk__option_default }, { "utilization", no_argument, NULL, 'z', "\tUse resource utilization attribute instead of instance attribute " "(with -p, -g, -d)", pcmk__option_default }, { "operation", required_argument, NULL, 'n', "\tOperation to clear instead of all (with -C -r)", pcmk__option_default }, { "interval", required_argument, NULL, 'I', "\tInterval of operation to clear (default 0) (with -C -r -n)", pcmk__option_default }, { "set-name", required_argument, NULL, 's', "\t(Advanced) XML ID of attributes element to use (with -p, -d)", pcmk__option_default }, { "nvpair", required_argument, NULL, 'i', "\t(Advanced) XML ID of nvpair element to use (with -p, -d)", pcmk__option_default }, { "timeout", required_argument, NULL, 'T', "\t(Advanced) Abort if command does not finish in this time (with " "--restart, --wait, --force-*)", pcmk__option_default }, { "force", no_argument, NULL, 'f', "\t\tIf making CIB changes, do so regardless of quorum. See help for " "individual commands for additional behavior.", pcmk__option_default }, { "xml-file", required_argument, NULL, 'x', NULL, pcmk__option_hidden }, /* legacy options */ { "host-uname", required_argument, NULL, 'H', NULL, pcmk__option_hidden }, { "-spacer-", 1, NULL, '-', "\nExamples:", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', "List the available OCF agents:", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', " crm_resource --list-agents ocf", pcmk__option_example }, { "-spacer-", 1, NULL, '-', "List the available OCF agents from the linux-ha project:", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', " crm_resource --list-agents ocf:heartbeat", pcmk__option_example }, { "-spacer-", 1, NULL, '-', "Move 'myResource' to a specific node:", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', " crm_resource --resource myResource --move --node altNode", pcmk__option_example }, { "-spacer-", 1, NULL, '-', "Allow (but not force) 'myResource' to move back to its original " "location:", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', " crm_resource --resource myResource --clear", pcmk__option_example }, { "-spacer-", 1, NULL, '-', "Stop 'myResource' (and anything that depends on it):", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', " crm_resource --resource myResource --set-parameter target-role " "--meta --parameter-value Stopped", pcmk__option_example }, { "-spacer-", 1, NULL, '-', "Tell the cluster not to manage 'myResource' (the cluster will not " "attempt to start or stop the resource under any circumstances; " "useful when performing maintenance tasks on a resource):", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', " crm_resource --resource myResource --set-parameter is-managed " "--meta --parameter-value false", pcmk__option_example }, { "-spacer-", 1, NULL, '-', "Erase the operation history of 'myResource' on 'aNode' (the cluster " "will 'forget' the existing resource state, including any " "errors, and attempt to recover the resource; useful when a " "resource had failed permanently and has been repaired " "by an administrator):", pcmk__option_paragraph }, { "-spacer-", 1, NULL, '-', " crm_resource --resource myResource --cleanup --node aNode", pcmk__option_example }, { 0, 0, 0, 0 } }; int main(int argc, char **argv) { char rsc_cmd = 'L'; const char *v_class = NULL; const char *v_agent = NULL; const char *v_provider = NULL; char *name = NULL; char *value = NULL; GHashTable *validate_options = NULL; const char *rsc_id = NULL; const char *host_uname = NULL; const char *prop_name = NULL; const char *prop_value = NULL; const char *rsc_type = NULL; const char *prop_id = NULL; const char *prop_set = NULL; const char *rsc_long_cmd = NULL; const char *longname = NULL; const char *operation = NULL; const char *interval_spec = NULL; const char *cib_file = getenv("CIB_file"); const char *xml_file = NULL; GHashTable *override_params = NULL; xmlNode *cib_xml_copy = NULL; pe_resource_t *rsc = NULL; bool recursive = FALSE; bool validate_cmdline = FALSE; /* whether we are just validating based on command line options */ bool require_resource = TRUE; /* whether command requires that resource be specified */ bool require_dataset = TRUE; /* whether command requires populated dataset instance */ bool require_crmd = FALSE; // whether command requires controller connection bool clear_expired = FALSE; int rc = pcmk_ok; int is_ocf_rc = 0; int option_index = 0; int timeout_ms = 0; int argerr = 0; int flag; int find_flags = 0; // Flags to use when searching for resource - crm_exit_t exit_code = CRM_EX_OK; crm_log_cli_init("crm_resource"); pcmk__set_cli_options(NULL, "| [options]", long_options, "perform tasks related to Pacemaker " "cluster resources"); validate_options = crm_str_table_new(); while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, &longname); if (flag == -1) break; switch (flag) { case 0: /* long options with no short equivalent */ if (safe_str_eq("master", longname)) { scope_master = TRUE; } else if(safe_str_eq(longname, "recursive")) { recursive = TRUE; } else if (safe_str_eq("wait", longname)) { rsc_cmd = flag; rsc_long_cmd = longname; require_resource = FALSE; require_dataset = FALSE; } else if (pcmk__str_any_of(longname, "validate", "restart", "force-demote", "force-stop", "force-start", "force-promote", "force-check", NULL)) { rsc_cmd = flag; rsc_long_cmd = longname; find_flags = pe_find_renamed|pe_find_anon; crm_log_args(argc, argv); } else if (pcmk__str_any_of(longname, "list-ocf-providers", "list-ocf-alternatives", "list-standards", NULL)) { const char *text = NULL; lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); if (pcmk__str_any_of(longname, "list-ocf-providers", "list-ocf-alternatives", NULL)) { rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, optarg, &list); text = "OCF providers"; } else if (safe_str_eq("list-standards", longname)) { rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); text = "standards"; } if (rc > 0) { for (iter = list; iter != NULL; iter = iter->next) { printf("%s\n", iter->val); } lrmd_list_freeall(list); } else if (optarg) { fprintf(stderr, "No %s found for %s\n", text, optarg); exit_code = CRM_EX_NOSUCH; } else { fprintf(stderr, "No %s found\n", text); exit_code = CRM_EX_NOSUCH; } lrmd_api_delete(lrmd_conn); return bye(exit_code); } else if (safe_str_eq("show-metadata", longname)) { char *standard = NULL; char *provider = NULL; char *type = NULL; char *metadata = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); rc = crm_parse_agent_spec(optarg, &standard, &provider, &type); if (rc == pcmk_ok) { rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard, provider, type, &metadata, 0); } else { fprintf(stderr, "'%s' is not a valid agent specification\n", optarg); rc = -ENXIO; } if (metadata) { printf("%s\n", metadata); } else { fprintf(stderr, "Metadata query for %s failed: %s\n", optarg, pcmk_strerror(rc)); exit_code = crm_errno2exit(rc); } lrmd_api_delete(lrmd_conn); return bye(exit_code); } else if (safe_str_eq("list-agents", longname)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; char *provider = strchr (optarg, ':'); lrmd_t *lrmd_conn = lrmd_api_new(); if (provider) { *provider++ = 0; } rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, optarg, provider); if (rc > 0) { for (iter = list; iter != NULL; iter = iter->next) { printf("%s\n", iter->val); } lrmd_list_freeall(list); } else { fprintf(stderr, "No agents found for standard=%s, provider=%s\n", optarg, (provider? provider : "*")); exit_code = CRM_EX_NOSUCH; } lrmd_api_delete(lrmd_conn); return bye(exit_code); } else if (safe_str_eq("class", longname)) { if (!(pcmk_get_ra_caps(optarg) & pcmk_ra_cap_params)) { if (BE_QUIET == FALSE) { fprintf(stdout, "Standard %s does not support parameters\n", optarg); } return bye(exit_code); } else { v_class = optarg; } validate_cmdline = TRUE; require_resource = FALSE; } else if (safe_str_eq("agent", longname)) { validate_cmdline = TRUE; require_resource = FALSE; v_agent = optarg; } else if (safe_str_eq("provider", longname)) { validate_cmdline = TRUE; require_resource = FALSE; v_provider = optarg; } else if (safe_str_eq("option", longname)) { crm_info("Scanning: --option %s", optarg); rc = pcmk_scan_nvpair(optarg, &name, &value); if (rc != 2) { fprintf(stderr, "Invalid option: --option %s: %s", optarg, pcmk_strerror(rc)); argerr++; } else { crm_info("Got: '%s'='%s'", name, value); } g_hash_table_replace(validate_options, name, value); } else { crm_err("Unhandled long option: %s", longname); } break; case 'V': resource_verbose++; crm_bump_log_level(argc, argv); break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); break; case 'x': xml_file = optarg; break; case 'Q': BE_QUIET = TRUE; break; case 'm': attr_set_type = XML_TAG_META_SETS; break; case 'z': attr_set_type = XML_TAG_UTILIZATION; break; case 'u': move_lifetime = strdup(optarg); break; case 'f': do_force = TRUE; crm_log_args(argc, argv); break; case 'i': prop_id = optarg; break; case 's': prop_set = optarg; break; case 'r': rsc_id = optarg; break; case 'v': prop_value = optarg; break; case 't': rsc_type = optarg; break; case 'T': timeout_ms = crm_get_msec(optarg); break; case 'e': clear_expired = TRUE; require_resource = FALSE; break; case 'C': case 'R': crm_log_args(argc, argv); require_resource = FALSE; if (cib_file == NULL) { require_crmd = TRUE; } rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_anon; break; case 'n': operation = optarg; break; case 'I': interval_spec = optarg; break; case 'D': require_dataset = FALSE; crm_log_args(argc, argv); rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_any; break; case 'F': require_crmd = TRUE; crm_log_args(argc, argv); rsc_cmd = flag; break; case 'U': case 'B': case 'M': crm_log_args(argc, argv); rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_anon; break; case 'c': case 'L': case 'l': case 'O': case 'o': require_resource = FALSE; rsc_cmd = flag; break; case 'Y': require_resource = FALSE; rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_anon; break; case 'q': case 'w': rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_any; break; case 'W': case 'A': case 'a': rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_anon; break; case 'S': require_dataset = FALSE; crm_log_args(argc, argv); prop_name = optarg; rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_any; break; case 'p': case 'd': crm_log_args(argc, argv); prop_name = optarg; rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_any; break; case 'G': case 'g': prop_name = optarg; rsc_cmd = flag; find_flags = pe_find_renamed|pe_find_any; break; case 'H': case 'N': crm_trace("Option %c => %s", flag, optarg); host_uname = optarg; break; default: CMD_ERR("Argument code 0%o (%c) is not (?yet?) supported", flag, flag); ++argerr; break; } } // Catch the case where the user didn't specify a command if (rsc_cmd == 'L') { require_resource = FALSE; } // --expired without --clear/-U doesn't make sense if (clear_expired == TRUE && rsc_cmd != 'U') { CMD_ERR("--expired requires --clear or -U"); argerr++; } if (optind < argc && argv[optind] != NULL && rsc_cmd == 0 && rsc_long_cmd) { override_params = crm_str_table_new(); while (optind < argc && argv[optind] != NULL) { char *name = calloc(1, strlen(argv[optind])); char *value = calloc(1, strlen(argv[optind])); int rc = sscanf(argv[optind], "%[^=]=%s", name, value); if(rc == 2) { g_hash_table_replace(override_params, name, value); } else { CMD_ERR("Error parsing '%s' as a name=value pair for --%s", argv[optind], rsc_long_cmd); free(value); free(name); argerr++; } optind++; } } else if (optind < argc && argv[optind] != NULL && rsc_cmd == 0) { CMD_ERR("non-option ARGV-elements: "); while (optind < argc && argv[optind] != NULL) { CMD_ERR("[%d of %d] %s ", optind, argc, argv[optind]); optind++; argerr++; } } if (optind > argc) { ++argerr; } // Sanity check validating from command line parameters. If everything checks out, // go ahead and run the validation. This way we don't need a CIB connection. if (validate_cmdline == TRUE) { // -r cannot be used with any of --class, --agent, or --provider if (rsc_id != NULL) { CMD_ERR("--resource cannot be used with --class, --agent, and --provider"); argerr++; // If --class, --agent, or --provider are given, --validate must also be given. } else if (!safe_str_eq(rsc_long_cmd, "validate")) { CMD_ERR("--class, --agent, and --provider require --validate"); argerr++; // Not all of --class, --agent, and --provider need to be given. Not all // classes support the concept of a provider. Check that what we were given // is valid. } else if (crm_str_eq(v_class, "stonith", TRUE)) { if (v_provider != NULL) { CMD_ERR("stonith does not support providers"); argerr++; } else if (stonith_agent_exists(v_agent, 0) == FALSE) { CMD_ERR("%s is not a known stonith agent", v_agent ? v_agent : ""); argerr++; } } else if (resources_agent_exists(v_class, v_provider, v_agent) == FALSE) { CMD_ERR("%s:%s:%s is not a known resource", v_class ? v_class : "", v_provider ? v_provider : "", v_agent ? v_agent : ""); argerr++; } if (argerr == 0) { rc = cli_resource_execute_from_params("test", v_class, v_provider, v_agent, "validate-all", validate_options, override_params, timeout_ms); exit_code = crm_errno2exit(rc); return bye(exit_code); } } if (argerr) { CMD_ERR("Invalid option(s) supplied, use --help for valid usage"); return bye(CRM_EX_USAGE); } if (do_force) { crm_debug("Forcing..."); cib_options |= cib_quorum_override; } if (require_resource && !rsc_id) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } if (find_flags && rsc_id) { require_dataset = TRUE; } // Establish a connection to the CIB cib_conn = cib_new(); if ((cib_conn == NULL) || (cib_conn->cmds == NULL)) { CMD_ERR("Could not create CIB connection: %s", pcmk_strerror(rc)); goto bail; } rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); if (rc != pcmk_ok) { CMD_ERR("Could not connect to the CIB: %s", pcmk_strerror(rc)); goto bail; } /* Populate working set from XML file if specified or CIB query otherwise */ if (require_dataset) { if (xml_file != NULL) { cib_xml_copy = filename2xml(xml_file); } else { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); } if(rc != pcmk_ok) { goto bail; } /* Populate the working set instance */ data_set = pe_new_working_set(); if (data_set == NULL) { rc = -ENOMEM; goto bail; } set_bit(data_set->flags, pe_flag_no_counts); set_bit(data_set->flags, pe_flag_no_compat); rc = update_working_set_xml(data_set, &cib_xml_copy); if (rc != pcmk_ok) { goto bail; } cluster_status(data_set); } // If command requires that resource exist if specified, find it if (find_flags && rsc_id) { rsc = pe_find_resource_with_flags(data_set->resources, rsc_id, find_flags); if (rsc == NULL) { CMD_ERR("Resource '%s' not found", rsc_id); rc = -ENXIO; goto bail; } } // Establish a connection to the controller if needed if (require_crmd) { - char *client_uuid; - pcmk_controld_api_cb_t dispatch_cb = { - handle_controller_reply, NULL - }; - pcmk_controld_api_cb_t destroy_cb = { - handle_controller_drop, NULL - }; - - - client_uuid = pcmk__getpid_s(); - controld_api = pcmk_new_controld_api(crm_system_name, client_uuid); - free(client_uuid); - - rc = controld_api->connect(controld_api, true, &dispatch_cb, - &destroy_cb); + rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); + if (rc != pcmk_rc_ok) { + CMD_ERR("Error connecting to the controller: %s", pcmk_rc_str(rc)); + rc = pcmk_rc2legacy(rc); + goto bail; + } + pcmk_register_ipc_callback(controld_api, controller_event_callback, + NULL); + rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); if (rc != pcmk_rc_ok) { CMD_ERR("Error connecting to the controller: %s", pcmk_rc_str(rc)); rc = pcmk_rc2legacy(rc); goto bail; } } /* Handle rsc_cmd appropriately */ if (rsc_cmd == 'L') { rc = pcmk_ok; cli_resource_print_list(data_set, FALSE); } else if (rsc_cmd == 'l') { int found = 0; GListPtr lpc = NULL; rc = pcmk_ok; for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { rsc = (pe_resource_t *) lpc->data; found++; cli_resource_print_raw(rsc); } if (found == 0) { printf("NO resources configured\n"); rc = -ENXIO; } } else if (rsc_cmd == 0 && rsc_long_cmd && safe_str_eq(rsc_long_cmd, "restart")) { /* We don't pass data_set because rsc needs to stay valid for the entire * lifetime of cli_resource_restart(), but it will reset and update the * working set multiple times, so it needs to use its own copy. */ rc = cli_resource_restart(rsc, host_uname, timeout_ms, cib_conn); } else if (rsc_cmd == 0 && rsc_long_cmd && safe_str_eq(rsc_long_cmd, "wait")) { rc = wait_till_stable(timeout_ms, cib_conn); } else if (rsc_cmd == 0 && rsc_long_cmd) { // validate, force-(stop|start|demote|promote|check) rc = cli_resource_execute(rsc, rsc_id, rsc_long_cmd, override_params, timeout_ms, cib_conn, data_set); if (rc >= 0) { is_ocf_rc = 1; } } else if (rsc_cmd == 'A' || rsc_cmd == 'a') { GListPtr lpc = NULL; xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); unpack_constraints(cib_constraints, data_set); // Constraints apply to group/clone, not member/instance rsc = uber_parent(rsc); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } cli_resource_print_colocation(rsc, TRUE, rsc_cmd == 'A', 1); fprintf(stdout, "* %s\n", rsc->id); cli_resource_print_location(rsc, NULL); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } cli_resource_print_colocation(rsc, FALSE, rsc_cmd == 'A', 1); } else if (rsc_cmd == 'c') { GListPtr lpc = NULL; rc = pcmk_ok; for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { rsc = (pe_resource_t *) lpc->data; cli_resource_print_cts(rsc); } cli_resource_print_cts_constraints(data_set); } else if (rsc_cmd == 'F') { rc = cli_resource_fail(controld_api, host_uname, rsc_id, data_set); if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } rc = pcmk_rc2legacy(rc); } else if (rsc_cmd == 'O') { rc = cli_resource_print_operations(rsc_id, host_uname, TRUE, data_set); } else if (rsc_cmd == 'o') { rc = cli_resource_print_operations(rsc_id, host_uname, FALSE, data_set); } else if (rsc_cmd == 'W') { rc = cli_resource_search(rsc, rsc_id, data_set); if (rc >= 0) { rc = pcmk_ok; } } else if (rsc_cmd == 'q') { rc = cli_resource_print(rsc, data_set, TRUE); } else if (rsc_cmd == 'w') { rc = cli_resource_print(rsc, data_set, FALSE); } else if (rsc_cmd == 'Y') { pe_node_t *dest = NULL; if (host_uname) { dest = pe_find_node(data_set->nodes, host_uname); if (dest == NULL) { rc = -pcmk_err_node_unknown; goto bail; } } cli_resource_why(cib_conn, data_set->resources, rsc, dest); rc = pcmk_ok; } else if (rsc_cmd == 'U') { GListPtr before = NULL; GListPtr after = NULL; GListPtr remaining = NULL; GListPtr ele = NULL; pe_node_t *dest = NULL; if (BE_QUIET == FALSE) { before = build_constraint_list(data_set->input); } if (clear_expired == TRUE) { rc = cli_resource_clear_all_expired(data_set->input, cib_conn, rsc_id, host_uname, scope_master); } else if (host_uname) { dest = pe_find_node(data_set->nodes, host_uname); if (dest == NULL) { rc = -pcmk_err_node_unknown; if (BE_QUIET == FALSE) { g_list_free(before); } goto bail; } rc = cli_resource_clear(rsc_id, dest->details->uname, NULL, cib_conn, TRUE); } else { rc = cli_resource_clear(rsc_id, NULL, data_set->nodes, cib_conn, TRUE); } if (BE_QUIET == FALSE) { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { CMD_ERR("Could not get modified CIB: %s\n", pcmk_strerror(rc)); g_list_free(before); goto bail; } data_set->input = cib_xml_copy; cluster_status(data_set); after = build_constraint_list(data_set->input); remaining = subtract_lists(before, after, (GCompareFunc) strcmp); for (ele = remaining; ele != NULL; ele = ele->next) { printf("Removing constraint: %s\n", (char *) ele->data); } g_list_free(before); g_list_free(after); g_list_free(remaining); } } else if (rsc_cmd == 'M' && host_uname) { rc = cli_resource_move(rsc, rsc_id, host_uname, cib_conn, data_set); } else if (rsc_cmd == 'B' && host_uname) { pe_node_t *dest = pe_find_node(data_set->nodes, host_uname); if (dest == NULL) { rc = -pcmk_err_node_unknown; goto bail; } rc = cli_resource_ban(rsc_id, dest->details->uname, NULL, cib_conn); } else if (rsc_cmd == 'B' || rsc_cmd == 'M') { pe_node_t *current = NULL; unsigned int nactive = 0; current = pe__find_active_requires(rsc, &nactive); if (nactive == 1) { rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); } else if (is_set(rsc->flags, pe_rsc_promotable)) { int count = 0; GListPtr iter = NULL; current = NULL; for(iter = rsc->children; iter; iter = iter->next) { pe_resource_t *child = (pe_resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { count++; current = pe__current_node(child); } } if(count == 1 && current) { rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); } else { rc = -EINVAL; exit_code = CRM_EX_USAGE; CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).", rsc_id, nactive, count); CMD_ERR("To prevent '%s' from running on a specific location, " "specify a node.", rsc_id); CMD_ERR("To prevent '%s' from being promoted at a specific " "location, specify a node and the master option.", rsc_id); } } else { rc = -EINVAL; exit_code = CRM_EX_USAGE; CMD_ERR("Resource '%s' not moved: active in %d locations.", rsc_id, nactive); CMD_ERR("To prevent '%s' from running on a specific location, " "specify a node.", rsc_id); } } else if (rsc_cmd == 'G') { rc = cli_resource_print_property(rsc, prop_name, data_set); } else if (rsc_cmd == 'S') { xmlNode *msg_data = NULL; if ((rsc_type == NULL) || !strlen(rsc_type)) { CMD_ERR("Must specify -t with resource type"); rc = -ENXIO; goto bail; } else if ((prop_value == NULL) || !strlen(prop_value)) { CMD_ERR("Must supply -v with new value"); rc = -EINVAL; goto bail; } CRM_LOG_ASSERT(prop_name != NULL); msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); crm_xml_add(msg_data, prop_name, prop_value); rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options); free_xml(msg_data); } else if (rsc_cmd == 'g') { rc = cli_resource_print_attribute(rsc, prop_name, data_set); } else if (rsc_cmd == 'p') { if (prop_value == NULL || strlen(prop_value) == 0) { CMD_ERR("You need to supply a value with the -v option"); rc = -EINVAL; goto bail; } /* coverity[var_deref_model] False positive */ rc = cli_resource_update_attribute(rsc, rsc_id, prop_set, prop_id, prop_name, prop_value, recursive, cib_conn, data_set); } else if (rsc_cmd == 'd') { /* coverity[var_deref_model] False positive */ rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id, prop_name, cib_conn, data_set); } else if ((rsc_cmd == 'C') && rsc) { if (do_force == FALSE) { rsc = uber_parent(rsc); } crm_debug("Erasing failures of %s (%s requested) on %s", rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); rc = cli_resource_delete(controld_api, host_uname, rsc, operation, interval_spec, TRUE, data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped cli_resource_check(cib_conn, rsc); } if (rc == pcmk_ok) { start_mainloop(controld_api); } } else if (rsc_cmd == 'C') { rc = cli_cleanup_all(controld_api, host_uname, operation, interval_spec, data_set); if (rc == pcmk_ok) { start_mainloop(controld_api); } } else if ((rsc_cmd == 'R') && rsc) { if (do_force == FALSE) { rsc = uber_parent(rsc); } crm_debug("Re-checking the state of %s (%s requested) on %s", rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); rc = cli_resource_delete(controld_api, host_uname, rsc, NULL, 0, FALSE, data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped cli_resource_check(cib_conn, rsc); } if (rc == pcmk_ok) { start_mainloop(controld_api); } } else if (rsc_cmd == 'R') { const char *router_node = host_uname; int attr_options = pcmk__node_attr_none; if (host_uname) { pe_node_t *node = pe_find_node(data_set->nodes, host_uname); if (pe__is_guest_or_remote_node(node)) { node = pe__current_node(node->details->remote_rsc); if (node == NULL) { CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", host_uname); rc = -ENXIO; goto bail; } router_node = node->details->uname; attr_options |= pcmk__node_attr_remote; } } if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", host_uname? host_uname : "all nodes"); rc = pcmk_ok; goto bail; } crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes"); rc = pcmk_rc2legacy(pcmk__node_attr_request_clear(NULL, host_uname, NULL, NULL, NULL, NULL, attr_options)); - if (controld_api->reprobe(controld_api, host_uname, - router_node) == pcmk_rc_ok) { + if (pcmk_controld_api_reprobe(controld_api, host_uname, + router_node) == pcmk_rc_ok) { start_mainloop(controld_api); } } else if (rsc_cmd == 'D') { xmlNode *msg_data = NULL; if (rsc_type == NULL) { CMD_ERR("You need to specify a resource type with -t"); rc = -ENXIO; goto bail; } msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); rc = cib_conn->cmds->remove(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options); free_xml(msg_data); } else { CMD_ERR("Unknown command: %c", rsc_cmd); } bail: if (is_ocf_rc) { exit_code = rc; } else if (rc != pcmk_ok) { CMD_ERR("Error performing operation: %s", pcmk_strerror(rc)); if (rc == -pcmk_err_no_quorum) { CMD_ERR("To ignore quorum, use the force option"); } if (exit_code == CRM_EX_OK) { exit_code = crm_errno2exit(rc); } } return bye(exit_code); } diff --git a/tools/crm_resource.h b/tools/crm_resource.h index 0bf7bee6b7..cb7f506f3b 100644 --- a/tools/crm_resource.h +++ b/tools/crm_resource.h @@ -1,107 +1,104 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include -#include "crm_resource_controller.h" extern bool print_pending; extern bool scope_master; extern bool do_force; extern bool BE_QUIET; extern int resource_verbose; extern int cib_options; extern char *move_lifetime; extern const char *attr_set_type; -extern pcmk_controld_api_cb_t controld_api_cb; - /* ban */ int cli_resource_prefer(const char *rsc_id, const char *host, cib_t * cib_conn); int cli_resource_ban(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn); int cli_resource_clear(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn, bool clear_ban_constraints); int cli_resource_clear_all_expired(xmlNode *root, cib_t *cib_conn, const char *rsc, const char *node, bool scope_master); /* print */ void cli_resource_print_cts(pe_resource_t * rsc); void cli_resource_print_raw(pe_resource_t * rsc); void cli_resource_print_cts_constraints(pe_working_set_t * data_set); void cli_resource_print_location(pe_resource_t * rsc, const char *prefix); void cli_resource_print_colocation(pe_resource_t * rsc, bool dependents, bool recursive, int offset); int cli_resource_print(pe_resource_t *rsc, pe_working_set_t *data_set, bool expanded); int cli_resource_print_list(pe_working_set_t * data_set, bool raw); int cli_resource_print_attribute(pe_resource_t *rsc, const char *attr, pe_working_set_t *data_set); int cli_resource_print_property(pe_resource_t *rsc, const char *attr, pe_working_set_t *data_set); int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bool active, pe_working_set_t * data_set); /* runtime */ void cli_resource_check(cib_t * cib, pe_resource_t *rsc); -int cli_resource_fail(pcmk_controld_api_t *controld_api, +int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set); int cli_resource_search(pe_resource_t *rsc, const char *requested_name, pe_working_set_t *data_set); -int cli_resource_delete(pcmk_controld_api_t *controld_api, +int cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname, pe_resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pe_working_set_t *data_set); -int cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, +int cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set); int cli_resource_restart(pe_resource_t *rsc, const char *host, int timeout_ms, cib_t *cib); int cli_resource_move(pe_resource_t *rsc, const char *rsc_id, const char *host_name, cib_t *cib, pe_working_set_t *data_set); int cli_resource_execute_from_params(const char *rsc_name, const char *rsc_class, const char *rsc_prov, const char *rsc_type, const char *rsc_action, GHashTable *params, GHashTable *override_hash, int timeout_ms); int cli_resource_execute(pe_resource_t *rsc, const char *requested_name, const char *rsc_action, GHashTable *override_hash, int timeout_ms, cib_t *cib, pe_working_set_t *data_set); int cli_resource_update_attribute(pe_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, bool recursive, cib_t *cib, pe_working_set_t *data_set); int cli_resource_delete_attribute(pe_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_id, const char *attr_name, cib_t *cib, pe_working_set_t *data_set); GList* subtract_lists(GList *from, GList *items, GCompareFunc cmp); int update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml); int wait_till_stable(int timeout_ms, cib_t * cib); void cli_resource_why(cib_t *cib_conn, GListPtr resources, pe_resource_t *rsc, pe_node_t *node); diff --git a/tools/crm_resource_controller.c b/tools/crm_resource_controller.c deleted file mode 100644 index 994a7be0ff..0000000000 --- a/tools/crm_resource_controller.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright 2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * - * This source code is licensed under the GNU General Public License version 2 - * or later (GPLv2+) WITHOUT ANY WARRANTY. - */ - -#include -#include -#include -#include "crm_resource.h" - -// API object's private members -struct controller_private { - char *client_name; // Client name to use with IPC - char *client_uuid; // Client UUID to use with IPC - mainloop_io_t *source; // If main loop used, I/O source for IPC - crm_ipc_t *ipc; // IPC connection to controller - int replies_expected; // How many controller replies are expected - pcmk_controld_api_cb_t dispatch_cb; // Caller's registered dispatch callback - pcmk_controld_api_cb_t destroy_cb; // Caller's registered destroy callback -}; - -static void -call_client_callback(pcmk_controld_api_t *api, pcmk_controld_api_cb_t *cb, - void *api_data) -{ - if ((cb != NULL) && (cb->callback != NULL)) { - cb->callback(api, api_data, cb->user_data); - } -} - -/* - * IPC callbacks when used with main loop - */ - -static void -controller_ipc_destroy(gpointer user_data) -{ - pcmk_controld_api_t *api = user_data; - struct controller_private *private = api->private; - - private->ipc = NULL; - private->source = NULL; - call_client_callback(api, &(private->destroy_cb), NULL); -} - -// \return < 0 if connection is no longer required, >= 0 if it is -static int -controller_ipc_dispatch(const char *buffer, ssize_t length, gpointer user_data) -{ - xmlNode *msg = NULL; - pcmk_controld_api_t *api = user_data; - - CRM_CHECK(buffer && api && api->private, return 0); - - msg = string2xml(buffer); - if (msg == NULL) { - crm_warn("Received malformed controller IPC message"); - } else { - struct controller_private *private = api->private; - - crm_log_xml_trace(msg, "controller-reply"); - private->replies_expected--; - call_client_callback(api, &(private->dispatch_cb), - get_message_xml(msg, F_CRM_DATA)); - free_xml(msg); - } - return 0; -} - -/* - * IPC utilities - */ - -// \return Standard Pacemaker return code -static int -send_hello(crm_ipc_t *ipc, const char *client_name, const char *client_uuid) -{ - xmlNode *hello = create_hello_message(client_uuid, client_name, "0", "1"); - int rc = crm_ipc_send(ipc, hello, 0, 0, NULL); - - free_xml(hello); - if (rc < 0) { - rc = pcmk_legacy2rc(rc); - crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s", - CRM_SYSTEM_CRMD /* ipc->name */, - pcmk_rc_str(rc), rc); - return rc; - } - crm_debug("Sent IPC hello to %s", CRM_SYSTEM_CRMD /* ipc->name */); - return pcmk_rc_ok; -} - -// \return Standard Pacemaker return code -static int -send_controller_request(pcmk_controld_api_t *api, const char *op, - xmlNode *msg_data, const char *node) -{ - int rc; - struct controller_private *private = api->private; - xmlNode *cmd = create_request(op, msg_data, node, CRM_SYSTEM_CRMD, - private->client_name, private->client_uuid); - const char *reference = crm_element_value(cmd, XML_ATTR_REFERENCE); - - if ((cmd == NULL) || (reference == NULL)) { - return EINVAL; - } - - //@TODO pass as args? 0=crm_ipc_flags, 0=timeout_ms (default 5s), NULL=reply - crm_log_xml_trace(cmd, "controller-request"); - rc = crm_ipc_send(private->ipc, cmd, 0, 0, NULL); - free_xml(cmd); - if (rc < 0) { - return pcmk_legacy2rc(rc); - } - private->replies_expected++; - return pcmk_rc_ok; -} - -/* - * pcmk_controld_api_t methods - */ - -static int -controller_connect_mainloop(pcmk_controld_api_t *api) -{ - struct controller_private *private = api->private; - struct ipc_client_callbacks callbacks = { - .dispatch = controller_ipc_dispatch, - .destroy = controller_ipc_destroy, - }; - - private->source = mainloop_add_ipc_client(CRM_SYSTEM_CRMD, - G_PRIORITY_DEFAULT, 0, api, - &callbacks); - if (private->source == NULL) { - return ENOTCONN; - } - - private->ipc = mainloop_get_ipc_client(private->source); - if (private->ipc == NULL) { - (void) api->disconnect(api); - return ENOTCONN; - } - - crm_debug("Connected to %s IPC (attaching to main loop)", CRM_SYSTEM_CRMD); - return pcmk_rc_ok; -} - -static int -controller_connect_no_mainloop(pcmk_controld_api_t *api) -{ - struct controller_private *private = api->private; - - private->ipc = crm_ipc_new(CRM_SYSTEM_CRMD, 0); - if (private->ipc == NULL) { - return ENOTCONN; - } - if (!crm_ipc_connect(private->ipc)) { - crm_ipc_close(private->ipc); - crm_ipc_destroy(private->ipc); - private->ipc = NULL; - return errno; - } - /* @TODO caller needs crm_ipc_get_fd(private->ipc); either add method for - * that, or replace use_mainloop with int *fd - */ - crm_debug("Connected to %s IPC", CRM_SYSTEM_CRMD); - return pcmk_rc_ok; -} - -static void -set_callback(pcmk_controld_api_cb_t *dest, pcmk_controld_api_cb_t *source) -{ - if (source) { - dest->callback = source->callback; - dest->user_data = source->user_data; - } -} - -static int -controller_api_connect(pcmk_controld_api_t *api, bool use_mainloop, - pcmk_controld_api_cb_t *dispatch_cb, - pcmk_controld_api_cb_t *destroy_cb) -{ - int rc = pcmk_rc_ok; - struct controller_private *private; - - if (api == NULL) { - return EINVAL; - } - private = api->private; - - set_callback(&(private->dispatch_cb), dispatch_cb); - set_callback(&(private->destroy_cb), destroy_cb); - - if (private->ipc != NULL) { - return pcmk_rc_ok; // already connected - } - - if (use_mainloop) { - rc = controller_connect_mainloop(api); - } else { - rc = controller_connect_no_mainloop(api); - } - if (rc != pcmk_rc_ok) { - return rc; - } - - rc = send_hello(private->ipc, private->client_name, private->client_uuid); - if (rc != pcmk_rc_ok) { - (void) api->disconnect(api); - } - return rc; -} - -static int -controller_api_disconnect(pcmk_controld_api_t *api) -{ - struct controller_private *private = api->private; - - if (private->source != NULL) { - // Attached to main loop - mainloop_del_ipc_client(private->source); - private->source = NULL; - private->ipc = NULL; - - } else if (private->ipc != NULL) { - // Not attached to main loop - crm_ipc_t *ipc = private->ipc; - - private->ipc = NULL; - crm_ipc_close(ipc); - crm_ipc_destroy(ipc); - } - crm_debug("Disconnected from %s IPC", CRM_SYSTEM_CRMD /* ipc->name */); - return pcmk_rc_ok; -} - -//@TODO dispatch function for non-mainloop a la stonith_dispatch() -//@TODO convenience retry-connect function a la stonith_api_connect_retry() - -static unsigned int -controller_api_replies_expected(pcmk_controld_api_t *api) -{ - if (api != NULL) { - struct controller_private *private = api->private; - - return private->replies_expected; - } - return 0; -} - -static xmlNode * -create_reprobe_message_data(const char *target_node, const char *router_node) -{ - xmlNode *msg_data; - - msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE); - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); - if ((router_node != NULL) && safe_str_neq(router_node, target_node)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - return msg_data; -} - -static int -controller_api_reprobe(pcmk_controld_api_t *api, const char *target_node, - const char *router_node) -{ - int rc = EINVAL; - - if (api != NULL) { - xmlNode *msg_data; - - crm_debug("Sending %s IPC request to reprobe %s via %s", - CRM_SYSTEM_CRMD, crm_str(target_node), crm_str(router_node)); - msg_data = create_reprobe_message_data(target_node, router_node); - rc = send_controller_request(api, CRM_OP_REPROBE, msg_data, - (router_node? router_node : target_node)); - free_xml(msg_data); - } - return rc; -} - -// \return Standard Pacemaker return code -static int -controller_resource_op(pcmk_controld_api_t *api, const char *op, - const char *target_node, const char *router_node, - bool cib_only, const char *rsc_id, - const char *rsc_long_id, const char *standard, - const char *provider, const char *type) -{ - int rc; - char *key; - xmlNode *msg_data, *xml_rsc, *params; - - if (api == NULL) { - return EINVAL; - } - if (router_node == NULL) { - router_node = target_node; - } - - msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); - - /* The controller logs the transition key from resource op requests, so we - * need to have *something* for it. - */ - key = pcmk__transition_key(0, getpid(), 0, - "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); - crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); - free(key); - - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); - if (safe_str_neq(router_node, target_node)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - - if (cib_only) { - // Indicate that only the CIB needs to be cleaned - crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); - } - - xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); - crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); - crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc_long_id); - crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, standard); - crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, provider); - crm_xml_add(xml_rsc, XML_ATTR_TYPE, type); - - params = create_xml_node(msg_data, XML_TAG_ATTRS); - crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); - - // The controller parses the timeout from the request - key = crm_meta_name(XML_ATTR_TIMEOUT); - crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg - free(key); - - rc = send_controller_request(api, op, msg_data, router_node); - free_xml(msg_data); - return rc; -} - -static int -controller_api_fail_resource(pcmk_controld_api_t *api, - const char *target_node, const char *router_node, - const char *rsc_id, const char *rsc_long_id, - const char *standard, const char *provider, - const char *type) -{ - crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s", - CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), - crm_str(target_node), crm_str(router_node)); - return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node, - router_node, false, rsc_id, rsc_long_id, - standard, provider, type); -} - -static int -controller_api_refresh_resource(pcmk_controld_api_t *api, - const char *target_node, - const char *router_node, - const char *rsc_id, const char *rsc_long_id, - const char *standard, const char *provider, - const char *type, bool cib_only) -{ - crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s", - CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), - crm_str(target_node), crm_str(router_node)); - return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node, - router_node, cib_only, rsc_id, rsc_long_id, - standard, provider, type); -} - -pcmk_controld_api_t * -pcmk_new_controld_api(const char *client_name, const char *client_uuid) -{ - struct controller_private *private; - pcmk_controld_api_t *api = calloc(1, sizeof(pcmk_controld_api_t)); - - CRM_ASSERT(api != NULL); - - api->private = calloc(1, sizeof(struct controller_private)); - CRM_ASSERT(api->private != NULL); - private = api->private; - - if (client_name == NULL) { - client_name = crm_system_name? crm_system_name : "client"; - } - private->client_name = strdup(client_name); - CRM_ASSERT(private->client_name != NULL); - - if (client_uuid == NULL) { - private->client_uuid = crm_generate_uuid(); - } else { - private->client_uuid = strdup(client_uuid); - } - CRM_ASSERT(private->client_uuid != NULL); - - api->connect = controller_api_connect; - api->disconnect = controller_api_disconnect; - api->replies_expected = controller_api_replies_expected; - api->reprobe = controller_api_reprobe; - api->fail_resource = controller_api_fail_resource; - api->refresh_resource = controller_api_refresh_resource; - return api; -} - -void -pcmk_free_controld_api(pcmk_controld_api_t *api) -{ - if (api != NULL) { - struct controller_private *private = api->private; - - api->disconnect(api); - free(private->client_name); - free(private->client_uuid); - free(api->private); - free(api); - } -} diff --git a/tools/crm_resource_controller.h b/tools/crm_resource_controller.h deleted file mode 100644 index 50e20b43f7..0000000000 --- a/tools/crm_resource_controller.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright 2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * - * This source code is licensed under the GNU Lesser General Public License - * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. - */ -#ifndef PCMK__CONTROLD_API_H -#define PCMK__CONTROLD_API_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include // bool - -/* This is a demonstration of an abstracted controller IPC API. It is expected - * that this will be improved and moved to libcrmcommon. - * - * @TODO We could consider whether it's reasonable to have a single type for - * all daemons' IPC APIs (e.g. pcmk_ipc_api_t instead of pcmk_*_api_t). They - * could potentially have common connect/disconnect methods and then a void* to - * a group of API-specific methods. - * - * In that case, the callback type would also need to be generic, taking - * (pcmk_ipc_api_t *api, void *api_data, void *user_data), with individual APIs - * having functions for getting useful info from api_data. If all APIs followed - * the call_id model, we could use int call_id instead of api_data. - * - * A major annoyance is that the controller IPC protocol currently does not have - * any way to tie a particular reply to a particular request. The current - * clients (crmadmin, crm_node, and crm_resource) simply know what kind of reply - * to expect for the kind of request they sent. In crm_resource's case, all it - * does is count replies, ignoring their content altogether. - * - * That really forces us to have a single callback for all events rather than a - * per-request callback. That in turn implies that callers can only provide a - * single user data pointer. - * - * @TODO Define protocol version constants to use in hello message. - * @TODO Allow callers to specify timeouts. - * @TODO Define call IDs for controller ops, while somehow maintaining backward - * compatibility, since a client running on a Pacemaker Remote node could - * be older or newer than the controller on the connection's cluster - * node. - * @TODO The controller currently does not respond to hello messages. We should - * establish a common connection handshake protocol for all daemons that - * involves a hello message and acknowledgement. We should support sync - * or async connection (i.e. block until the ack is received, or return - * after the hello is sent and call a connection callback when the hello - * ack is received). - */ - -//! \internal -typedef struct pcmk_controld_api_s pcmk_controld_api_t; - -//! \internal -typedef struct pcmk_controld_api_callback_s { - void (*callback)(pcmk_controld_api_t *api, void *api_data, void *user_data); - void *user_data; -} pcmk_controld_api_cb_t; - -//! \internal -struct pcmk_controld_api_s { - //! \internal - void *private; - - /*! - * \internal - * \brief Connect to the local controller - * - * \param[in] api Controller API instance - * \param[in] use_mainloop If true, attach IPC to main loop - * \param[in] dispatch_cb If not NULL, call this when replies are received - * \param[in] destroy_cb If not NULL, call this if connection drops - * - * \return Standard Pacemaker return code - * \note Only the pointers inside the callback objects need to be - * persistent, not the callback objects themselves. The destroy_cb - * will be called only for unrequested disconnects. - */ - int (*connect)(pcmk_controld_api_t *api, bool use_mainloop, - pcmk_controld_api_cb_t *dispatch_cb, - pcmk_controld_api_cb_t *destroy_cb); - - /*! - * \internal - * \brief Disconnect from the local controller - * - * \param[in] api Controller API instance - * - * \return Standard Pacemaker return code - */ - int (*disconnect)(pcmk_controld_api_t *api); - - /*! - * \internal - * \brief Check number of replies still expected from controller - * - * \param[in] api Controller API instance - * - * \return Number of expected replies - */ - unsigned int (*replies_expected)(pcmk_controld_api_t *api); - - /*! - * \internal - * \brief Send a reprobe controller operation - * - * \param[in] api Controller API instance - * \param[in] target_node Name of node to reprobe - * \param[in] router_node Router node for host - * - * \return Standard Pacemaker return code - */ - int (*reprobe)(pcmk_controld_api_t *api, const char *target_node, - const char *router_node); - - /* @TODO These methods have a lot of arguments. One possibility would be to - * make a struct for agent info (standard/provider/type), which theortically - * could be used throughout pacemaker code. However that would end up being - * really awkward to use generically, since sometimes you need to allocate - * those strings (char *) and other times you only have references into XML - * (const char *). We could make some structs just for this API. - */ - - /*! - * \internal - * \brief Ask the controller to fail a resource - * - * \param[in] api Controller API instance - * \param[in] target_node Name of node resource is on - * \param[in] router_node Router node for target - * \param[in] rsc_id ID of resource to fail - * \param[in] rsc_long_id Long ID of resource (if any) - * \param[in] standard Standard of resource - * \param[in] provider Provider of resource (if any) - * \param[in] type Type of resource to fail - * - * \return Standard Pacemaker return code - */ - int (*fail_resource)(pcmk_controld_api_t *api, const char *target_node, - const char *router_node, const char *rsc_id, - const char *rsc_long_id, const char *standard, - const char *provider, const char *type); - - /*! - * \internal - * \brief Ask the controller to refresh a resource - * - * \param[in] api Controller API instance - * \param[in] target_node Name of node resource is on - * \param[in] router_node Router node for target - * \param[in] rsc_id ID of resource to refresh - * \param[in] rsc_long_id Long ID of resource (if any) - * \param[in] standard Standard of resource - * \param[in] provider Provider of resource (if any) - * \param[in] type Type of resource - * \param[in] cib_only If true, clean resource from CIB only - * - * \return Standard Pacemaker return code - */ - int (*refresh_resource)(pcmk_controld_api_t *api, const char *target_node, - const char *router_node, const char *rsc_id, - const char *rsc_long_id, const char *standard, - const char *provider, const char *type, - bool cib_only); -}; - -/*! - * \internal - * \brief Create new controller IPC API object for clients - * - * \param[in] client_name Client name to use with IPC - * \param[in] client_uuid Client UUID to use with IPC - * - * \return Newly allocated object - * \note This function asserts on errors, so it will never return NULL. - * The caller is responsible for freeing the result with - * pcmk_free_controld_api(). - */ -pcmk_controld_api_t *pcmk_new_controld_api(const char *client_name, - const char *client_uuid); - -/*! - * \internal - * \brief Free a controller IPC API object - * - * \param[in] api Controller IPC API object to free - */ -void pcmk_free_controld_api(pcmk_controld_api_t *api); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 295460cf4b..185a9a3a28 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,2089 +1,2089 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include +#include int resource_verbose = 0; bool do_force = FALSE; const char *attr_set_type = XML_TAG_ATTR_SETS; static int do_find_resource(const char *rsc, pe_resource_t * the_rsc, pe_working_set_t * data_set) { int found = 0; GListPtr lpc = NULL; for (lpc = the_rsc->running_on; lpc != NULL; lpc = lpc->next) { pe_node_t *node = (pe_node_t *) lpc->data; if (BE_QUIET) { fprintf(stdout, "%s\n", node->details->uname); } else { const char *state = ""; if (!pe_rsc_is_clone(the_rsc) && the_rsc->fns->state(the_rsc, TRUE) == RSC_ROLE_MASTER) { state = "Master"; } fprintf(stdout, "resource %s is running on: %s %s\n", rsc, node->details->uname, state); } found++; } if (BE_QUIET == FALSE && found == 0) { fprintf(stderr, "resource %s is NOT running\n", rsc); } return found; } int cli_resource_search(pe_resource_t *rsc, const char *requested_name, pe_working_set_t *data_set) { int found = 0; pe_resource_t *parent = uber_parent(rsc); if (pe_rsc_is_clone(rsc)) { for (GListPtr iter = rsc->children; iter != NULL; iter = iter->next) { found += do_find_resource(requested_name, iter->data, data_set); } /* The anonymous clone children's common ID is supplied */ } else if (pe_rsc_is_clone(parent) && is_not_set(rsc->flags, pe_rsc_unique) && rsc->clone_name && safe_str_eq(requested_name, rsc->clone_name) && safe_str_neq(requested_name, rsc->id)) { for (GListPtr iter = parent->children; iter; iter = iter->next) { found += do_find_resource(requested_name, iter->data, data_set); } } else { found += do_find_resource(requested_name, rsc, data_set); } return found; } #define XPATH_MAX 1024 static int find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int offset = 0; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; if(value) { *value = NULL; } if(the_cib == NULL) { return -ENOTCONN; } xpath_string = calloc(1, XPATH_MAX); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", get_object_path("resources")); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//*[@id=\"%s\"]", rsc); if (set_type) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s", set_type); if (set_name) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "[@id=\"%s\"]", set_name); } } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//nvpair["); if (attr_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@id=\"%s\"", attr_id); } if (attr_name) { if (attr_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, " and "); } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@name=\"%s\"", attr_name); } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "]"); CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = -EINVAL; printf("Multiple attributes match name=%s\n", attr_name); for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { printf(" Value: %s \t(id=%s)\n", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } } else if(value) { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } bail: free(xpath_string); free_xml(xml_search); return rc; } /* PRIVATE. Use the find_matching_attr_resources instead. */ static void find_matching_attr_resources_recursive(GList/* */ ** result, pe_resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd, int depth) { int rc = pcmk_ok; char *lookup_id = clone_strip(rsc->id); char *local_attr_id = NULL; /* visit the children */ for(GList *gIter = rsc->children; gIter; gIter = gIter->next) { find_matching_attr_resources_recursive(result, (pe_resource_t*)gIter->data, rsc_id, attr_set, attr_id, attr_name, cib, cmd, depth+1); /* do it only once for clones */ if(pe_clone == rsc->variant) { break; } } rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); /* Post-order traversal. * The root is always on the list and it is the last item. */ if((0 == depth) || (pcmk_ok == rc)) { /* push the head */ *result = g_list_append(*result, rsc); } free(local_attr_id); free(lookup_id); } /* The result is a linearized pre-ordered tree of resources. */ static GList/**/ * find_matching_attr_resources(pe_resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd) { int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; GList * result = NULL; /* If --force is used, update only the requested resource (clone or primitive). * Otherwise, if the primitive has the attribute, use that. * Otherwise use the clone. */ if(do_force == TRUE) { return g_list_append(result, rsc); } if(rsc->parent && pe_clone == rsc->parent->variant) { int rc = pcmk_ok; char *local_attr_id = NULL; rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); free(local_attr_id); if(rc != pcmk_ok) { rsc = rsc->parent; if (BE_QUIET == FALSE) { printf("Performing %s of '%s' on '%s', the parent of '%s'\n", cmd, attr_name, rsc->id, rsc_id); } } return g_list_append(result, rsc); } else if(rsc->parent == NULL && rsc->children && pe_clone == rsc->variant) { pe_resource_t *child = rsc->children->data; if(child->variant == pe_native) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if(rc == pcmk_ok) { rsc = child; if (BE_QUIET == FALSE) { printf("A value for '%s' already exists in child '%s', performing %s on that instead of '%s'\n", attr_name, lookup_id, cmd, rsc_id); } } free(local_attr_id); free(lookup_id); } return g_list_append(result, rsc); } /* If the resource is a group ==> children inherit the attribute if defined. */ find_matching_attr_resources_recursive(&result, rsc, rsc_id, attr_set, attr_id, attr_name, cib, cmd, 0); return result; } int cli_resource_update_attribute(pe_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, bool recursive, cib_t *cib, pe_working_set_t *data_set) { int rc = pcmk_ok; static bool need_init = TRUE; char *local_attr_id = NULL; char *local_attr_set = NULL; GList/**/ *resources = NULL; const char *common_attr_id = attr_id; if(attr_id == NULL && do_force == FALSE && find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) == -EINVAL) { printf("\n"); } if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { if (do_force == FALSE) { rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", uber_parent(rsc)->id, attr_name, local_attr_id); printf(" Delete '%s' first or use the force option to override\n", local_attr_id); } free(local_attr_id); if (rc == pcmk_ok) { return -ENOTUNIQ; } } resources = g_list_append(resources, rsc); } else { resources = find_matching_attr_resources(rsc, requested_name, attr_set, attr_id, attr_name, cib, "update"); } /* If either attr_set or attr_id is specified, * one clearly intends to modify a single resource. * It is the last item on the resource list.*/ for(GList *gIter = (attr_set||attr_id) ? g_list_last(resources) : resources ; gIter; gIter = gIter->next) { char *lookup_id = NULL; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; local_attr_id = NULL; local_attr_set = NULL; rsc = (pe_resource_t*)gIter->data; attr_id = common_attr_id; lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok) { crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id); attr_id = local_attr_id; } else if (rc != -ENXIO) { free(lookup_id); free(local_attr_id); g_list_free(resources); return rc; } else { const char *tag = crm_element_name(rsc->xml); if (attr_set == NULL) { local_attr_set = crm_strdup_printf("%s-%s", lookup_id, attr_set_type); attr_set = local_attr_set; } if (attr_id == NULL) { local_attr_id = crm_strdup_printf("%s-%s", attr_set, attr_name); attr_id = local_attr_id; } xml_top = create_xml_node(NULL, tag); crm_xml_add(xml_top, XML_ATTR_ID, lookup_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); } xml_obj = crm_create_nvpair_xml(xml_obj, attr_id, attr_name, attr_value); if (xml_top == NULL) { xml_top = xml_obj; } crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Set '%s' option: id=%s%s%s%s%s value=%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value); } free_xml(xml_top); free(lookup_id); free(local_attr_id); free(local_attr_set); if(recursive && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { GListPtr lpc = NULL; if(need_init) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); need_init = FALSE; unpack_constraints(cib_constraints, data_set); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } } crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs); set_bit(rsc->flags, pe_rsc_allocating); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; pe_resource_t *peer = cons->rsc_lh; crm_debug("Checking %s %d", cons->id, cons->score); if (cons->score > 0 && is_not_set(peer->flags, pe_rsc_allocating)) { /* Don't get into colocation loops */ crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, peer->id); cli_resource_update_attribute(peer, peer->id, NULL, NULL, attr_name, attr_value, recursive, cib, data_set); } } } } g_list_free(resources); return rc; } int cli_resource_delete_attribute(pe_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_id, const char *attr_name, cib_t *cib, pe_working_set_t *data_set) { int rc = pcmk_ok; GList/**/ *resources = NULL; if(attr_id == NULL && do_force == FALSE && find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) == -EINVAL) { printf("\n"); } if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { resources = find_matching_attr_resources(rsc, requested_name, attr_set, attr_id, attr_name, cib, "delete"); } else { resources = g_list_append(resources, rsc); } for(GList *gIter = resources; gIter; gIter = gIter->next) { char *lookup_id = NULL; xmlNode *xml_obj = NULL; char *local_attr_id = NULL; rsc = (pe_resource_t*)gIter->data; lookup_id = clone_strip(rsc->id); rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == -ENXIO) { free(lookup_id); rc = pcmk_ok; continue; } else if (rc != pcmk_ok) { free(lookup_id); g_list_free(resources); return rc; } if (attr_id == NULL) { attr_id = local_attr_id; } xml_obj = crm_create_nvpair_xml(NULL, attr_id, attr_name, NULL); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->remove(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Deleted '%s' option: id=%s%s%s%s%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(lookup_id); free_xml(xml_obj); free(local_attr_id); } g_list_free(resources); return rc; } // \return Standard Pacemaker return code static int -send_lrm_rsc_op(pcmk_controld_api_t *controld_api, bool do_fail_resource, +send_lrm_rsc_op(pcmk_ipc_api_t *controld_api, bool do_fail_resource, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { const char *router_node = host_uname; const char *rsc_api_id = NULL; const char *rsc_long_id = NULL; const char *rsc_class = NULL; const char *rsc_provider = NULL; const char *rsc_type = NULL; bool cib_only = false; pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found", rsc_id); return ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s", rsc_id); return EINVAL; } rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rsc_provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER), rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE); if ((rsc_class == NULL) || (rsc_type == NULL)) { CMD_ERR("Resource %s does not have a class and type", rsc_id); return EINVAL; } if (host_uname == NULL) { CMD_ERR("Please specify a node name"); return EINVAL; } else { pe_node_t *node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { CMD_ERR("Node %s not found", host_uname); return pcmk_rc_node_unknown; } if (!(node->details->online)) { if (do_fail_resource) { CMD_ERR("Node %s is not online", host_uname); return ENOTCONN; } else { cib_only = true; } } if (!cib_only && pe__is_guest_or_remote_node(node)) { node = pe__current_node(node->details->remote_rsc); if (node == NULL) { CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", host_uname); return ENOTCONN; } router_node = node->details->uname; } } if (rsc->clone_name) { rsc_api_id = rsc->clone_name; rsc_long_id = rsc->id; } else { rsc_api_id = rsc->id; } if (do_fail_resource) { - return controld_api->fail_resource(controld_api, host_uname, - router_node, rsc_api_id, rsc_long_id, - rsc_class, rsc_provider, rsc_type); + return pcmk_controld_api_fail(controld_api, host_uname, router_node, + rsc_api_id, rsc_long_id, + rsc_class, rsc_provider, rsc_type); } else { - return controld_api->refresh_resource(controld_api, host_uname, - router_node, rsc_api_id, - rsc_long_id, rsc_class, - rsc_provider, rsc_type, cib_only); + return pcmk_controld_api_refresh(controld_api, host_uname, router_node, + rsc_api_id, rsc_long_id, rsc_class, + rsc_provider, rsc_type, cib_only); } } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(pe_resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } // \return Standard Pacemaker return code static int -clear_rsc_history(pcmk_controld_api_t *controld_api, const char *host_uname, +clear_rsc_history(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { int rc = pcmk_ok; /* Erase the resource's entire LRM history in the CIB, even if we're only * clearing a single operation's fail count. If we erased only entries for a * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, data_set); if (rc != pcmk_rc_ok) { return rc; } crm_trace("Processing %d mainloop inputs", - controld_api->replies_expected(controld_api)); + pcmk_controld_api_replies_expected(controld_api)); while (g_main_context_iteration(NULL, FALSE)) { crm_trace("Processed mainloop input, %d still remaining", - controld_api->replies_expected(controld_api)); + pcmk_controld_api_replies_expected(controld_api)); } return rc; } static int -clear_rsc_failures(pcmk_controld_api_t *controld_api, const char *node_name, +clear_rsc_failures(pcmk_ipc_api_t *controld_api, const char *node_name, const char *rsc_id, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { int rc = pcmk_ok; const char *failed_value = NULL; const char *failed_id = NULL; const char *interval_ms_s = NULL; GHashTable *rscs = NULL; GHashTableIter iter; /* Create a hash table to use as a set of resources to clean. This lets us * clean each resource only once (per node) regardless of how many failed * operations it has. */ rscs = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); // Normalize interval to milliseconds for comparison to history entry if (operation) { interval_ms_s = crm_strdup_printf("%u", crm_parse_interval_spec(interval_spec)); } for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); if (failed_id == NULL) { // Malformed history entry, should never happen continue; } // No resource specified means all resources match if (rsc_id) { pe_resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources, failed_id, pe_find_renamed|pe_find_anon); if (!fail_rsc || safe_str_neq(rsc_id, fail_rsc->id)) { continue; } } // Host name should always have been provided by this point failed_value = crm_element_value(xml_op, XML_ATTR_UNAME); if (safe_str_neq(node_name, failed_value)) { continue; } // No operation specified means all operations match if (operation) { failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK); if (safe_str_neq(operation, failed_value)) { continue; } // Interval (if operation was specified) defaults to 0 (not all) failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); if (safe_str_neq(interval_ms_s, failed_value)) { continue; } } /* not available until glib 2.32 g_hash_table_add(rscs, (gpointer) failed_id); */ g_hash_table_insert(rscs, (gpointer) failed_id, (gpointer) failed_id); } g_hash_table_iter_init(&iter, rscs); while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { crm_debug("Erasing failures of %s on %s", failed_id, node_name); rc = clear_rsc_history(controld_api, node_name, failed_id, data_set); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } } g_hash_table_destroy(rscs); return rc; } static int clear_rsc_fail_attrs(pe_resource_t *rsc, const char *operation, const char *interval_spec, pe_node_t *node) { int rc = pcmk_ok; int attr_options = pcmk__node_attr_none; char *rsc_name = rsc_fail_name(rsc); if (pe__is_guest_or_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } rc = pcmk__node_attr_request_clear(NULL, node->details->uname, rsc_name, operation, interval_spec, NULL, attr_options); free(rsc_name); return rc; } int -cli_resource_delete(pcmk_controld_api_t *controld_api, const char *host_uname, +cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname, pe_resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pe_working_set_t *data_set) { int rc = pcmk_ok; pe_node_t *node = NULL; if (rsc == NULL) { return -ENXIO; } else if (rsc->children) { GListPtr lpc = NULL; for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { pe_resource_t *child = (pe_resource_t *) lpc->data; rc = cli_resource_delete(controld_api, host_uname, child, operation, interval_spec, just_failures, data_set); if (rc != pcmk_ok) { return rc; } } return pcmk_ok; } else if (host_uname == NULL) { GListPtr lpc = NULL; GListPtr nodes = g_hash_table_get_values(rsc->known_on); if(nodes == NULL && do_force) { nodes = pcmk__copy_node_list(data_set->nodes, false); } else if(nodes == NULL && rsc->exclusive_discover) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) { if(node->weight >= 0) { nodes = g_list_prepend(nodes, node); } } } else if(nodes == NULL) { nodes = g_hash_table_get_values(rsc->allowed_nodes); } for (lpc = nodes; lpc != NULL; lpc = lpc->next) { node = (pe_node_t *) lpc->data; if (node->details->online) { rc = cli_resource_delete(controld_api, node->details->uname, rsc, operation, interval_spec, just_failures, data_set); } if (rc != pcmk_ok) { g_list_free(nodes); return rc; } } g_list_free(nodes); return pcmk_ok; } node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { printf("Unable to clean up %s because node %s not found\n", rsc->id, host_uname); return -ENODEV; } if (!node->details->rsc_discovery_enabled) { printf("Unable to clean up %s because resource discovery disabled on %s\n", rsc->id, host_uname); return -EOPNOTSUPP; } if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n", rsc->id, host_uname); return pcmk_ok; } rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node); if (rc != pcmk_rc_ok) { printf("Unable to clean up %s failures on %s: %s\n", rsc->id, host_uname, pcmk_rc_str(rc)); return pcmk_rc2legacy(rc); } if (just_failures) { rc = clear_rsc_failures(controld_api, host_uname, rsc->id, operation, interval_spec, data_set); } else { rc = clear_rsc_history(controld_api, host_uname, rsc->id, data_set); rc = pcmk_rc2legacy(rc); } if (rc != pcmk_ok) { printf("Cleaned %s failures on %s, but unable to clean history: %s\n", rsc->id, host_uname, pcmk_strerror(rc)); } else { printf("Cleaned up %s on %s\n", rsc->id, host_uname); } return rc; } int -cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, +cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { int rc = pcmk_ok; int attr_options = pcmk__node_attr_none; const char *display_name = node_name? node_name : "all nodes"; if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", display_name); return pcmk_ok; } if (node_name) { pe_node_t *node = pe_find_node(data_set->nodes, node_name); if (node == NULL) { CMD_ERR("Unknown node: %s", node_name); return -ENXIO; } if (pe__is_guest_or_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } } rc = pcmk__node_attr_request_clear(NULL, node_name, NULL, operation, interval_spec, NULL, attr_options); if (rc != pcmk_rc_ok) { printf("Unable to clean up all failures on %s: %s\n", display_name, pcmk_rc_str(rc)); return pcmk_rc2legacy(rc); } if (node_name) { rc = clear_rsc_failures(controld_api, node_name, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on %s, but unable to clean history: %s\n", node_name, pcmk_strerror(rc)); return rc; } } else { for (GList *iter = data_set->nodes; iter; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; rc = clear_rsc_failures(controld_api, node->details->uname, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n", pcmk_strerror(rc)); return rc; } } } printf("Cleaned up all resources on %s\n", display_name); return pcmk_ok; } void cli_resource_check(cib_t * cib_conn, pe_resource_t *rsc) { bool printed = false; char *role_s = NULL; char *managed = NULL; pe_resource_t *parent = uber_parent(rsc); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); if(role_s) { enum rsc_role_e role = text2role(role_s); free(role_s); if(role == RSC_ROLE_UNKNOWN) { // Treated as if unset } else if(role == RSC_ROLE_STOPPED) { printf("\n * Configuration specifies '%s' should remain stopped\n", parent->id); printed = true; } else if (is_set(parent->flags, pe_rsc_promotable) && (role == RSC_ROLE_SLAVE)) { printf("\n * Configuration specifies '%s' should not be promoted\n", parent->id); printed = true; } } if (managed && !crm_is_true(managed)) { printf("%s * Configuration prevents cluster from stopping or starting unmanaged '%s'\n", (printed? "" : "\n"), parent->id); printed = true; } free(managed); if (rsc->lock_node) { printf("%s * '%s' is locked to node %s due to shutdown\n", (printed? "" : "\n"), parent->id, rsc->lock_node->details->uname); } if (printed) { printf("\n"); } } // \return Standard Pacemaker return code int -cli_resource_fail(pcmk_controld_api_t *controld_api, const char *host_uname, +cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { crm_notice("Failing %s on %s", rsc_id, host_uname); return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, data_set); } static GHashTable * generate_resource_params(pe_resource_t * rsc, pe_working_set_t * data_set) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; if (!rsc) { crm_err("Resource does not exist in config"); return NULL; } params = crm_str_table_new(); meta = crm_str_table_new(); combined = crm_str_table_new(); get_rsc_attributes(params, rsc, NULL /* TODO: Pass in local node */ , data_set); get_meta_attributes(meta, rsc, NULL /* TODO: Pass in local node */ , data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } static bool resource_is_running_on(pe_resource_t *rsc, const char *host) { bool found = TRUE; GListPtr hIter = NULL; GListPtr hosts = NULL; if(rsc == NULL) { return FALSE; } rsc->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pe_node_t *node = (pe_node_t *) hIter->data; if(strcmp(host, node->details->uname) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } else if(strcmp(host, node->details->id) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if(host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = FALSE; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = FALSE; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { pe_resource_t *rsc = (pe_resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (rsc->variant == pe_group) { active = g_list_concat(active, get_active_resources(host, rsc->children)); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } GList* subtract_lists(GList *from, GList *items, GCompareFunc cmp) { GList *item = NULL; GList *result = g_list_copy(from); for (item = items; item != NULL; item = item->next) { GList *candidate = NULL; for (candidate = from; candidate != NULL; candidate = candidate->next) { crm_info("Comparing %s with %s", (const char *) candidate->data, (const char *) item->data); if(cmp(candidate->data, item->data) == 0) { result = g_list_remove(result, candidate->data); break; } } } return result; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { fprintf(stdout, "%s%s\n", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as working set input * * This also updates the working set timestamp to the current time. * * \param[in] data_set Working set instance to update * \param[in] xml XML to use as input * * \return pcmk_ok on success, -ENOKEY if unable to upgrade XML * \note On success, caller is responsible for freeing memory allocated for * data_set->now. * \todo This follows the example of other callers of cli_config_update() * and returns -ENOKEY ("Required key not available") if that fails, * but perhaps -pcmk_err_schema_validation would be better in that case. */ int update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml) { if (cli_config_update(xml, NULL, FALSE) == FALSE) { return -ENOKEY; } data_set->input = *xml; data_set->now = crm_time_new(NULL); return pcmk_ok; } /*! * \internal * \brief Update a working set's XML input based on a CIB query * * \param[in] data_set Data set instance to initialize * \param[in] cib Connection to the CIB manager * * \return pcmk_ok on success, -errno on failure * \note On success, caller is responsible for freeing memory allocated for * data_set->input and data_set->now. */ static int update_working_set_from_cib(pe_working_set_t * data_set, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { fprintf(stderr, "Could not obtain the current CIB: %s (%d)\n", pcmk_strerror(rc), rc); return rc; } rc = update_working_set_xml(data_set, &cib_xml_copy); if (rc != pcmk_ok) { fprintf(stderr, "Could not upgrade the current CIB XML\n"); free_xml(cib_xml_copy); return rc; } return pcmk_ok; } static int update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc; pe_reset_working_set(data_set); rc = update_working_set_from_cib(data_set, cib); if (rc != pcmk_ok) { return rc; } if(simulate) { pid = pcmk__getpid_s(); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { fprintf(stderr, "Could not create shadow cib: '%s'\n", pid); rc = -ENXIO; goto cleanup; } rc = write_xml_file(data_set->input, shadow_file, FALSE); if (rc < 0) { fprintf(stderr, "Could not populate shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); if(rc != pcmk_ok) { fprintf(stderr, "Could not connect to shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } pcmk__schedule_actions(data_set, data_set->input, NULL); run_simulation(data_set, shadow_cib, NULL, TRUE); rc = update_dataset(shadow_cib, data_set, FALSE); } else { cluster_status(data_set); } cleanup: /* Do not free data_set->input here, we need rsc->xml to be valid later on */ cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } static int max_delay_for_resource(pe_working_set_t * data_set, pe_resource_t *rsc) { int delay = 0; int max_delay = 0; if(rsc && rsc->children) { GList *iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { pe_resource_t *child = (pe_resource_t *)iter->data; delay = max_delay_for_resource(data_set, child); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id); max_delay = delay; } } } else if(rsc) { char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP); pe_action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set); const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT); max_delay = value? (int) crm_parse_ll(value, NULL) : -1; pe_free_action(stop); } return max_delay; } static int max_delay_in(pe_working_set_t * data_set, GList *resources) { int max_delay = 0; GList *item = NULL; for (item = resources; item != NULL; item = item->next) { int delay = 0; pe_resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data); delay = max_delay_for_resource(data_set, rsc); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id); max_delay = delay; } } return 5 + (max_delay / 1000); } #define waiting_for_starts(d, r, h) ((d != NULL) || \ (resource_is_running_on((r), (h)) == FALSE)) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in] rsc The resource to restart * \param[in] host The host to restart the resource on (or NULL for all) * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but a two-second * granularity is actually used; if 0, a timeout will be * calculated based on the resource timeout) * \param[in] cib Connection to the CIB manager * * \return pcmk_ok on success, -errno on failure (exits on certain failures) */ int cli_resource_restart(pe_resource_t *rsc, const char *host, int timeout_ms, cib_t *cib) { int rc = 0; int lpc = 0; int before = 0; int step_timeout_s = 0; int sleep_interval = 2; int timeout = timeout_ms / 1000; bool stop_via_ban = FALSE; char *rsc_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pe_working_set_t *data_set = NULL; if(resource_is_running_on(rsc, host) == FALSE) { const char *id = rsc->clone_name?rsc->clone_name:rsc->id; if(host) { printf("%s is not running on %s and so cannot be restarted\n", id, host); } else { printf("%s is not running anywhere and so cannot be restarted\n", id); } return -ENXIO; } /* We might set the target-role meta-attribute */ attr_set_type = XML_TAG_META_SETS; rsc_id = strdup(rsc->id); if ((pe_rsc_is_clone(rsc) || pe_bundle_replicas(rsc)) && host) { stop_via_ban = TRUE; } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ data_set = pe_new_working_set(); if (data_set == NULL) { crm_perror(LOG_ERR, "Could not allocate working set"); rc = -ENOMEM; goto done; } set_bit(data_set->flags, pe_flag_no_counts); set_bit(data_set->flags, pe_flag_no_compat); rc = update_dataset(cib, data_set, FALSE); if(rc != pcmk_ok) { fprintf(stdout, "Could not get new resource list: %s (%d)\n", pcmk_strerror(rc), rc); goto done; } restart_target_active = get_active_resources(host, data_set->resources); current_active = get_active_resources(host, data_set->resources); dump_list(current_active, "Origin"); if (stop_via_ban) { /* Stop the clone or bundle instance by banning it from the host */ BE_QUIET = TRUE; rc = cli_resource_ban(rsc_id, host, NULL, cib); } else { /* Stop the resource by setting target-role to Stopped. * Remember any existing target-role so we can restore it later * (though it only makes any difference if it's Slave). */ char *lookup_id = clone_strip(rsc->id); find_resource_attr(cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role); free(lookup_id); rc = cli_resource_update_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, RSC_STOPPED, FALSE, cib, data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not set target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); if (current_active) { g_list_free_full(current_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } goto done; } rc = update_dataset(cib, data_set, TRUE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources would be stopped\n"); goto failure; } target_active = get_active_resources(host, data_set->resources); dump_list(target_active, "Target"); list_delta = subtract_lists(current_active, target_active, (GCompareFunc) strcmp); fprintf(stdout, "Waiting for %d resources to stop:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (list_delta != NULL) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; (lpc < step_timeout_s) && (list_delta != NULL); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were stopped\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, data_set->resources); g_list_free(list_delta); list_delta = subtract_lists(current_active, target_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ fprintf(stderr, "Could not complete shutdown of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } if (stop_via_ban) { rc = cli_resource_clear(rsc_id, host, NULL, cib, TRUE); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, data_set); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not unset target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); goto done; } if (target_active) { g_list_free_full(target_active, free); } target_active = restart_target_active; list_delta = subtract_lists(target_active, current_active, (GCompareFunc) strcmp); fprintf(stdout, "Waiting for %d resources to start again:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were started\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ current_active = get_active_resources(NULL, data_set->resources); g_list_free(list_delta); list_delta = subtract_lists(target_active, current_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ fprintf(stdout, "Could not complete restart of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } rc = pcmk_ok; goto done; failure: if (stop_via_ban) { cli_resource_clear(rsc_id, host, NULL, cib, TRUE); } else if (orig_target_role) { cli_resource_update_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, data_set); free(orig_target_role); } else { cli_resource_delete_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, data_set); } done: if (list_delta) { g_list_free(list_delta); } if (current_active) { g_list_free_full(current_active, free); } if (target_active && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } free(rsc_id); pe_free_working_set(data_set); return rc; } static inline int action_is_pending(pe_action_t *action) { if(is_set(action->flags, pe_action_optional)) { return FALSE; } else if(is_set(action->flags, pe_action_runnable) == FALSE) { return FALSE; } else if(is_set(action->flags, pe_action_pseudo)) { return FALSE; } else if(safe_str_eq("notify", action->task)) { return FALSE; } return TRUE; } /*! * \internal * \brief Return TRUE if any actions in a list are pending * * \param[in] actions List of actions to check * * \return TRUE if any actions in the list are pending, FALSE otherwise */ static bool actions_are_pending(GListPtr actions) { GListPtr action; for (action = actions; action != NULL; action = action->next) { pe_action_t *a = (pe_action_t *)action->data; if (action_is_pending(a)) { crm_notice("Waiting for %s (flags=0x%.8x)", a->uuid, a->flags); return TRUE; } } return FALSE; } /*! * \internal * \brief Print pending actions to stderr * * \param[in] actions List of actions to check * * \return void */ static void print_pending_actions(GListPtr actions) { GListPtr action; fprintf(stderr, "Pending actions:\n"); for (action = actions; action != NULL; action = action->next) { pe_action_t *a = (pe_action_t *) action->data; if (action_is_pending(a)) { fprintf(stderr, "\tAction %d: %s", a->id, a->uuid); if (a->node) { fprintf(stderr, "\ton %s", a->node->details->uname); } fprintf(stderr, "\n"); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but one-second granularity * is actually used; if 0, a default will be used) * \param[in] cib Connection to the CIB manager * * \return pcmk_ok on success, -errno on failure */ int wait_till_stable(int timeout_ms, cib_t * cib) { pe_working_set_t *data_set = NULL; int rc = -1; int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; bool printed_version_warning = BE_QUIET; // i.e. don't print if quiet data_set = pe_new_working_set(); if (data_set == NULL) { return -ENOMEM; } set_bit(data_set->flags, pe_flag_no_counts); set_bit(data_set->flags, pe_flag_no_compat); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff > 0) { crm_info("Waiting up to %ld seconds for cluster actions to complete", time_diff); } else { print_pending_actions(data_set->actions); pe_free_working_set(data_set); return -ETIME; } if (rc == pcmk_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ pe_reset_working_set(data_set); rc = update_working_set_from_cib(data_set, cib); if (rc != pcmk_ok) { pe_free_working_set(data_set); return rc; } pcmk__schedule_actions(data_set, data_set->input, NULL); if (!printed_version_warning) { /* If the DC has a different version than the local node, the two * could come to different conclusions about what actions need to be * done. Warn the user in this case. * * @TODO A possible long-term solution would be to reimplement the * wait as a new controller operation that would be forwarded to the * DC. However, that would have potential problems of its own. */ const char *dc_version = g_hash_table_lookup(data_set->config_hash, "dc-version"); if (safe_str_neq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION)) { printf("warning: wait option may not work properly in " "mixed-version cluster\n"); printed_version_warning = TRUE; } } } while (actions_are_pending(data_set->actions)); pe_free_working_set(data_set); return pcmk_ok; } int cli_resource_execute_from_params(const char *rsc_name, const char *rsc_class, const char *rsc_prov, const char *rsc_type, const char *action, GHashTable *params, GHashTable *override_hash, int timeout_ms) { GHashTable *params_copy = NULL; int rc = pcmk_ok; svc_action_t *op = NULL; if (safe_str_eq(rsc_class, PCMK_RESOURCE_CLASS_STONITH)) { CMD_ERR("Sorry, the %s option doesn't support %s resources yet", action, rsc_class); crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); } /* If no timeout was provided, grab the default. */ if (timeout_ms == 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } /* add meta_timeout env needed by some resource agents */ g_hash_table_insert(params, strdup("CRM_meta_timeout"), crm_strdup_printf("%d", timeout_ms)); /* add crm_feature_set env needed by some resource agents */ g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); /* resources_action_create frees the params hash table it's passed, but we * may need to reuse it in a second call to resources_action_create. Thus * we'll make a copy here so that gets freed and the original remains for * reuse. */ params_copy = crm_str_table_dup(params); op = resources_action_create(rsc_name, rsc_class, rsc_prov, rsc_type, action, 0, timeout_ms, params_copy, 0); if (op == NULL) { /* Re-run with stderr enabled so we can display a sane error message */ crm_enable_stderr(TRUE); params_copy = crm_str_table_dup(params); op = resources_action_create(rsc_name, rsc_class, rsc_prov, rsc_type, action, 0, timeout_ms, params_copy, 0); /* Callers of cli_resource_execute expect that the params hash table will * be freed. That function uses this one, so for that reason and for * making the two act the same, we should free the hash table here too. */ g_hash_table_destroy(params); /* We know op will be NULL, but this makes static analysis happy */ services_action_free(op); crm_exit(CRM_EX_DATAERR); return rc; // Never reached, but helps static analysis } setenv("HA_debug", resource_verbose > 0 ? "1" : "0", 1); if(resource_verbose > 1) { setenv("OCF_TRACE_RA", "1", 1); } /* A resource agent using the standard ocf-shellfuncs library will not print * messages to stderr if it doesn't have a controlling terminal (e.g. if * crm_resource is called via script or ssh). This forces it to do so. */ setenv("OCF_TRACE_FILE", "/dev/stderr", 0); if (override_hash) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, override_hash); while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) { printf("Overriding the cluster configuration for '%s' with '%s' = '%s'\n", rsc_name, name, value); g_hash_table_replace(op->params, strdup(name), strdup(value)); } } if (services_action_sync(op)) { int more, lpc, last; char *local_copy = NULL; rc = op->rc; if (op->status == PCMK_LRM_OP_DONE) { printf("Operation %s for %s (%s:%s:%s) returned: '%s' (%d)\n", action, rsc_name, rsc_class, rsc_prov ? rsc_prov : "", rsc_type, services_ocf_exitcode_str(op->rc), op->rc); } else { printf("Operation %s for %s (%s:%s:%s) failed: '%s' (%d)\n", action, rsc_name, rsc_class, rsc_prov ? rsc_prov : "", rsc_type, services_lrm_status_str(op->status), op->status); } /* hide output for validate-all if not in verbose */ if (resource_verbose == 0 && safe_str_eq(action, "validate-all")) goto done; if (op->stdout_data) { local_copy = strdup(op->stdout_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stdout: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } if (op->stderr_data) { local_copy = strdup(op->stderr_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stderr: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } } else { rc = op->rc == 0 ? pcmk_err_generic : op->rc; } done: services_action_free(op); /* See comment above about why we free params here. */ g_hash_table_destroy(params); return rc; } int cli_resource_execute(pe_resource_t *rsc, const char *requested_name, const char *rsc_action, GHashTable *override_hash, int timeout_ms, cib_t * cib, pe_working_set_t *data_set) { int rc = pcmk_ok; const char *rid = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; const char *action = NULL; GHashTable *params = NULL; if (safe_str_eq(rsc_action, "validate")) { action = "validate-all"; } else if (safe_str_eq(rsc_action, "force-check")) { action = "monitor"; } else if (safe_str_eq(rsc_action, "force-stop")) { action = rsc_action+6; } else if (pcmk__str_any_of(rsc_action, "force-start", "force-demote", "force-promote", NULL)) { action = rsc_action+6; if(pe_rsc_is_clone(rsc)) { rc = cli_resource_search(rsc, requested_name, data_set); if(rc > 0 && do_force == FALSE) { CMD_ERR("It is not safe to %s %s here: the cluster claims it is already active", action, rsc->id); CMD_ERR("Try setting target-role=Stopped first or specifying " "the force option"); crm_exit(CRM_EX_UNSAFE); } } } else { action = rsc_action; } if(pe_rsc_is_clone(rsc)) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->children->data; } if(rsc->variant == pe_group) { CMD_ERR("Sorry, the %s option doesn't support group resources", rsc_action); crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); params = generate_resource_params(rsc, data_set); if (timeout_ms == 0) { timeout_ms = pe_get_configured_timeout(rsc, action, data_set); } rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id; rc = cli_resource_execute_from_params(rid, rclass, rprov, rtype, action, params, override_hash, timeout_ms); return rc; } int cli_resource_move(pe_resource_t *rsc, const char *rsc_id, const char *host_name, cib_t *cib, pe_working_set_t *data_set) { int rc = pcmk_ok; unsigned int count = 0; pe_node_t *current = NULL; pe_node_t *dest = pe_find_node(data_set->nodes, host_name); bool cur_is_dest = FALSE; if (dest == NULL) { return -pcmk_err_node_unknown; } if (scope_master && is_not_set(rsc->flags, pe_rsc_promotable)) { pe_resource_t *p = uber_parent(rsc); if (is_set(p->flags, pe_rsc_promotable)) { CMD_ERR("Using parent '%s' for move instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { CMD_ERR("Ignoring master option: %s is not promotable", rsc_id); scope_master = FALSE; } } current = pe__find_active_requires(rsc, &count); if (is_set(rsc->flags, pe_rsc_promotable)) { GListPtr iter = NULL; unsigned int master_count = 0; pe_node_t *master_node = NULL; for(iter = rsc->children; iter; iter = iter->next) { pe_resource_t *child = (pe_resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { rsc = child; master_node = pe__current_node(child); master_count++; } } if (scope_master || master_count) { count = master_count; current = master_node; } } if (count > 1) { if (pe_rsc_is_clone(rsc)) { current = NULL; } else { return -pcmk_err_multiple; } } if (current && (current->details == dest->details)) { cur_is_dest = TRUE; if (do_force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, scope_master?"promoted":"active", dest->details->uname); } else { return -pcmk_err_already; } } /* Clear any previous prefer constraints across all nodes. */ cli_resource_clear(rsc_id, NULL, data_set->nodes, cib, FALSE); /* Clear any previous ban constraints on 'dest'. */ cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib, TRUE); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(rsc_id, dest->details->uname, cib); crm_trace("%s%s now prefers node %s%s", rsc->id, scope_master?" (master)":"", dest->details->uname, do_force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if(do_force && (cur_is_dest == FALSE)) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib); } else if(count > 1) { CMD_ERR("Resource '%s' is currently %s in %d locations. " "One may now move to %s", rsc_id, (scope_master? "promoted" : "active"), count, dest->details->uname); CMD_ERR("To prevent '%s' from being %s at a specific location, " "specify a node.", rsc_id, (scope_master? "promoted" : "active")); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; } static void cli_resource_why_without_rsc_and_host(cib_t *cib_conn,GListPtr resources) { GListPtr lpc = NULL; GListPtr hosts = NULL; for (lpc = resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; rsc->fns->location(rsc, &hosts, TRUE); if (hosts == NULL) { printf("Resource %s is not running\n", rsc->id); } else { printf("Resource %s is running\n", rsc->id); } cli_resource_check(cib_conn, rsc); g_list_free(hosts); hosts = NULL; } } static void cli_resource_why_with_rsc_and_host(cib_t *cib_conn, GListPtr resources, pe_resource_t *rsc, const char *host_uname) { if (resource_is_running_on(rsc, host_uname)) { printf("Resource %s is running on host %s\n",rsc->id,host_uname); } else { printf("Resource %s is not running on host %s\n", rsc->id, host_uname); } cli_resource_check(cib_conn, rsc); } static void cli_resource_why_without_rsc_with_host(cib_t *cib_conn,GListPtr resources,pe_node_t *node) { const char* host_uname = node->details->uname; GListPtr allResources = node->details->allocated_rsc; GListPtr activeResources = node->details->running_rsc; GListPtr unactiveResources = subtract_lists(allResources,activeResources,(GCompareFunc) strcmp); GListPtr lpc = NULL; for (lpc = activeResources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; printf("Resource %s is running on host %s\n",rsc->id,host_uname); cli_resource_check(cib_conn,rsc); } for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; printf("Resource %s is assigned to host %s but not running\n", rsc->id, host_uname); cli_resource_check(cib_conn,rsc); } g_list_free(allResources); g_list_free(activeResources); g_list_free(unactiveResources); } static void cli_resource_why_with_rsc_without_host(cib_t *cib_conn, GListPtr resources, pe_resource_t *rsc) { GListPtr hosts = NULL; rsc->fns->location(rsc, &hosts, TRUE); printf("Resource %s is %srunning\n", rsc->id, (hosts? "" : "not ")); cli_resource_check(cib_conn, rsc); g_list_free(hosts); } void cli_resource_why(cib_t *cib_conn, GListPtr resources, pe_resource_t *rsc, pe_node_t *node) { const char *host_uname = (node == NULL)? NULL : node->details->uname; if ((rsc == NULL) && (host_uname == NULL)) { cli_resource_why_without_rsc_and_host(cib_conn, resources); } else if ((rsc != NULL) && (host_uname != NULL)) { cli_resource_why_with_rsc_and_host(cib_conn, resources, rsc, host_uname); } else if ((rsc == NULL) && (host_uname != NULL)) { cli_resource_why_without_rsc_with_host(cib_conn, resources, node); } else if ((rsc != NULL) && (host_uname == NULL)) { cli_resource_why_with_rsc_without_host(cib_conn, resources, rsc); } } diff --git a/tools/crmadmin.c b/tools/crmadmin.c index 3e9e9592e1..4688458988 100644 --- a/tools/crmadmin.c +++ b/tools/crmadmin.c @@ -1,541 +1,473 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include -#include - #include -#include -#include +#include +#include // atoi() -#include -#include -#include -#include #include // gboolean, GMainLoop, etc. +#include // xmlNode #include +#include #include #include - +#include #include -#include - #define DEFAULT_MESSAGE_TIMEOUT_MS 30000 static guint message_timer_id = 0; static guint message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS; static GMainLoop *mainloop = NULL; -static crm_ipc_t *crmd_channel = NULL; -static char *admin_uuid = NULL; - -gboolean do_init(void); -int do_work(void); -void crmadmin_ipc_connection_destroy(gpointer user_data); -int admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata); -int do_find_node_list(xmlNode * xml_node); + +bool do_work(pcmk_ipc_api_t *api); +void do_find_node_list(xmlNode *xml_node); gboolean admin_message_timeout(gpointer data); +static enum { + cmd_none, + cmd_shutdown, + cmd_health, + cmd_elect_dc, + cmd_whois_dc, + cmd_list_nodes, +} command = cmd_none; + static gboolean BE_VERBOSE = FALSE; -static int expected_responses = 1; static gboolean BASH_EXPORT = FALSE; -static gboolean DO_HEALTH = FALSE; -static gboolean DO_RESET = FALSE; -static gboolean DO_RESOURCE = FALSE; -static gboolean DO_ELECT_DC = FALSE; -static gboolean DO_WHOIS_DC = FALSE; -static gboolean DO_NODE_LIST = FALSE; static gboolean BE_SILENT = FALSE; -static gboolean DO_RESOURCE_LIST = FALSE; -static const char *crmd_operation = NULL; static char *dest_node = NULL; static crm_exit_t exit_code = CRM_EX_OK; -static const char *sys_to = NULL; static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "help", no_argument, NULL, '?', "\tThis text", pcmk__option_default }, { "version", no_argument, NULL, '$', "\tVersion information", pcmk__option_default }, { "quiet", no_argument, NULL, 'q', "\tDisplay only the essential query information", pcmk__option_default }, { "verbose", no_argument, NULL, 'V', "\tIncrease debug output", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nCommands:", pcmk__option_default }, /* daemon options */ { "status", required_argument, NULL, 'S', "Display the status of the specified node.", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\n\tResult is state of node's internal finite state machine, which " "can be useful for debugging\n", pcmk__option_default }, { "dc_lookup", no_argument, NULL, 'D', "Display the uname of the node co-ordinating the cluster.", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\n\tThis is an internal detail rarely useful to administrators " "except when deciding on which node to examine the logs.\n", pcmk__option_default }, { "nodes", no_argument, NULL, 'N', "\tDisplay the uname of all member nodes", pcmk__option_default }, { "election", no_argument, NULL, 'E', "(Advanced) Start an election for the cluster co-ordinator", pcmk__option_default }, { "kill", required_argument, NULL, 'K', "(Advanced) Stop controller (not rest of cluster stack) on " "specified node", pcmk__option_default }, { "health", no_argument, NULL, 'H', NULL, pcmk__option_hidden }, { "-spacer-", no_argument, NULL, '-', "\nAdditional Options:", pcmk__option_default }, { XML_ATTR_TIMEOUT, required_argument, NULL, 't', "Time (in milliseconds) to wait before declaring the operation failed", pcmk__option_default }, { "bash-export", no_argument, NULL, 'B', - "Create Bash export entries of the form 'export uname=uuid'\n", + "Display nodes as shell commands of the form 'export uname=uuid' " + "(valid with -N/--nodes)'\n", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "Notes:", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', - " The -K and -E commands are rarely used and may be removed in " - "future versions.", + "The -K and -E commands do not work and may be removed in a future " + "version.", pcmk__option_default }, { 0, 0, 0, 0 } }; +static void +quit_main_loop(crm_exit_t ec) +{ + exit_code = ec; + if (mainloop != NULL) { + GMainLoop *mloop = mainloop; + + mainloop = NULL; // Don't re-enter this block + pcmk_quit_main_loop(mloop, 10); + g_main_loop_unref(mloop); + } +} + +static void +controller_event_cb(pcmk_ipc_api_t *controld_api, + enum pcmk_ipc_event event_type, crm_exit_t status, + void *event_data, void *user_data) +{ + pcmk_controld_api_reply_t *reply = event_data; + + switch (event_type) { + case pcmk_ipc_event_disconnect: + if (exit_code == CRM_EX_DISCONNECT) { // Unexpected + fprintf(stderr, "error: Lost connection to controller\n"); + } + goto done; + break; + + case pcmk_ipc_event_reply: + break; + + default: + return; + } + + if (message_timer_id != 0) { + g_source_remove(message_timer_id); + message_timer_id = 0; + } + + if (status != CRM_EX_OK) { + fprintf(stderr, "error: Bad reply from controller: %s", + crm_exit_str(status)); + exit_code = status; + goto done; + } + + if (reply->reply_type != pcmk_controld_reply_ping) { + fprintf(stderr, "error: Unknown reply type %d from controller\n", + reply->reply_type); + goto done; + } + + // Parse desired information from reply + switch (command) { + case cmd_health: + printf("Status of %s@%s: %s (%s)\n", + reply->data.ping.sys_from, + reply->host_from, + reply->data.ping.fsa_state, + reply->data.ping.result); + if (BE_SILENT && (reply->data.ping.fsa_state != NULL)) { + fprintf(stderr, "%s\n", reply->data.ping.fsa_state); + } + exit_code = CRM_EX_OK; + break; + + case cmd_whois_dc: + printf("Designated Controller is: %s\n", reply->host_from); + if (BE_SILENT && (reply->host_from != NULL)) { + fprintf(stderr, "%s\n", reply->host_from); + } + exit_code = CRM_EX_OK; + break; + + default: // Not really possible here + exit_code = CRM_EX_SOFTWARE; + break; + } + +done: + pcmk_disconnect_ipc(controld_api); + quit_main_loop(exit_code); +} + // \return Standard Pacemaker return code static int list_nodes() { cib_t *the_cib = cib_new(); xmlNode *output = NULL; int rc; if (the_cib == NULL) { return ENOMEM; } rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); if (rc != pcmk_ok) { return pcmk_legacy2rc(rc); } rc = the_cib->cmds->query(the_cib, NULL, &output, cib_scope_local | cib_sync_call); if (rc == pcmk_ok) { do_find_node_list(output); free_xml(output); } the_cib->cmds->signoff(the_cib); return pcmk_legacy2rc(rc); } int main(int argc, char **argv) { int option_index = 0; int argerr = 0; int flag; + int rc; + pcmk_ipc_api_t *controld_api = NULL; + bool need_controld_api = true; crm_log_cli_init("crmadmin"); pcmk__set_cli_options(NULL, " [options]", long_options, "query and manage the Pacemaker controller"); if (argc < 2) { pcmk__cli_help('?', CRM_EX_USAGE); } while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); if (flag == -1) break; switch (flag) { case 'V': BE_VERBOSE = TRUE; crm_bump_log_level(argc, argv); break; case 't': message_timeout_ms = (guint) atoi(optarg); if (message_timeout_ms < 1) { message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS; } break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); break; case 'D': - DO_WHOIS_DC = TRUE; + command = cmd_whois_dc; break; case 'B': BASH_EXPORT = TRUE; break; case 'K': - DO_RESET = TRUE; + command = cmd_shutdown; crm_trace("Option %c => %s", flag, optarg); + if (dest_node != NULL) { + free(dest_node); + } dest_node = strdup(optarg); - crmd_operation = CRM_OP_LOCAL_SHUTDOWN; break; case 'q': BE_SILENT = TRUE; break; case 'S': - DO_HEALTH = TRUE; + command = cmd_health; crm_trace("Option %c => %s", flag, optarg); + if (dest_node != NULL) { + free(dest_node); + } dest_node = strdup(optarg); break; case 'E': - DO_ELECT_DC = TRUE; + command = cmd_elect_dc; break; case 'N': - DO_NODE_LIST = TRUE; + command = cmd_list_nodes; + need_controld_api = false; break; case 'H': - DO_HEALTH = TRUE; + fprintf(stderr, "Cluster-wide health option not supported\n"); + ++argerr; break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } + if (command == cmd_none) { + fprintf(stderr, "error: Must specify a command option\n\n"); + ++argerr; + } + if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } - if (do_init()) { - int res = 0; - - res = do_work(); - if (res > 0) { - /* wait for the reply by creating a mainloop and running it until - * the callbacks are invoked... - */ - mainloop = g_main_loop_new(NULL, FALSE); - crm_trace("Waiting for %d replies from the local CRM", expected_responses); - - message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); - - g_main_loop_run(mainloop); - - } else if (res < 0) { - crm_err("No message to send"); - exit_code = CRM_EX_ERROR; + // Connect to the controller if needed + if (need_controld_api) { + rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); + if (controld_api == NULL) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + goto done; } - } else { - crm_warn("Init failed, could not perform requested operations"); - exit_code = CRM_EX_UNAVAILABLE; - } - - crm_trace("%s exiting normally", crm_system_name); - return exit_code; -} - -int -do_work(void) -{ - int ret = 1; - - /* construct the request */ - xmlNode *msg_data = NULL; - gboolean all_is_good = TRUE; - - if (DO_HEALTH == TRUE) { - crm_trace("Querying the system"); - - sys_to = CRM_SYSTEM_DC; - - if (dest_node != NULL) { - sys_to = CRM_SYSTEM_CRMD; - crmd_operation = CRM_OP_PING; - - if (BE_VERBOSE) { - expected_responses = 1; - } - - } else { - crm_info("Cluster-wide health not available yet"); - all_is_good = FALSE; + pcmk_register_ipc_callback(controld_api, controller_event_cb, NULL); + rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + goto done; } - - } else if (DO_ELECT_DC) { - /* tell the local node to initiate an election */ - - dest_node = NULL; - sys_to = CRM_SYSTEM_CRMD; - crmd_operation = CRM_OP_VOTE; - ret = 0; /* no return message */ - - } else if (DO_WHOIS_DC) { - dest_node = NULL; - sys_to = CRM_SYSTEM_DC; - crmd_operation = CRM_OP_PING; - - } else if (DO_NODE_LIST) { - crm_exit(pcmk_rc2exitc(list_nodes())); - - } else if (DO_RESET) { - /* tell dest_node to initiate the shutdown procedure - * - * if dest_node is NULL, the request will be sent to the - * local node - */ - sys_to = CRM_SYSTEM_CRMD; - ret = 0; /* no return message */ - - } else { - crm_err("Unknown options"); - all_is_good = FALSE; } - if (all_is_good == FALSE) { - crm_err("Creation of request failed. No message to send"); - return -1; + if (do_work(controld_api)) { + // A reply is needed from controller, so run main loop to get it + exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects + mainloop = g_main_loop_new(NULL, FALSE); + message_timer_id = g_timeout_add(message_timeout_ms, + admin_message_timeout, NULL); + g_main_loop_run(mainloop); } -/* send it */ - if (crmd_channel == NULL) { - crm_err("The IPC connection is not valid, cannot send anything"); - return -1; - } - - if (sys_to == NULL) { - if (dest_node != NULL) { - sys_to = CRM_SYSTEM_CRMD; - } else { - sys_to = CRM_SYSTEM_DC; - } - } - - { - xmlNode *cmd = create_request(crmd_operation, msg_data, dest_node, sys_to, - crm_system_name, admin_uuid); - - crm_ipc_send(crmd_channel, cmd, 0, 0, NULL); - free_xml(cmd); - } - - return ret; -} +done: + if (controld_api != NULL) { + pcmk_ipc_api_t *capi = controld_api; -void -crmadmin_ipc_connection_destroy(gpointer user_data) -{ - crm_err("Connection to controller was terminated"); - if (mainloop) { - g_main_loop_quit(mainloop); - } else { - crm_exit(CRM_EX_DISCONNECT); + controld_api = NULL; // Ensure we can't free this twice + pcmk_free_ipc_api(capi); } -} - -struct ipc_client_callbacks crm_callbacks = { - .dispatch = admin_msg_callback, - .destroy = crmadmin_ipc_connection_destroy -}; - -gboolean -do_init(void) -{ - mainloop_io_t *source = - mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks); - - admin_uuid = pcmk__getpid_s(); - - crmd_channel = mainloop_get_ipc_client(source); - - if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST) { - return TRUE; - - } else if (crmd_channel != NULL) { - xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1"); - - crm_ipc_send(crmd_channel, xml, 0, 0, NULL); - return TRUE; + if (mainloop != NULL) { + g_main_loop_unref(mainloop); + mainloop = NULL; } - return FALSE; + return crm_exit(exit_code); } -static bool -validate_crm_message(xmlNode * msg, const char *sys, const char *uuid, const char *msg_type) +// \return True if reply from controller is needed +bool +do_work(pcmk_ipc_api_t *controld_api) { - const char *type = NULL; - const char *crm_msg_reference = NULL; - - if (msg == NULL) { - return FALSE; - } + bool need_reply = false; + int rc = pcmk_rc_ok; - type = crm_element_value(msg, F_CRM_MSG_TYPE); - crm_msg_reference = crm_element_value(msg, XML_ATTR_REFERENCE); - - if (type == NULL) { - crm_info("No message type defined."); - return FALSE; - - } else if (msg_type != NULL && strcasecmp(msg_type, type) != 0) { - crm_info("Expecting a (%s) message but received a (%s).", msg_type, type); - return FALSE; - } - - if (crm_msg_reference == NULL) { - crm_info("No message crm_msg_reference defined."); - return FALSE; - } - - return TRUE; -} - -int -admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) -{ - static int received_responses = 0; - xmlNode *xml = string2xml(buffer); - - received_responses++; - g_source_remove(message_timer_id); - - crm_log_xml_trace(xml, "ipc"); - - if (xml == NULL) { - crm_info("XML in IPC message was not valid... " "discarding."); - - } else if (validate_crm_message(xml, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) { - crm_trace("Message was not a CRM response. Discarding."); - - } else if (DO_HEALTH) { - xmlNode *data = get_message_xml(xml, F_CRM_DATA); - const char *state = crm_element_value(data, XML_PING_ATTR_CRMDSTATE); + switch (command) { + case cmd_shutdown: + rc = pcmk_controld_api_shutdown(controld_api, dest_node); + break; - printf("Status of %s@%s: %s (%s)\n", - crm_element_value(data, XML_PING_ATTR_SYSFROM), - crm_element_value(xml, F_CRM_HOST_FROM), - state, crm_element_value(data, XML_PING_ATTR_STATUS)); + case cmd_health: // dest_node != NULL + case cmd_whois_dc: // dest_node == NULL + rc = pcmk_controld_api_ping(controld_api, dest_node); + need_reply = true; + break; - if (BE_SILENT && state != NULL) { - fprintf(stderr, "%s\n", state); - } + case cmd_elect_dc: + rc = pcmk_controld_api_start_election(controld_api); + break; - } else if (DO_WHOIS_DC) { - const char *dc = crm_element_value(xml, F_CRM_HOST_FROM); + case cmd_list_nodes: + rc = list_nodes(); + break; - printf("Designated Controller is: %s\n", dc); - if (BE_SILENT && dc != NULL) { - fprintf(stderr, "%s\n", dc); - } - crm_exit(CRM_EX_OK); + case cmd_none: // not actually possible here + break; } - - free_xml(xml); - - if (received_responses >= expected_responses) { - crm_trace("Received expected number (%d) of replies, exiting normally", - expected_responses); - crm_exit(CRM_EX_OK); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Command failed: %s", pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); } - - message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); - return 0; + return need_reply; } gboolean admin_message_timeout(gpointer data) { - fprintf(stderr, "No messages received in %d seconds.. aborting\n", - (int)message_timeout_ms / 1000); - crm_err("No messages received in %d seconds", (int)message_timeout_ms / 1000); - exit_code = CRM_EX_TIMEOUT; - g_main_loop_quit(mainloop); - return FALSE; + fprintf(stderr, + "error: No reply received from controller before timeout (%dms)\n", + message_timeout_ms); + message_timer_id = 0; + quit_main_loop(CRM_EX_TIMEOUT); + return FALSE; // Tells glib to remove source } -int +void do_find_node_list(xmlNode * xml_node) { int found = 0; xmlNode *node = NULL; xmlNode *nodes = get_object_root(XML_CIB_TAG_NODES, xml_node); - for (node = __xml_first_child_element(nodes); node != NULL; - node = __xml_next_element(node)) { + for (node = first_named_child(nodes, XML_CIB_TAG_NODE); node != NULL; + node = crm_next_same_xml(node)) { - if (crm_str_eq((const char *)node->name, XML_CIB_TAG_NODE, TRUE)) { + if (BASH_EXPORT) { + printf("export %s=%s\n", + crm_element_value(node, XML_ATTR_UNAME), + crm_element_value(node, XML_ATTR_ID)); + } else { + const char *node_type = crm_element_value(node, XML_ATTR_TYPE); - if (BASH_EXPORT) { - printf("export %s=%s\n", - crm_element_value(node, XML_ATTR_UNAME), - crm_element_value(node, XML_ATTR_ID)); - } else { - printf("%s node: %s (%s)\n", - crm_element_value(node, XML_ATTR_TYPE), - crm_element_value(node, XML_ATTR_UNAME), - crm_element_value(node, XML_ATTR_ID)); + if (node_type == NULL) { + node_type = "member"; } - found++; + printf("%s node: %s (%s)\n", node_type, + crm_element_value(node, XML_ATTR_UNAME), + crm_element_value(node, XML_ATTR_ID)); } + found++; } + // @TODO List Pacemaker Remote nodes that don't have a entry if (found == 0) { - printf("NO nodes configured\n"); + printf("No nodes configured\n"); } - - return found; }