diff --git a/cib/main.c b/cib/main.c index cc3a463d18..1e7fb00439 100644 --- a/cib/main.c +++ b/cib/main.c @@ -1,586 +1,590 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" #if HAVE_LIBXML2 # include #endif #ifdef HAVE_GETOPT_H # include #endif #if HAVE_BZLIB_H # include #endif extern int init_remote_listener(int port, gboolean encrypted); gboolean cib_shutdown_flag = FALSE; int cib_status = pcmk_ok; crm_cluster_t crm_cluster; #if SUPPORT_HEARTBEAT oc_ev_t *cib_ev_token; ll_cluster_t *hb_conn = NULL; extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); gboolean cib_register_ha(ll_cluster_t * hb_cluster, const char *client_name); #else void *hb_conn = NULL; #endif GMainLoop *mainloop = NULL; const char *cib_root = NULL; char *cib_our_uname = NULL; gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; int remote_fd = 0; int remote_tls_fd = 0; int cib_init(void); void cib_shutdown(int nsig); gboolean startCib(const char *filename); extern int write_cib_contents(gpointer p); GHashTable *config_hash = NULL; GHashTable *local_notify_queue = NULL; char *channel1 = NULL; char *channel2 = NULL; char *channel3 = NULL; char *channel4 = NULL; char *channel5 = NULL; #define OPTARGS "maswr:V?" void cib_cleanup(void); static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } static void log_cib_client(gpointer key, gpointer value, gpointer user_data) { crm_info("Client %s", crm_client_name(value)); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"per-action-cib", 0, 0, 'a', "\tAdvanced use only"}, {"stand-alone", 0, 0, 's', "\tAdvanced use only"}, {"disk-writes", 0, 0, 'w', "\tAdvanced use only"}, {"cib-root", 1, 0, 'r', "\tAdvanced use only"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int rc = 0; int index = 0; int argerr = 0; struct passwd *pwentry = NULL; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for storing and replicating the cluster configuration"); crm_peer_init(); mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 's': stand_alone = TRUE; preserve_status = TRUE; cib_writes_enabled = FALSE; pwentry = getpwnam(CRM_DAEMON_USER); CRM_CHECK(pwentry != NULL, crm_perror(LOG_ERR, "Invalid uid (%s) specified", CRM_DAEMON_USER); return 100); rc = setgid(pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set group to %d", pwentry->pw_gid); return 100; } rc = initgroups(CRM_DAEMON_GROUP, pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not setup groups for user %d", pwentry->pw_uid); return 100; } rc = setuid(pwentry->pw_uid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set user to %d", pwentry->pw_uid); return 100; } break; case '?': /* Help message */ crm_help(flag, EX_OK); break; case 'w': cib_writes_enabled = TRUE; break; case 'r': cib_root = optarg; break; case 'm': cib_metadata(); return 0; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { cib_metadata(); return 0; } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); if (cib_root == NULL) { char *path = crm_strdup_printf("%s/cib.xml", CRM_CONFIG_DIR); char *legacy = crm_strdup_printf("%s/cib.xml", CRM_LEGACY_CONFIG_DIR); if (g_file_test(path, G_FILE_TEST_EXISTS)) { cib_root = CRM_CONFIG_DIR; } else if (g_file_test(legacy, G_FILE_TEST_EXISTS)) { cib_root = CRM_LEGACY_CONFIG_DIR; crm_notice("Using legacy config location: %s", cib_root); } else { cib_root = CRM_CONFIG_DIR; crm_notice("Using new config location: %s", cib_root); } free(legacy); free(path); } else { crm_notice("Using custom config location: %s", cib_root); } if (crm_is_writable(cib_root, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on %s. Terminating", cib_root); fprintf(stderr, "ERROR: Bad permissions on %s. See logs for details\n", cib_root); fflush(stderr); return 100; } /* read local config file */ rc = cib_init(); CRM_CHECK(crm_hash_table_size(client_connections) == 0, crm_warn("Not all clients gone at exit")); g_hash_table_foreach(client_connections, log_cib_client, NULL); cib_cleanup(); #if SUPPORT_HEARTBEAT if (hb_conn) { hb_conn->llc_ops->delete(hb_conn); } #endif crm_info("Done"); return rc; } void cib_cleanup(void) { crm_peer_destroy(); if (local_notify_queue) { g_hash_table_destroy(local_notify_queue); } crm_client_cleanup(); g_hash_table_destroy(config_hash); free(cib_our_uname); free(channel1); free(channel2); free(channel3); free(channel4); free(channel5); } unsigned long cib_num_ops = 0; const char *cib_stat_interval = "10min"; unsigned long cib_num_local = 0, cib_num_updates = 0, cib_num_fail = 0; unsigned long cib_bad_connects = 0, cib_num_timeouts = 0; #if SUPPORT_HEARTBEAT gboolean ccm_connect(void); static void ccm_connection_destroy(gpointer user_data) { crm_err("CCM connection failed... blocking while we reconnect"); CRM_ASSERT(ccm_connect()); return; } static void *ccm_library = NULL; gboolean ccm_connect(void) { gboolean did_fail = TRUE; int num_ccm_fails = 0; int max_ccm_fails = 30; int ret; int cib_ev_fd; int (*ccm_api_register) (oc_ev_t ** token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register", 1); int (*ccm_api_set_callback) (const oc_ev_t * token, oc_ev_class_t class, oc_ev_callback_t * fn, oc_ev_callback_t ** prev_fn) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback", 1); void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special", 1); int (*ccm_api_activate) (const oc_ev_t * token, int *fd) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate", 1); int (*ccm_api_unregister) (oc_ev_t * token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_unregister", 1); static struct mainloop_fd_callbacks ccm_fd_callbacks = { .dispatch = cib_ccm_dispatch, .destroy = ccm_connection_destroy, }; while (did_fail) { did_fail = FALSE; crm_info("Registering with CCM..."); ret = (*ccm_api_register) (&cib_ev_token); if (ret != 0) { did_fail = TRUE; } if (did_fail == FALSE) { crm_trace("Setting up CCM callbacks"); ret = (*ccm_api_set_callback) (cib_ev_token, OC_EV_MEMB_CLASS, cib_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if (did_fail == FALSE) { (*ccm_api_special) (cib_ev_token, OC_EV_MEMB_CLASS, 0); crm_trace("Activating CCM token"); ret = (*ccm_api_activate) (cib_ev_token, &cib_ev_fd); if (ret != 0) { crm_warn("CCM Activation failed"); did_fail = TRUE; } } if (did_fail) { num_ccm_fails++; (*ccm_api_unregister) (cib_ev_token); if (num_ccm_fails < max_ccm_fails) { crm_warn("CCM Connection failed %d times (%d max)", num_ccm_fails, max_ccm_fails); sleep(3); } else { crm_err("CCM Activation failed %d (max) times", num_ccm_fails); return FALSE; } } } crm_debug("CCM Activation passed... all set to go!"); mainloop_add_fd("heartbeat-ccm", G_PRIORITY_MEDIUM, cib_ev_fd, cib_ev_token, &ccm_fd_callbacks); return TRUE; } #endif #if SUPPORT_COROSYNC static void cib_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ cib_peer_callback(xml, NULL); } free_xml(xml); free(data); } static void cib_cs_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_err("Corosync connection lost! Exiting."); terminate_cib(__FUNCTION__, -1); } } #endif static void cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { switch (type) { case crm_status_processes: - if (legacy_mode && is_not_set(node->processes, crm_get_cluster_proc())) { +#if !SUPPORT_PLUGIN + if (cib_legacy_mode() + && is_not_set(node->processes, crm_get_cluster_proc())) { + uint32_t old = data? *(const uint32_t *)data : 0; if ((node->processes ^ old) & crm_proc_cpg) { crm_info("Attempting to disable legacy mode after %s left the cluster", node->uname); legacy_mode = FALSE; } } +#endif break; case crm_status_uname: case crm_status_rstate: case crm_status_nstate: if (cib_shutdown_flag && (crm_active_peers() < 2) && crm_hash_table_size(client_connections) == 0) { crm_info("No more peers"); terminate_cib(__FUNCTION__, 1); } break; } } #if SUPPORT_HEARTBEAT static void cib_ha_connection_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); terminate_cib(__FUNCTION__, 0); } else { crm_err("Heartbeat connection lost! Exiting."); terminate_cib(__FUNCTION__, -1); } } #endif int cib_init(void) { if (is_openais_cluster()) { #if SUPPORT_COROSYNC crm_cluster.destroy = cib_cs_destroy; crm_cluster.cpg.cpg_deliver_fn = cib_cs_dispatch; crm_cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT crm_cluster.hb_dispatch = cib_ha_peer_callback; crm_cluster.destroy = cib_ha_connection_destroy; #endif } config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); crm_exit(ENODATA); } if (stand_alone == FALSE) { if (is_openais_cluster()) { crm_set_status_callback(&cib_peer_update_callback); } if (crm_cluster_connect(&crm_cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(DAEMON_RESPAWN_STOP); } cib_our_uname = crm_cluster.uname; #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { gboolean was_error = FALSE; hb_conn = crm_cluster.hb_conn; if (was_error == FALSE) { if (HA_OK != hb_conn->llc_ops->set_cstatus_callback(hb_conn, cib_client_status_callback, hb_conn)) { crm_err("Cannot set cstatus callback: %s", hb_conn->llc_ops->errmsg(hb_conn)); was_error = TRUE; } } if (was_error == FALSE) { was_error = (ccm_connect() == FALSE); } if (was_error == FALSE) { /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); hb_conn->llc_ops->client_status(hb_conn, NULL, CRM_SYSTEM_CIB, -1); } } #endif } else { cib_our_uname = strdup("localhost"); } cib_ipc_servers_init(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, &ipc_rw_callbacks); if (stand_alone) { cib_is_master = TRUE; } /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); /* If main loop returned, clean up and exit. We disconnect in case * terminate_cib() was called with fast=1. */ crm_cluster_disconnect(&crm_cluster); cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); return crm_exit(pcmk_ok); } gboolean startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); CRM_ASSERT(cib != NULL); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; const char *port_s = NULL; active = TRUE; cib_read_config(config_hash, cib); port_s = crm_element_value(cib, "remote-tls-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_tls_fd = init_remote_listener(port, TRUE); } port_s = crm_element_value(cib, "remote-clear-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_fd = init_remote_listener(port, FALSE); } crm_info("CIB Initialization completed successfully"); } return active; } diff --git a/cib/messages.c b/cib/messages.c index 4e704dc616..65f57b1983 100644 --- a/cib/messages.c +++ b/cib/messages.c @@ -1,577 +1,578 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_DIFF_RETRY 5 #ifdef CIBPIPE gboolean cib_is_master = TRUE; #else gboolean cib_is_master = FALSE; #endif xmlNode *the_cib = NULL; gboolean syncd_once = FALSE; extern const char *cib_our_uname; int revision_check(xmlNode * cib_update, xmlNode * cib_copy, int flags); int get_revision(xmlNode * xml_obj, int cur_revision); int updateList(xmlNode * local_cib, xmlNode * update_command, xmlNode * failed, int operation, const char *section); gboolean check_generation(xmlNode * newCib, xmlNode * oldCib); gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code); int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); int sync_our_cib(xmlNode * request, gboolean all); extern xmlNode *cib_msg_copy(const xmlNode * msg, gboolean with_data); extern gboolean cib_shutdown_flag; int cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int result = pcmk_ok; const char *host = crm_element_value(req, F_ORIG); *answer = NULL; if (crm_element_value(req, F_CIB_ISREPLY) == NULL) { crm_info("Shutdown REQ from %s", host); return pcmk_ok; } else if (cib_shutdown_flag) { crm_info("Shutdown ACK from %s", host); terminate_cib(__FUNCTION__, 0); return pcmk_ok; } else { crm_err("Shutdown ACK from %s - not shutting down", host); result = -EINVAL; } return result; #endif } int cib_process_default(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); *answer = NULL; if (op == NULL) { result = -EINVAL; crm_err("No operation specified"); } else if (strcasecmp(CRM_OP_NOOP, op) == 0) { ; } else { result = -EPROTONOSUPPORT; crm_err("Action [%s] is not supported by the CIB", op); } return result; } int cib_process_quit(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); crm_warn("The CRMd has asked us to exit... complying"); crm_exit(pcmk_ok); return result; } int cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); if (safe_str_eq(op, CIB_OP_ISMASTER)) { if (cib_is_master == TRUE) { result = pcmk_ok; } else { result = -EPERM; } return result; } if (safe_str_eq(op, CIB_OP_MASTER)) { if (cib_is_master == FALSE) { crm_info("We are now in R/W mode"); cib_is_master = TRUE; syncd_once = TRUE; } else { crm_debug("We are still in R/W mode"); } } else if (cib_is_master) { crm_info("We are now in R/O mode"); cib_is_master = FALSE; } return result; #endif } int sync_in_progress = 0; void send_sync_request(const char *host) { xmlNode *sync_me = create_xml_node(NULL, "sync-me"); crm_info("Requesting re-sync from peer"); sync_in_progress++; crm_xml_add(sync_me, F_TYPE, "cib"); crm_xml_add(sync_me, F_CIB_OPERATION, CIB_OP_SYNC_ONE); crm_xml_add(sync_me, F_CIB_DELEGATED, cib_our_uname); send_cluster_message(host ? crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE); free_xml(sync_me); } int cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else const char *host = crm_element_value(req, F_ORIG); const char *seq = crm_element_value(req, F_CIB_PING_ID); char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); static struct qb_log_callsite *cs = NULL; crm_trace("Processing \"%s\" event %s from %s", op, seq, host); *answer = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(*answer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(*answer, XML_ATTR_DIGEST, digest); crm_xml_add(*answer, F_CIB_PING_ID, seq); if (cs == NULL) { cs = qb_log_callsite_get(__func__, __FILE__, __FUNCTION__, LOG_TRACE, __LINE__, crm_trace_nonlog); } if (cs && cs->targets) { /* Append additional detail so the reciever can log the differences */ add_message_xml(*answer, F_CIB_CALLDATA, the_cib); } else { /* Always include at least the version details */ const char *tag = TYPE(the_cib); xmlNode *shallow = create_xml_node(NULL, tag); copy_in_properties(shallow, the_cib); add_message_xml(*answer, F_CIB_CALLDATA, shallow); free_xml(shallow); } crm_info("Reporting our current digest to %s: %s for %s.%s.%s (%p %d)", host, digest, crm_element_value(existing_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(existing_cib, XML_ATTR_GENERATION), crm_element_value(existing_cib, XML_ATTR_NUMUPDATES), existing_cib, cs && cs->targets); free(digest); return pcmk_ok; #endif } int cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else return sync_our_cib(req, TRUE); #endif } int cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int rc = pcmk_ok; *answer = NULL; if(crm_element_value(req, F_CIB_SCHEMA_MAX)) { return cib_process_upgrade( op, options, section, req, input, existing_cib, result_cib, answer); } else { int new_version = 0; int current_version = 0; xmlNode *scratch = copy_xml(existing_cib); const char *host = crm_element_value(req, F_ORIG); const char *value = crm_element_value(existing_cib, XML_ATTR_VALIDATION); crm_trace("Processing \"%s\" event", op); if (value != NULL) { current_version = get_schema_version(value); } rc = update_validation(&scratch, &new_version, 0, TRUE, TRUE); if (new_version > current_version) { xmlNode *up = create_xml_node(NULL, __FUNCTION__); rc = pcmk_ok; crm_notice("Upgrade request from %s verified", host); crm_xml_add(up, F_TYPE, "cib"); crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version)); crm_xml_add(up, F_CIB_DELEGATED, host); crm_xml_add(up, F_CIB_CLIENTID, crm_element_value(req, F_CIB_CLIENTID)); crm_xml_add(up, F_CIB_CALLOPTS, crm_element_value(req, F_CIB_CALLOPTS)); crm_xml_add(up, F_CIB_CALLID, crm_element_value(req, F_CIB_CALLID)); if (cib_legacy_mode() && cib_is_master) { rc = cib_process_upgrade( op, options, section, up, input, existing_cib, result_cib, answer); } else { send_cluster_message(NULL, crm_msg_cib, up, FALSE); } free_xml(up); } else if(rc == pcmk_ok) { rc = -pcmk_err_schema_unchanged; } free_xml(scratch); } return rc; #endif } int cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else return sync_our_cib(req, FALSE); #endif } int cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; if (cib_is_master) { /* the master is never waiting for a resync */ sync_in_progress = 0; } if (sync_in_progress > MAX_DIFF_RETRY) { /* request another full-sync, * the last request may have been lost */ sync_in_progress = 0; } if (sync_in_progress) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details(input, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); sync_in_progress++; crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates); return -pcmk_err_diff_resync; } rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer); crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc, cib_is_master?"master":"slave"); if (rc == -pcmk_err_diff_resync && cib_is_master == FALSE) { free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); } else if (rc == -pcmk_err_diff_resync) { rc = -pcmk_err_diff_failed; if (options & cib_force_diff) { crm_warn("Not requesting full refresh in R/W mode"); } } else if(rc != pcmk_ok && cib_legacy_mode()) { - crm_warn("Something went wrong in compatibility mode, requesting full refresh"); + crm_warn("Requesting full CIB refresh because update failed: %s" + CRM_XS " rc=%d", pcmk_strerror(rc), rc); xml_log_patchset(LOG_INFO, __FUNCTION__, input); free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); } return rc; } int cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *tag = crm_element_name(input); int rc = cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); if (rc == pcmk_ok && safe_str_eq(tag, XML_TAG_CIB)) { sync_in_progress = 0; } return rc; } static int delete_cib_object(xmlNode * parent, xmlNode * delete_spec) { const char *object_name = NULL; const char *object_id = NULL; xmlNode *equiv_node = NULL; int result = pcmk_ok; if (delete_spec != NULL) { object_name = crm_element_name(delete_spec); } object_id = crm_element_value(delete_spec, XML_ATTR_ID); crm_trace("Processing: <%s id=%s>", crm_str(object_name), crm_str(object_id)); if (delete_spec == NULL) { result = -EINVAL; } else if (parent == NULL) { result = -EINVAL; } else if (object_id == NULL) { /* placeholder object */ equiv_node = find_xml_node(parent, object_name, FALSE); } else { equiv_node = find_entity(parent, object_name, object_id); } if (result != pcmk_ok) { ; /* nothing */ } else if (equiv_node == NULL) { result = pcmk_ok; } else if (xml_has_children(delete_spec) == FALSE) { /* only leaves are deleted */ crm_debug("Removing leaf: <%s id=%s>", crm_str(object_name), crm_str(object_id)); free_xml(equiv_node); equiv_node = NULL; } else { xmlNode *child = NULL; for (child = __xml_first_child(delete_spec); child != NULL; child = __xml_next(child)) { int tmp_result = delete_cib_object(equiv_node, child); /* only the first error is likely to be interesting */ if (tmp_result != pcmk_ok && result == pcmk_ok) { result = tmp_result; } } } return result; } int cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { xmlNode *failed = NULL; int result = pcmk_ok; xmlNode *update_section = NULL; crm_trace("Processing \"%s\" event for section=%s", op, crm_str(section)); if (safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)) { section = NULL; } else if (safe_str_eq(XML_TAG_CIB, section)) { section = NULL; } else if (safe_str_eq(crm_element_name(input), XML_TAG_CIB)) { section = NULL; } CRM_CHECK(strcasecmp(CIB_OP_DELETE, op) == 0, return -EINVAL); if (input == NULL) { crm_err("Cannot perform modification with no data"); return -EINVAL; } failed = create_xml_node(NULL, XML_TAG_FAILED); update_section = get_object_root(section, *result_cib); result = delete_cib_object(update_section, input); update_results(failed, input, op, result); if (xml_has_children(failed)) { CRM_CHECK(result != pcmk_ok, result = -EINVAL); } if (result != pcmk_ok) { crm_log_xml_err(failed, "CIB Update failures"); *answer = failed; } else { free_xml(failed); } return result; } gboolean check_generation(xmlNode * newCib, xmlNode * oldCib) { if (cib_compare_generation(newCib, oldCib) >= 0) { return TRUE; } crm_warn("Generation from update is older than the existing one"); return FALSE; } #ifndef CIBPIPE int sync_our_cib(xmlNode * request, gboolean all) { int result = pcmk_ok; char *digest = NULL; const char *host = crm_element_value(request, F_ORIG); const char *op = crm_element_value(request, F_CIB_OPERATION); xmlNode *replace_request = cib_msg_copy(request, FALSE); CRM_CHECK(the_cib != NULL,;); CRM_CHECK(replace_request != NULL,;); crm_debug("Syncing CIB to %s", all ? "all peers" : host); if (all == FALSE && host == NULL) { crm_log_xml_err(request, "bad sync"); } /* remove the "all == FALSE" condition * * sync_from was failing, the local client wasn't being notified * because it didn't know it was a reply * setting this does not prevent the other nodes from applying it * if all == TRUE */ if (host != NULL) { crm_xml_add(replace_request, F_CIB_ISREPLY, host); } if (all) { xml_remove_prop(replace_request, F_CIB_HOST); } crm_xml_add(replace_request, F_CIB_OPERATION, CIB_OP_REPLACE); crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op); crm_xml_add(replace_request, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); crm_xml_add(replace_request, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); crm_xml_add(replace_request, XML_ATTR_DIGEST, digest); add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); if (send_cluster_message (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) { result = -ENOTCONN; } free_xml(replace_request); free(digest); return result; } #endif diff --git a/crmd/Makefile.am b/crmd/Makefile.am index da5e8d3020..f0addfc643 100644 --- a/crmd/Makefile.am +++ b/crmd/Makefile.am @@ -1,57 +1,57 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common halibdir = $(CRM_DAEMON_DIR) ## binary progs halib_PROGRAMS = crmd ## SOURCES noinst_HEADERS = crmd.h crmd_fsa.h crmd_messages.h fsa_defines.h \ fsa_matrix.h fsa_proto.h crmd_utils.h crmd_callbacks.h \ crmd_lrm.h te_callbacks.h tengine.h crmd_CFLAGS = $(CFLAGS_HARDENED_EXE) crmd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) crmd_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/transition/libtransitioner.la \ $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(CLUSTERLIBS) crmd_SOURCES = main.c crmd.c corosync.c notify.c \ - fsa.c control.c messages.c membership.c callbacks.c \ + fsa.c control.c messages.c membership.c callbacks.c attrd.c \ election.c join_client.c join_dc.c subsystems.c throttle.c \ cib.c pengine.c tengine.c lrm.c lrm_state.c remote_lrmd_ra.c \ utils.c misc.c te_events.c te_actions.c te_utils.c te_callbacks.c if BUILD_HEARTBEAT_SUPPORT crmd_SOURCES += heartbeat.c endif if BUILD_XML_HELP man7_MANS = crmd.7 endif CLEANFILES = $(man7_MANS) diff --git a/crmd/attrd.c b/crmd/attrd.c new file mode 100644 index 0000000000..ded56a6231 --- /dev/null +++ b/crmd/attrd.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2006-2017 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include + +#include +#include +#include + +crm_ipc_t *attrd_ipc = NULL; + +#if !HAVE_ATOMIC_ATTRD +static int +update_without_attrd(const char *host_uuid, const char *name, const char *value, + const char *user_name, gboolean is_remote_node, + char command) +{ + int call_opt = cib_none; + + if (fsa_cib_conn == NULL) { + return -1; + } + + call_opt = crmd_cib_smart_opt(); + + if (command == 'C') { + erase_status_tag(host_uuid, XML_TAG_TRANSIENT_NODEATTRS, call_opt); + return pcmk_ok; + } + + crm_trace("updating status for host_uuid %s, %s=%s", + host_uuid, (name? name : ""), (value? value : "")); + if (value) { + return update_attr_delegate(fsa_cib_conn, call_opt, XML_CIB_TAG_STATUS, + host_uuid, NULL, NULL, NULL, name, value, + FALSE, user_name, + (is_remote_node? "remote" : NULL)); + } else { + return delete_attr_delegate(fsa_cib_conn, call_opt, XML_CIB_TAG_STATUS, + host_uuid, NULL, NULL, NULL, name, NULL, + FALSE, user_name); + } +} +#endif + +static void +log_attrd_error(const char *host, const char *name, const char *value, + gboolean is_remote, char command, int rc) +{ + const char *display_command; /* for commands without name/value */ + const char *node_type = (is_remote? "Pacemaker Remote" : "cluster"); + gboolean shutting_down = is_set(fsa_input_register, R_SHUTDOWN); + const char *when = (shutting_down? " at shutdown" : ""); + + switch (command) { + case 'R': + display_command = "refresh"; + break; + case 'C': + display_command = "purge"; + break; + default: + display_command = NULL; + } + + if (display_command) { + crm_err("Could not request %s of %s node %s%s: %s (%d)", + display_command, node_type, host, when, pcmk_strerror(rc), rc); + } else { + crm_err("Could not request update of %s=%s for %s node %s%s: %s (%d)", + name, value, node_type, host, when, pcmk_strerror(rc), rc); + } + + /* If we can't request shutdown via attribute, fast-track it */ + if ((command == 'U') && shutting_down) { + register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL); + } +} + +static void +update_attrd_helper(const char *host, const char *name, const char *value, + const char *user_name, gboolean is_remote_node, + char command) +{ + gboolean rc; + int max = 5; + int attrd_opts = attrd_opt_none; + + if (is_remote_node) { +#if HAVE_ATOMIC_ATTRD + attrd_opts |= attrd_opt_remote; +#else + /* Talk directly to cib for remote nodes if it's legacy attrd */ + int rc; + + /* host is required for updating a remote node */ + CRM_CHECK(host != NULL, return;); + + /* remote node uname and uuid are equal */ + rc = update_without_attrd(host, name, value, user_name, is_remote_node, + command); + if (rc < pcmk_ok) { + log_attrd_error(host, name, value, is_remote_node, command, rc); + } + return; +#endif + } + + if (attrd_ipc == NULL) { + attrd_ipc = crm_ipc_new(T_ATTRD, 0); + } + + do { + if (crm_ipc_connected(attrd_ipc) == FALSE) { + crm_ipc_close(attrd_ipc); + crm_info("Connecting to attribute manager ... %d retries remaining", + max); + if (crm_ipc_connect(attrd_ipc) == FALSE) { + crm_perror(LOG_INFO, "Connection to attribute manager failed"); + } + } + + rc = attrd_update_delegate(attrd_ipc, command, host, name, value, + XML_CIB_TAG_STATUS, NULL, NULL, user_name, + attrd_opts); + if (rc == pcmk_ok) { + break; + + } else if (rc != -EAGAIN && rc != -EALREADY) { + crm_info("Disconnecting from attribute manager: %s (%d)", + pcmk_strerror(rc), rc); + crm_ipc_close(attrd_ipc); + } + + sleep(5 - max); + + } while (max--); + + if (rc != pcmk_ok) { + log_attrd_error(host, name, value, is_remote_node, command, rc); + } +} + +void +update_attrd(const char *host, const char *name, const char *value, + const char *user_name, gboolean is_remote_node) +{ + update_attrd_helper(host, name, value, user_name, is_remote_node, 'U'); +} + +void +update_attrd_remote_node_removed(const char *host, const char *user_name) +{ + crm_trace("Asking attrd to purge Pacemaker Remote node %s", host); + update_attrd_helper(host, NULL, NULL, user_name, TRUE, 'C'); +} diff --git a/crmd/lrm.c b/crmd/lrm.c index 9068652e48..9075fa715d 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,2531 +1,2532 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 #define s_if_plural(i) (((i) == 1)? "" : "s") struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; op_id = calloc(1, strlen(rsc) + 34); if (op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval == existing->interval) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.class); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.class = strdup(rsc->class); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval > 0) { crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* We must store failed monitors here * - otherwise the block below will cause them to be forgetten them when a stop happens */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: %s_%s_%d", g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); CRM_ASSERT(op != NULL); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* if this is the local lrmd ipc connection, set the right bits in the * crmd when the connection goes down */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local lrmd connections. Remote connections are handled as * resources within the pengine. Connecting and disconnecting from remote lrmd instances * handled differently than the local. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the LRM"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state); crm_notice("Disconnected from the LRM"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the LRM"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the LRM %d time%s (%d max)", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to connect to the LRM the max allowed %d time%s", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, s_if_plural(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending LRM operation%s at %s", counter, s_if_plural(counter), when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } GHashTable *metadata_hash = NULL; static char * get_rsc_metadata(const char *type, const char *rclass, const char *provider, bool force) { int rc = pcmk_ok; int len = 0; char *key = NULL; char *metadata = NULL; /* Always use a local connection for this operation */ lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname); CRM_CHECK(type != NULL, return NULL); CRM_CHECK(rclass != NULL, return NULL); CRM_CHECK(lrm_state != NULL, return NULL); if (provider == NULL) { provider = "heartbeat"; } if (metadata_hash == NULL) { metadata_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } len = strlen(type) + strlen(rclass) + strlen(provider) + 4; key = malloc(len); if(key == NULL) { return NULL; } snprintf(key, len, "%s::%s:%s", rclass, provider, type); if(force == FALSE) { metadata = g_hash_table_lookup(metadata_hash, key); if (metadata) { crm_trace("Retrieved cached metadata for %s", key); } } if(metadata == NULL) { rc = lrm_state_get_metadata(lrm_state, rclass, provider, type, &metadata, 0); if(rc == pcmk_ok) { crm_trace("Retrieved live metadata for %s", key); CRM_LOG_ASSERT(metadata != NULL); g_hash_table_insert(metadata_hash, key, metadata); key = NULL; } else { crm_trace("No metadata found for %s: %s" CRM_XS " rc=%d", key, pcmk_strerror(rc), rc); CRM_CHECK(metadata == NULL, metadata = NULL); } } free(key); return metadata; } static char * build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result, const char *criteria, bool target, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; xmlNode *param = NULL; xmlNode *params = NULL; const char *secure_terms[] = { "password", "passwd", "user", }; if(safe_str_eq("private", criteria)) { /* It will take time for the agents to be updated * Check for some common terms */ max = DIMOF(secure_terms); } params = find_xml_node(metadata, "parameters", TRUE); for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) { if (crm_str_eq((const char *)param->name, "parameter", TRUE)) { bool accept = FALSE; const char *name = crm_element_value(param, "name"); const char *value = crm_element_value(param, criteria); if(max && value) { /* Turn off the compatibility logic once an agent has been updated to know about 'private' */ max = 0; } if (name == NULL) { crm_err("Invalid parameter in %s metadata", op->rsc_id); } else if(target == crm_is_true(value)) { accept = TRUE; } else if(max) { int lpc = 0; bool found = FALSE; for(lpc = 0; found == FALSE && lpc < max; lpc++) { if(safe_str_eq(secure_terms[lpc], name)) { found = TRUE; } } if(found == target) { accept = TRUE; } } if(accept) { int start = len; crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria); len += strlen(name) + 2; list = realloc_safe(list, len + 1); sprintf(list + start, " %s ", name); } else { crm_trace("Rejecting %s for %s", name, criteria); } if(invert_for_xml) { crm_trace("Inverting %s match for %s xml", name, criteria); accept = !accept; } if(result && accept) { value = g_hash_table_lookup(op->params, name); if(value != NULL) { char *summary = crm_versioned_param_summary(op->versioned_params, name); if (summary) { crm_trace("Adding attr %s=%s to the xml result", name, summary); crm_xml_add(result, name, summary); free(summary); } else { crm_trace("Adding attr %s=%s to the xml result", name, value); crm_xml_add(result, name, value); } } } } } return list; } static bool resource_supports_action(xmlNode *metadata, const char *name) { const char *value = NULL; xmlNode *action = NULL; xmlNode *actions = NULL; actions = find_xml_node(metadata, "actions", TRUE); for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { if (crm_str_eq((const char *)action->name, "action", TRUE)) { value = crm_element_value(action, "name"); if (safe_str_eq(name, value)) { return TRUE; } } } return FALSE; } static void append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval > 0) { /* monitors are not reloadable */ return; } if(resource_supports_action(metadata, "reload")) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Any parameters with unique="1" should be added into the "op-force-restart" list. */ list = build_parameter_list(op, metadata, restart, "unique", TRUE, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, "private", TRUE, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; xmlNode *metadata = NULL; const char *m_string = NULL; const char *caller_version = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) { /* Stopped resources don't need the digest logic */ crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type); return TRUE; } m_string = get_rsc_metadata(rsc->type, rsc->class, rsc->provider, safe_str_eq(op->op_type, RSC_START)); if(m_string == NULL) { crm_err("No metadata for %s::%s:%s", rsc->class, rsc->provider, rsc->type); return TRUE; } metadata = string2xml(m_string); if(metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->class, rsc->provider, rsc->type); return TRUE; } crm_trace("Including additional digests for %s::%s:%s", rsc->class, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); free_xml(metadata); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current state of the LRM"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); xmlNode *xml_state; if (!lrm_state) { crm_err("Could not query lrm state for lrmd node %s", node_name); return NULL; } xml_state = do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); /* In case this function is called to generate a join confirmation to * send to the DC, force the current and expected join state to member. * This isn't necessary for newer DCs but is backward compatible. */ crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); return xml_state; } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, from_host, rsc_id, rc == pcmk_ok ? "" : " not"); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int max = 0; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1; rsc_xpath = calloc(1, max); snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" static void delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if (op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if (call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 10; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_call_template, lrm_state->node_name, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 1; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_template, lrm_state->node_name, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } void lrm_clear_last_failure(const char *rsc_id, const char *node_name) { char *attr = NULL; GHashTableIter iter; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; rsc_history_t *entry = NULL; attr = generate_op_key(rsc_id, "last_failure", 0); /* This clears last failure for every lrm state that has this rsc.*/ for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (node_name != NULL) { if (strcmp(node_name, lrm_state->node_name) != 0) { /* filter by node_name if node_name is present */ continue; } } delete_op_entry(lrm_state, NULL, rsc_id, attr, 0); if (!lrm_state->resource_history) { continue; } g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { if (crm_str_eq(rsc_id, entry->id, TRUE)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } free(attr); g_list_free(lrm_state_list); } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } static lrmd_rsc_info_t * get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create) { lrmd_rsc_info_t *rsc = NULL; const char *id = ID(resource); const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_trace("Retrieving %s from the LRM.", id); CRM_CHECK(id != NULL, return NULL); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc && long_id) { rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0); } if (!rsc && do_create) { CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_trace("Adding rsc %s before operation", id); lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name); /* only register this as a internal error if this involves the local * lrmd. Otherwise we're likely dealing with an unresponsive remote-node * which is not a FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } return rsc; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if(xml_rsc == NULL) { /* @TODO Should we do something else, like direct ack? */ crm_info("Skipping %s=%d on %s (%p): no resource", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Skipping %s=%d on %s (%p): no operation", crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); CRM_ASSERT(op != NULL); op->call_id = get_fake_call_id(lrm_state, op->rsc_id); if(safe_str_eq(operation, RSC_NOTIFY)) { /* Notifications can't fail yet */ op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = rc; } op->t_run = time(NULL); op->t_rcchange = op->t_run; crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state); if(lrm_state) { process_lrm_event(lrm_state, op, NULL); } else { lrmd_rsc_info_t rsc; rsc.id = strdup(op->rsc_id); rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); do_update_resource(target_node, &rsc, op); free(rsc.id); free(rsc.type); free(rsc.class); free(rsc.provider); } lrmd_free_event(op); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean create_rsc = TRUE; lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; gboolean crm_rsc_delete = FALSE; if (input->xml != NULL) { /* Remote node operations are routed here to their remote connections */ target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); } if (target_node == NULL) { target_node = fsa_our_uname; } else if (safe_str_neq(target_node, fsa_our_uname)) { is_remote_node = TRUE; } lrm_state = lrm_state_find(target_node); if (lrm_state == NULL && is_remote_node) { crm_err("Failing action because remote node %s has no connection to cluster node %s", target_node, fsa_our_uname); /* The action must be recorded here and in the CIB as failed */ synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM command from: %s", from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { /* remember this delete op came from crm_resource */ crm_rsc_delete = TRUE; operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { operation = CRM_OP_LRM_REFRESH; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd can not fail a resource, it does not understand the * concept of success or failure in relation to a resource, it simply * executes operations and reports the results. We determine what a failure is. * Because of this, if we want to fail a resource we have to fake what we * understand a failure to look like. * * To do this we create a fake lrmd operation event for the resource * we want to fail. We then pass that event to the lrmd client callback * so it will be processed as if it actually came from the lrmd. */ op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); CRM_ASSERT(op != NULL); free((char *)op->user_data); op->user_data = NULL; op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->interval = 0; op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); - if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { + if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(input->msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); - if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0 - && strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) { + if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) + && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { + xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } if(lrm_state_is_connected(lrm_state) == FALSE) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc == NULL && create_rsc) { crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "bad input"); /* if the operation couldn't complete because we can't register * the resource, return a generic error */ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); } else if (rsc == NULL) { crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); /* Deleting something that does not exist is a success */ send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; gboolean in_progress = FALSE; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id ? call_id : "NA"); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } /* Acknowledge the cancellation operation if it's for a remote connection resource */ if (in_progress == FALSE || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } delete_op_entry(lrm_state, NULL, rsc->id, op_key, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, pcmk_strerror(cib_rc)); op = construct_op(lrm_state, input->xml, rsc->id, operation); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); lrmd_free_rsc_info(rsc); return; } #endif if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; GHashTable *params = NULL; xmlNode *versioned_params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id != NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); if (!is_remote_lrmd_ra(NULL, NULL, rsc_id)) { xmlNode *ptr = first_named_child(rsc_op, XML_TAG_VER_ATTRS); if (ptr) { versioned_params = copy_xml(ptr); } } op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); if (safe_str_neq(operation, RSC_STOP)) { op->params = params; op->versioned_params = versioned_params; } else { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; op->versioned_params = versioned_params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; op->versioned_params = NULL; free_xml(versioned_params); } } if (op->versioned_params) { char *versioned_params_text = dump_xml_unformatted(op->versioned_params); if (versioned_params_text) { g_hash_table_insert(op->params, strdup("#" XML_TAG_VER_ATTRS), versioned_params_text); } } /* sanity */ if (op->interval < 0) { op->interval = 0; } if (op->timeout <= 0) { op->timeout = op->interval; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); if (op->op_type == NULL || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL) || safe_str_eq(op->op_type, CRMD_ACTION_DELETE)) { return; } if (op->params == NULL) { return; } else { const char *record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending == NULL || crm_is_true(record_pending) == FALSE) { return; } } op->call_id = -1; op->op_status = PCMK_LRM_OP_PENDING; op->rc = PCMK_OCF_UNKNOWN; op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op %s_%s_%d on %s in the CIB", op->rsc_id, op->op_type, op->interval, node_name); do_update_resource(node_name, rsc, op); } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && op->interval == 0 && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if (op->interval == 0 && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for %s_%s_%d", removed, s_if_plural(removed), rsc->id, operation, op->interval); } } /* now do the op */ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); send_nack = TRUE; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { send_nack = TRUE; } if(send_nack) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = generate_op_key(rsc->id, op->op_type, op->interval); if (op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); op->call_id = get_fake_call_id(lrm_state, rsc->id); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = strdup(op->user_data); g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, op->op_type, op->interval, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY)) { /* Keep notify ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(lrm_state->node_name, rsc, op); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. report this */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id); } else if (pending && op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack. */ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: do_crm_log(op->interval?LOG_INFO:LOG_NOTICE, "Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; default: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } crmd_notify_resource_op(lrm_state->node_name, op); if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/crmd/messages.c b/crmd/messages.c index 990a7a1893..2f285f03a7 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -1,1050 +1,1050 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); extern crm_ipc_t *attrd_ipc; void handle_response(xmlNode * stored_msg); enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause); enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg); #define ROUTER_RESULT(x) crm_trace("Router result: %s", x) /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ crm_info("Resetting the current action list"); fsa_dump_actions(fsa_actions, "Drop"); fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; CRM_CHECK(raised_from != NULL, raised_from = ""); if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } if (input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", raised_from, fsa_cause2string(cause), data, old_len); if (old_len > 0) { fsa_dump_queue(LOG_TRACE); prepend = FALSE; } if (data == NULL) { fsa_actions |= with_actions; fsa_dump_actions(with_actions, "Restored"); return 0; } /* Store everything in the new event and reset fsa_actions */ with_actions |= fsa_actions; fsa_actions = A_NOTHING; } last_data_id++; crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data", raised_from, prepend ? "prepended" : "appended", last_data_id, fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without"); fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_trace("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); crmd_exit(pcmk_err_generic); break; } crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if (prepend) { crm_trace("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_trace("Queue len: %d", g_list_length(fsa_message_queue)); /* fsa_dump_queue(LOG_DEBUG_2); */ if (old_len == g_list_length(fsa_message_queue)) { crm_err("Couldn't add message to the queue"); } if (fsa_source && input != I_WAIT_FOR_EVENT) { crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } return last_data_id; } void fsa_dump_queue(int log_level) { int offset = 0; GListPtr lpc = NULL; for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) { fsa_data_t *data = (fsa_data_t *) lpc->data; do_crm_log_unlikely(log_level, "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, data->data, data->data_type, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { ha_msg_input_t *copy = NULL; xmlNodePtr data = NULL; if (orig != NULL) { crm_trace("Copy msg"); data = copy_xml(orig->msg); } else { crm_trace("No message to copy"); } copy = new_ha_msg_input(data); if (orig && orig->msg != NULL) { CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); } return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Don't know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); crmd_exit(pcmk_err_generic); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input, cause); /* done or process later? */ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { int dest = 1; int is_for_dc = 0; int is_for_dcib = 0; int is_for_te = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *msg_error = NULL; crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); if (msg == NULL) { msg_error = "Cannot route empty message"; } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) { /* quietly ignore */ processing_complete = TRUE; } else if (safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if (sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if (msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_xml_warn(msg, "bad msg"); } if (processing_complete) { return TRUE; } processing_complete = TRUE; is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if (host_to == NULL || strlen(host_to) == 0) { if (is_for_dc || is_for_te) { is_local = 0; } else if (is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if (safe_str_eq(fsa_our_uname, host_to)) { is_local = 1; } if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC && is_for_te) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else if (AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay to DC"); send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); } } else if (is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else { crm_node_t *node_to = NULL; #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); if (dest == crm_msg_none || dest > crm_msg_stonith_ng) { dest = crm_msg_crmd; } } #endif if (host_to) { node_to = crm_find_peer(0, host_to); if (node_to == NULL) { crm_err("Cannot route message to unknown node %s", host_to); return TRUE; } } ROUTER_RESULT("Message result: External relay"); send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE); } return processing_complete; } static gboolean process_hello_message(xmlNode * hello, char **client_name, char **major_version, char **minor_version) { const char *local_client_name; const char *local_major_version; const char *local_minor_version; *client_name = NULL; *major_version = NULL; *minor_version = NULL; if (hello == NULL) { return FALSE; } local_client_name = crm_element_value(hello, "client_name"); local_major_version = crm_element_value(hello, "major_version"); local_minor_version = crm_element_value(hello, "minor_version"); if (local_client_name == NULL || strlen(local_client_name) == 0) { crm_err("Hello message was not valid (field %s not found)", "client name"); return FALSE; } else if (local_major_version == NULL || strlen(local_major_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "major version"); return FALSE; } else if (local_minor_version == NULL || strlen(local_minor_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "minor version"); return FALSE; } *client_name = strdup(local_client_name); *major_version = strdup(local_major_version); *minor_version = strdup(local_minor_version); crm_trace("Hello message ok"); return TRUE; } gboolean crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session) { char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; gboolean auth_result = FALSE; xmlNode *xml = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); const char *uuid = curr_client ? curr_client->id : proxy_session; if (uuid == NULL) { crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); return FALSE; } else if (safe_str_neq(CRM_OP_HELLO, op)) { return TRUE; } xml = get_message_xml(client_msg, F_CRM_DATA); auth_result = process_hello_message(xml, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if (client_name == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), uuid); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_trace("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } } if (auth_result == TRUE) { crm_trace("Accepted client %s", client_name); if (curr_client) { curr_client->userdata = strdup(client_name); } crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } else { crm_warn("Rejected client logon request"); if (curr_client) { qb_ipcs_disconnect(curr_client->ipcs); } } free(minor_version); free(major_version); free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(xmlNode * msg, enum crmd_fsa_cause cause) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, F_CRM_MSG_TYPE); if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) { return handle_request(msg, cause); } else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) { handle_response(msg); return I_NULL; } crm_err("Unknown message type: %s", type); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; const char *uname = NULL; gboolean is_remote_node = FALSE; xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); if (xml_rsc) { rsc = ID(xml_rsc); } uname = crm_element_value(stored_msg, XML_LRM_ATTR_TARGET); if (crm_element_value(stored_msg, XML_LRM_ATTR_ROUTER_NODE)) { is_remote_node = TRUE; } if (rsc) { char *attr = NULL; crm_info("Removing failcount for %s", rsc); - attr = crm_concat("fail-count", rsc, '-'); + attr = crm_failcount_name(rsc); update_attrd(uname, attr, NULL, NULL, is_remote_node); free(attr); - attr = crm_concat("last-failure", rsc, '-'); + attr = crm_lastfailure_name(rsc); update_attrd(uname, attr, NULL, NULL, is_remote_node); free(attr); lrm_clear_last_failure(rsc, uname); } else { crm_log_xml_warn(stored_msg, "invalid failcount op"); } return I_NULL; } /*! * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_remote_state(xmlNode *msg) { const char *remote_uname = ID(msg); const char *remote_is_up = crm_element_value(msg, XML_NODE_IN_CLUSTER); crm_node_t *remote_peer; CRM_CHECK(remote_uname && remote_is_up, return I_NULL); remote_peer = crm_remote_peer_get(remote_uname); CRM_CHECK(remote_peer, return I_NULL); crm_update_peer_state(__FUNCTION__, remote_peer, crm_is_true(remote_is_up)? CRM_NODE_MEMBER : CRM_NODE_LOST, 0); return I_NULL; } enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); /* Optimize this for the DC - it has the most to do */ if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); return I_NULL; } if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_node_t *node = crm_find_peer(0, from); crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN); if(AM_I_DC == FALSE) { return I_NULL; /* Done */ } } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting ourselves down (DC)"); return I_STOP; } else if (dc_match) { crm_err("We didn't ask to be shut down, yet our" " TE is telling us to. Better get out now!"); return I_TERMINATE; } else if (fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); return I_ELECTION; } } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ return handle_shutdown_request(stored_msg); } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) { /* a remote connection host is letting us know the node state */ return handle_remote_state(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { throttle_update(stored_msg); if (AM_I_DC && transition_graph != NULL) { if (transition_graph->complete == FALSE) { crm_debug("The throttle changed. Trigger a graph."); trigger_graph(); } } return I_NULL; } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if (fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; #if 0 } else if (AM_I_DC) { /* This is the old way of doing things but what is gained? */ return I_ELECTION; #endif } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0 || strcmp(op, CRM_OP_LRM_FAIL) == 0 || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*return I_SHUTDOWN; */ return I_NULL; /*========== (NOT_DC)-Only Actions ==========*/ } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (dc_match || fsa_our_dc == NULL) { if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { crm_err("We didn't ask to be shut down, yet our DC is telling us to."); set_bit(fsa_input_register, R_STAYDOWN); return I_STOP; } crm_info("Shutting down"); return I_STOP; } else { crm_warn("Discarding %s op from %s", op, host_from); } } else if (strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); /* Ok, so technically not so interesting, but CTS needs to see this */ crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg, ping); if (msg) { (void)relay_message(msg, TRUE); } free_xml(ping); free_xml(msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; const char *name = NULL; crm_element_value_int(stored_msg, XML_ATTR_ID, &id); name = crm_element_value(stored_msg, XML_ATTR_UNAME); if(cause == C_IPC_MESSAGE) { msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { crm_err("Could not instruct peers to remove references to node %s/%u", name, id); } else { crm_notice("Instructing peers to remove references to node %s/%u", name, id); } free_xml(msg); } else { reap_crm_member(id, name); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } void handle_response(xmlNode * stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { /* Check if the PE answer been superseded by a subsequent request? */ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (safe_str_eq(msg_ref, fsa_pe_ref)) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); crm_trace("Completed: %s...", fsa_pe_ref); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "CRMd"); } } enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/procedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; time_t now = time(NULL); const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state)); crm_log_xml_trace(stored_msg, "message"); now_s = crm_itoa(now); update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); gboolean send_msg_via_ipc(xmlNode * msg, const char *sys) { gboolean send_ok = TRUE; crm_client_t *client_channel = crm_client_get_by_id(sys); if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ send_ok = crm_ipcs_send(client_channel, 0, msg, crm_ipc_server_event); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); process_te_message(msg, data); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; fsa_input.msg = msg; fsa_input.xml = get_message_xml(msg, F_CRM_DATA); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __FUNCTION__; fsa_data.data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE); #endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); } else if (sys != NULL && crmd_is_proxy_session(sys)) { crmd_proxy_send(sys, msg); } else { crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } return send_ok; } ha_msg_input_t * new_ha_msg_input(xmlNode * orig) { ha_msg_input_t *input_copy = NULL; input_copy = calloc(1, sizeof(ha_msg_input_t)); input_copy->msg = orig; input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA); return input_copy; } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } /*! * \internal * \brief Notify the DC of a remote node state change * * \param[in] node_name Node's name * \param[in] node_up TRUE if node is up, FALSE if down */ void send_remote_state_message(const char *node_name, gboolean node_up) { /* If we don't have a DC, or the message fails, we have a failsafe: * the DC will eventually pick up the change via the CIB node state. * The message allows it to happen sooner if possible. */ if (fsa_our_dc) { xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_info("Notifying DC %s of pacemaker_remote node %s %s", fsa_our_dc, node_name, (node_up? "coming up" : "going down")); crm_xml_add(msg, XML_ATTR_ID, node_name); crm_xml_add_boolean(msg, XML_NODE_IN_CLUSTER, node_up); send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, msg, TRUE); free_xml(msg); } else { crm_debug("No DC to notify of pacemaker_remote node %s %s", node_name, (node_up? "coming up" : "going down")); } } diff --git a/crmd/te_events.c b/crmd/te_events.c index e42e63d288..13801e2724 100644 --- a/crmd/te_events.c +++ b/crmd/te_events.c @@ -1,535 +1,535 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include char *failed_stop_offset = NULL; char *failed_start_offset = NULL; gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node) { const char *target_uuid = NULL; const char *router = NULL; const char *router_uuid = NULL; xmlNode *last_action = NULL; GListPtr gIter = NULL; GListPtr gIter2 = NULL; if (graph == NULL || graph->complete) { return FALSE; } gIter = graph->synapses; for (; gIter != NULL; gIter = gIter->next) { synapse_t *synapse = (synapse_t *) gIter->data; if (synapse->confirmed || synapse->failed) { /* We've already been here */ continue; } gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { crm_action_t *action = (crm_action_t *) gIter2->data; if (action->type == action_type_pseudo || action->confirmed) { continue; } else if (action->type == action_type_crm) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRM_OP_FENCE)) { continue; } } target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (router) { crm_node_t *node = crm_get_peer(0, router); if (node) { router_uuid = node->uuid; } } if (safe_str_eq(target_uuid, down_node) || safe_str_eq(router_uuid, down_node)) { action->failed = TRUE; synapse->failed = TRUE; last_action = action->xml; stop_te_timer(action->timer); update_graph(graph, action); if (synapse->executed) { crm_notice("Action %d (%s) was pending on %s (offline)", action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); } else { crm_info("Action %d (%s) is scheduled for %s (offline)", action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); } } } } if (last_action != NULL) { crm_info("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; } /*! * \internal * \brief Update failure-related node attributes if warranted * * \param[in] event XML describing operation that (maybe) failed * \param[in] event_node_uuid Node that event occurred on * \param[in] rc Actual operation return code * \param[in] target_rc Expected operation return code * \param[in] do_update If TRUE, do update regardless of operation type * \param[in] ignore_failures If TRUE, update last failure but not fail count * * \return TRUE if this was not a direct nack, success or lrm status refresh */ static gboolean update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc, gboolean do_update, gboolean ignore_failures) { int interval = 0; char *task = NULL; char *rsc_id = NULL; const char *value = NULL; const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); const char *on_uname = crm_peer_uname(event_node_uuid); const char *origin = crm_element_value(event, XML_ATTR_ORIGIN); /* Nothing needs to be done for success, lrm status refresh, * or direct nack (internal code for "busy, try again") */ if ((rc == CRM_DIRECT_NACK_RC) || (rc == target_rc)) { return FALSE; } else if (safe_str_eq(origin, "build_active_RAs")) { crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh", id, rc, on_uname); return FALSE; } /* Sanity check */ CRM_CHECK(on_uname != NULL, return TRUE); CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event)); goto bail); CRM_CHECK(task != NULL, goto bail); CRM_CHECK(rsc_id != NULL, goto bail); /* Decide whether update is necessary and what value to use */ if ((interval > 0) || safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { do_update = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_START)) { do_update = TRUE; if (failed_start_offset == NULL) { failed_start_offset = strdup(INFINITY_S); } value = failed_start_offset; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { do_update = TRUE; if (failed_stop_offset == NULL) { failed_stop_offset = strdup(INFINITY_S); } value = failed_stop_offset; } /* Fail count will be either incremented or set to infinity */ if (value == NULL || safe_str_neq(value, INFINITY_S)) { value = XML_NVPAIR_ATTR_VALUE "++"; } if (do_update) { char *now = crm_itoa(time(NULL)); char *attr_name = NULL; gboolean is_remote_node = FALSE; if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) { is_remote_node = TRUE; } crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)", (ignore_failures? "last failure" : "failcount"), rsc_id, on_uname, task, rc, value, now); /* Update the fail count, if we're not ignoring failures */ if (!ignore_failures) { - attr_name = crm_concat("fail-count", rsc_id, '-'); + attr_name = crm_failcount_name(rsc_id); update_attrd(on_uname, attr_name, value, NULL, is_remote_node); free(attr_name); } /* Update the last failure time (even if we're ignoring failures, * so that failure can still be detected and shown, e.g. by crm_mon) */ - attr_name = crm_concat("last-failure", rsc_id, '-'); + attr_name = crm_lastfailure_name(rsc_id); update_attrd(on_uname, attr_name, now, NULL, is_remote_node); free(attr_name); free(now); } bail: free(rsc_id); free(task); return TRUE; } /*! * \internal * \brief Return simplified operation status based on operation return code * * \param[in] action CRM action instance of operation * \param[in] orig_status Original reported operation status * \param[in] rc Actual operation return code * \param[in] target_rc Expected operation return code * * \return PCMK_LRM_OP_DONE if rc equals target_rc, PCMK_LRM_OP_ERROR otherwise * * \note This assumes that PCMK_LRM_OP_PENDING operations have already been * filtered (otherwise they will get simplified as well). */ static int status_from_rc(crm_action_t * action, int orig_status, int rc, int target_rc) { if (target_rc == rc) { crm_trace("Target rc: == %d", rc); if (orig_status != PCMK_LRM_OP_DONE) { crm_trace("Re-mapping op status to PCMK_LRM_OP_DONE for rc=%d", rc); } return PCMK_LRM_OP_DONE; } if (rc != CRM_DIRECT_NACK_RC) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s", action->id, task, uname, target_rc, rc, services_lrm_status_str(PCMK_LRM_OP_ERROR)); } return PCMK_LRM_OP_ERROR; } /*! * \internal * \brief Confirm action and update transition graph, aborting transition on failures * * \param[in/out] action CRM action instance of this operation * \param[in] event Event instance of this operation * \param[in] orig_status Original reported operation status * \param[in] op_rc Actual operation return code * \param[in] target_rc Expected operation return code * \param[in] ignore_failures Whether to ignore operation failures * * \note This assumes that PCMK_LRM_OP_PENDING operations have already been * filtered (otherwise they may be treated as failures). */ static void match_graph_event(crm_action_t *action, xmlNode *event, int op_status, int op_rc, int target_rc, gboolean ignore_failures) { const char *target = NULL; const char *this_event = NULL; const char *ignore_s = ""; /* Remap operation status based on return code */ op_status = status_from_rc(action, op_status, op_rc, target_rc); /* Process OP status */ switch (op_status) { case PCMK_LRM_OP_DONE: break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: if (ignore_failures) { ignore_s = ", ignoring failure"; } else { action->failed = TRUE; } break; case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Don't know what to do for cancelled ops yet"); break; default: /* PCMK_LRM_OP_ERROR_HARD, PCMK_LRM_OP_ERROR_FATAL, PCMK_LRM_OP_NOT_INSTALLED */ action->failed = TRUE; crm_err("Unsupported action result: %d", op_status); } /* stop this event's timer if it had one */ stop_te_timer(action->timer); te_action_confirmed(action); update_graph(transition_graph, action); trigger_graph(); if (action->failed) { abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event); } this_event = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); crm_info("Action %s (%d) confirmed on %s (rc=%d%s)", crm_str(this_event), action->id, crm_str(target), op_rc, ignore_s); } crm_action_t * get_action(int id, gboolean confirmed) { GListPtr gIter = NULL; GListPtr gIter2 = NULL; gIter = transition_graph->synapses; for (; gIter != NULL; gIter = gIter->next) { synapse_t *synapse = (synapse_t *) gIter->data; gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { crm_action_t *action = (crm_action_t *) gIter2->data; if (action->id == id) { if (confirmed) { stop_te_timer(action->timer); te_action_confirmed(action); } return action; } } } return NULL; } crm_action_t * get_cancel_action(const char *id, const char *node) { GListPtr gIter = NULL; GListPtr gIter2 = NULL; gIter = transition_graph->synapses; for (; gIter != NULL; gIter = gIter->next) { synapse_t *synapse = (synapse_t *) gIter->data; gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { const char *task = NULL; const char *target = NULL; crm_action_t *action = (crm_action_t *) gIter2->data; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (safe_str_neq(CRMD_ACTION_CANCEL, task)) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if (safe_str_neq(task, id)) { crm_trace("Wrong key %s for %s on %s", task, id, node); continue; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); if (node && safe_str_neq(target, node)) { crm_trace("Wrong node %s for %s on %s", target, id, node); continue; } crm_trace("Found %s on %s", id, node); return action; } } return NULL; } /* downed nodes are listed like: ... */ #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \ "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']" /*! * \brief Find a transition event that would have made a specified node down * * \param[in] target UUID of node to match * \param[in] quiet If FALSE, log a warning if no match found * * \return Matching event if found, NULL otherwise */ crm_action_t * match_down_event(const char *target, bool quiet) { crm_action_t *match = NULL; xmlXPathObjectPtr xpath_ret = NULL; GListPtr gIter, gIter2; char *xpath = crm_strdup_printf(XPATH_DOWNED, target); for (gIter = transition_graph->synapses; gIter != NULL && match == NULL; gIter = gIter->next) { for (gIter2 = ((synapse_t*)gIter->data)->actions; gIter2 != NULL && match == NULL; gIter2 = gIter2->next) { match = (crm_action_t*)gIter2->data; xpath_ret = xpath_search(match->xml, xpath); if (numXpathResults(xpath_ret) < 1) { match = NULL; } freeXpathObject(xpath_ret); } } free(xpath); if (match != NULL) { crm_debug("Shutdown action found for node %s: action %d (%s)", target, match->id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY)); } else if(quiet == FALSE) { crm_warn("No reason to expect node %s to be down", target); } return match; } gboolean process_graph_event(xmlNode * event, const char *event_node) { int rc = -1; int status = -1; int callid = -1; int action_num = -1; crm_action_t *action = NULL; int target_rc = -1; int transition_num = -1; char *update_te_uuid = NULL; gboolean stop_early = FALSE; gboolean ignore_failures = FALSE; const char *id = NULL; const char *desc = NULL; const char *magic = NULL; CRM_ASSERT(event != NULL); /* */ id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); crm_element_value_int(event, XML_LRM_ATTR_RC, &rc); crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid); magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY); if (magic == NULL) { /* non-change */ return FALSE; } if (decode_transition_key(magic, &update_te_uuid, &transition_num, &action_num, &target_rc) == FALSE) { crm_err("Invalid event %s.%d detected: %s", id, callid, magic); abort_transition(INFINITY, tg_restart, "Bad event", event); return FALSE; } if (status == PCMK_LRM_OP_PENDING) { goto bail; } if (transition_num == -1) { desc = "initiated outside of the cluster"; abort_transition(INFINITY, tg_restart, "Unexpected event", event); } else if ((action_num < 0) || (crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE)) { desc = "initiated by a different node"; abort_transition(INFINITY, tg_restart, "Foreign event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if (transition_graph->id != transition_num) { desc = "arrived really late"; abort_transition(INFINITY, tg_restart, "Old event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if (transition_graph->complete) { desc = "arrived late"; abort_transition(INFINITY, tg_restart, "Inactive graph", event); } else { action = get_action(action_num, FALSE); if (action == NULL) { desc = "unknown"; abort_transition(INFINITY, tg_restart, "Unknown event", event); } else { ignore_failures = safe_str_eq( crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore"); match_graph_event(action, event, status, rc, target_rc, ignore_failures); } } if (action && (rc == target_rc)) { crm_trace("Processed update to %s: %s", id, magic); } else { if (update_failcount(event, event_node, rc, target_rc, (transition_num == -1), ignore_failures)) { /* Turns out this wasn't an lrm status refresh update afterall */ stop_early = FALSE; desc = "failed"; } crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num, action_num, id, callid, services_ocf_exitcode_str(rc), desc); } bail: free(update_te_uuid); return stop_early; } diff --git a/crmd/utils.c b/crmd/utils.c index 08579a5cfc..07c323d2fa 100644 --- a/crmd/utils.c +++ b/crmd/utils.c @@ -1,1163 +1,1019 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include -#include -#include -#include -#include - #include #include -#include - #include #include - #include #include #include #include /* A_DC_TIMER_STOP, A_DC_TIMER_START, * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START */ void do_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean timer_op_ok = TRUE; if (action & A_DC_TIMER_STOP) { timer_op_ok = crm_timer_stop(election_trigger); } else if (action & A_FINALIZE_TIMER_STOP) { timer_op_ok = crm_timer_stop(finalization_timer); } else if (action & A_INTEGRATE_TIMER_STOP) { timer_op_ok = crm_timer_stop(integration_timer); /* } else if(action & A_ELECTION_TIMEOUT_STOP) { */ /* timer_op_ok = crm_timer_stop(election_timeout); */ } /* don't start a timer that wasn't already running */ if (action & A_DC_TIMER_START && timer_op_ok) { crm_timer_start(election_trigger); if (AM_I_DC) { /* there can be only one */ register_fsa_input(cause, I_ELECTION, NULL); } } else if (action & A_FINALIZE_TIMER_START) { crm_timer_start(finalization_timer); } else if (action & A_INTEGRATE_TIMER_START) { crm_timer_start(integration_timer); /* } else if(action & A_ELECTION_TIMEOUT_START) { */ /* crm_timer_start(election_timeout); */ } } const char * get_timer_desc(fsa_timer_t * timer) { if (timer == election_trigger) { return "Election Trigger"; } else if (timer == shutdown_escalation_timer) { return "Shutdown Escalation"; } else if (timer == integration_timer) { return "Integration Timer"; } else if (timer == finalization_timer) { return "Finalization Timer"; } else if (timer == transition_timer) { return "New Transition Timer"; } else if (timer == wait_timer) { return "Wait Timer"; } else if (timer == recheck_timer) { return "PEngine Recheck Timer"; } return "Unknown Timer"; } gboolean crm_timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *) data; if (timer == wait_timer || timer == recheck_timer || timer == transition_timer || timer == finalization_timer || timer == election_trigger) { crm_info("%s (%s) just popped (%dms)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms); timer->counter++; } else { crm_err("%s (%s) just popped in state %s! (%dms)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state), timer->period_ms); } if (timer == election_trigger && election_trigger->counter > 5) { crm_notice("We appear to be in an election loop, something may be wrong"); crm_write_blackbox(0, NULL); election_trigger->counter = 0; } if (timer->repeat == FALSE) { crm_timer_stop(timer); /* make it _not_ go off again */ } if (timer->fsa_input == I_INTEGRATED) { crm_info("Welcomed: %d, Integrated: %d", crmd_join_phase_count(crm_join_welcomed), crmd_join_phase_count(crm_join_integrated)); if (crmd_join_phase_count(crm_join_welcomed) == 0) { /* If we don't even have ourself, start again */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __FUNCTION__); } else { register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL); } } else if (timer == recheck_timer && fsa_state != S_IDLE) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if (timer == finalization_timer && fsa_state != S_FINALIZE_JOIN) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if (timer->fsa_input != I_NULL) { register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); } crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); return TRUE; } gboolean is_timer_started(fsa_timer_t * timer) { if (timer->period_ms > 0) { if (transition_timer->source_id == 0) { return FALSE; } else { return TRUE; } } return FALSE; } gboolean crm_timer_start(fsa_timer_t * timer) { const char *timer_desc = get_timer_desc(timer); if (timer->source_id == 0 && timer->period_ms > 0) { timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer); CRM_ASSERT(timer->source_id != 0); crm_debug("Started %s (%s:%dms), src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); } else if (timer->period_ms < 0) { crm_err("Tried to start %s (%s:%dms) with a -ve period", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms); } else { crm_debug("%s (%s:%dms) already running: src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); return FALSE; } return TRUE; } gboolean crm_timer_stop(fsa_timer_t * timer) { const char *timer_desc = get_timer_desc(timer); if (timer == NULL) { crm_err("Attempted to stop NULL timer"); return FALSE; } else if (timer->source_id != 0) { crm_trace("Stopping %s (%s:%dms), src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("%s (%s:%dms) already stopped", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms); return FALSE; } return TRUE; } const char * fsa_input2string(enum crmd_fsa_input input) { const char *inputAsText = NULL; switch (input) { case I_NULL: inputAsText = "I_NULL"; break; case I_CIB_OP: inputAsText = "I_CIB_OP (unused)"; break; case I_CIB_UPDATE: inputAsText = "I_CIB_UPDATE"; break; case I_DC_TIMEOUT: inputAsText = "I_DC_TIMEOUT"; break; case I_ELECTION: inputAsText = "I_ELECTION"; break; case I_PE_CALC: inputAsText = "I_PE_CALC"; break; case I_RELEASE_DC: inputAsText = "I_RELEASE_DC"; break; case I_ELECTION_DC: inputAsText = "I_ELECTION_DC"; break; case I_ERROR: inputAsText = "I_ERROR"; break; case I_FAIL: inputAsText = "I_FAIL"; break; case I_INTEGRATED: inputAsText = "I_INTEGRATED"; break; case I_FINALIZED: inputAsText = "I_FINALIZED"; break; case I_NODE_JOIN: inputAsText = "I_NODE_JOIN"; break; case I_JOIN_OFFER: inputAsText = "I_JOIN_OFFER"; break; case I_JOIN_REQUEST: inputAsText = "I_JOIN_REQUEST"; break; case I_JOIN_RESULT: inputAsText = "I_JOIN_RESULT"; break; case I_NOT_DC: inputAsText = "I_NOT_DC"; break; case I_RECOVERED: inputAsText = "I_RECOVERED"; break; case I_RELEASE_FAIL: inputAsText = "I_RELEASE_FAIL"; break; case I_RELEASE_SUCCESS: inputAsText = "I_RELEASE_SUCCESS"; break; case I_RESTART: inputAsText = "I_RESTART"; break; case I_PE_SUCCESS: inputAsText = "I_PE_SUCCESS"; break; case I_ROUTER: inputAsText = "I_ROUTER"; break; case I_SHUTDOWN: inputAsText = "I_SHUTDOWN"; break; case I_STARTUP: inputAsText = "I_STARTUP"; break; case I_TE_SUCCESS: inputAsText = "I_TE_SUCCESS"; break; case I_STOP: inputAsText = "I_STOP"; break; case I_DC_HEARTBEAT: inputAsText = "I_DC_HEARTBEAT"; break; case I_WAIT_FOR_EVENT: inputAsText = "I_WAIT_FOR_EVENT"; break; case I_LRM_EVENT: inputAsText = "I_LRM_EVENT"; break; case I_PENDING: inputAsText = "I_PENDING"; break; case I_HALT: inputAsText = "I_HALT"; break; case I_TERMINATE: inputAsText = "I_TERMINATE"; break; case I_ILLEGAL: inputAsText = "I_ILLEGAL"; break; } if (inputAsText == NULL) { crm_err("Input %d is unknown", input); inputAsText = ""; } return inputAsText; } const char * fsa_state2string(enum crmd_fsa_state state) { const char *stateAsText = NULL; switch (state) { case S_IDLE: stateAsText = "S_IDLE"; break; case S_ELECTION: stateAsText = "S_ELECTION"; break; case S_INTEGRATION: stateAsText = "S_INTEGRATION"; break; case S_FINALIZE_JOIN: stateAsText = "S_FINALIZE_JOIN"; break; case S_NOT_DC: stateAsText = "S_NOT_DC"; break; case S_POLICY_ENGINE: stateAsText = "S_POLICY_ENGINE"; break; case S_RECOVERY: stateAsText = "S_RECOVERY"; break; case S_RELEASE_DC: stateAsText = "S_RELEASE_DC"; break; case S_PENDING: stateAsText = "S_PENDING"; break; case S_STOPPING: stateAsText = "S_STOPPING"; break; case S_TERMINATE: stateAsText = "S_TERMINATE"; break; case S_TRANSITION_ENGINE: stateAsText = "S_TRANSITION_ENGINE"; break; case S_STARTING: stateAsText = "S_STARTING"; break; case S_HALT: stateAsText = "S_HALT"; break; case S_ILLEGAL: stateAsText = "S_ILLEGAL"; break; } if (stateAsText == NULL) { crm_err("State %d is unknown", state); stateAsText = ""; } return stateAsText; } const char * fsa_cause2string(enum crmd_fsa_cause cause) { const char *causeAsText = NULL; switch (cause) { case C_UNKNOWN: causeAsText = "C_UNKNOWN"; break; case C_STARTUP: causeAsText = "C_STARTUP"; break; case C_IPC_MESSAGE: causeAsText = "C_IPC_MESSAGE"; break; case C_HA_MESSAGE: causeAsText = "C_HA_MESSAGE"; break; case C_CCM_CALLBACK: causeAsText = "C_CCM_CALLBACK"; break; case C_TIMER_POPPED: causeAsText = "C_TIMER_POPPED"; break; case C_SHUTDOWN: causeAsText = "C_SHUTDOWN"; break; case C_HEARTBEAT_FAILED: causeAsText = "C_HEARTBEAT_FAILED"; break; case C_SUBSYSTEM_CONNECT: causeAsText = "C_SUBSYSTEM_CONNECT"; break; case C_LRM_OP_CALLBACK: causeAsText = "C_LRM_OP_CALLBACK"; break; case C_LRM_MONITOR_CALLBACK: causeAsText = "C_LRM_MONITOR_CALLBACK"; break; case C_CRMD_STATUS_CALLBACK: causeAsText = "C_CRMD_STATUS_CALLBACK"; break; case C_HA_DISCONNECT: causeAsText = "C_HA_DISCONNECT"; break; case C_FSA_INTERNAL: causeAsText = "C_FSA_INTERNAL"; break; case C_ILLEGAL: causeAsText = "C_ILLEGAL"; break; } if (causeAsText == NULL) { crm_err("Cause %d is unknown", cause); causeAsText = ""; } return causeAsText; } const char * fsa_action2string(long long action) { const char *actionAsText = NULL; switch (action) { case A_NOTHING: actionAsText = "A_NOTHING"; break; case A_ELECTION_START: actionAsText = "A_ELECTION_START"; break; case A_DC_JOIN_FINAL: actionAsText = "A_DC_JOIN_FINAL"; break; case A_READCONFIG: actionAsText = "A_READCONFIG"; break; case O_RELEASE: actionAsText = "O_RELEASE"; break; case A_STARTUP: actionAsText = "A_STARTUP"; break; case A_STARTED: actionAsText = "A_STARTED"; break; case A_HA_CONNECT: actionAsText = "A_HA_CONNECT"; break; case A_HA_DISCONNECT: actionAsText = "A_HA_DISCONNECT"; break; case A_LRM_CONNECT: actionAsText = "A_LRM_CONNECT"; break; case A_LRM_EVENT: actionAsText = "A_LRM_EVENT"; break; case A_LRM_INVOKE: actionAsText = "A_LRM_INVOKE"; break; case A_LRM_DISCONNECT: actionAsText = "A_LRM_DISCONNECT"; break; case O_LRM_RECONNECT: actionAsText = "O_LRM_RECONNECT"; break; case A_CL_JOIN_QUERY: actionAsText = "A_CL_JOIN_QUERY"; break; case A_DC_TIMER_STOP: actionAsText = "A_DC_TIMER_STOP"; break; case A_DC_TIMER_START: actionAsText = "A_DC_TIMER_START"; break; case A_INTEGRATE_TIMER_START: actionAsText = "A_INTEGRATE_TIMER_START"; break; case A_INTEGRATE_TIMER_STOP: actionAsText = "A_INTEGRATE_TIMER_STOP"; break; case A_FINALIZE_TIMER_START: actionAsText = "A_FINALIZE_TIMER_START"; break; case A_FINALIZE_TIMER_STOP: actionAsText = "A_FINALIZE_TIMER_STOP"; break; case A_ELECTION_COUNT: actionAsText = "A_ELECTION_COUNT"; break; case A_ELECTION_VOTE: actionAsText = "A_ELECTION_VOTE"; break; case A_ELECTION_CHECK: actionAsText = "A_ELECTION_CHECK"; break; case A_CL_JOIN_ANNOUNCE: actionAsText = "A_CL_JOIN_ANNOUNCE"; break; case A_CL_JOIN_REQUEST: actionAsText = "A_CL_JOIN_REQUEST"; break; case A_CL_JOIN_RESULT: actionAsText = "A_CL_JOIN_RESULT"; break; case A_DC_JOIN_OFFER_ALL: actionAsText = "A_DC_JOIN_OFFER_ALL"; break; case A_DC_JOIN_OFFER_ONE: actionAsText = "A_DC_JOIN_OFFER_ONE"; break; case A_DC_JOIN_PROCESS_REQ: actionAsText = "A_DC_JOIN_PROCESS_REQ"; break; case A_DC_JOIN_PROCESS_ACK: actionAsText = "A_DC_JOIN_PROCESS_ACK"; break; case A_DC_JOIN_FINALIZE: actionAsText = "A_DC_JOIN_FINALIZE"; break; case A_MSG_PROCESS: actionAsText = "A_MSG_PROCESS"; break; case A_MSG_ROUTE: actionAsText = "A_MSG_ROUTE"; break; case A_RECOVER: actionAsText = "A_RECOVER"; break; case A_DC_RELEASE: actionAsText = "A_DC_RELEASE"; break; case A_DC_RELEASED: actionAsText = "A_DC_RELEASED"; break; case A_DC_TAKEOVER: actionAsText = "A_DC_TAKEOVER"; break; case A_SHUTDOWN: actionAsText = "A_SHUTDOWN"; break; case A_SHUTDOWN_REQ: actionAsText = "A_SHUTDOWN_REQ"; break; case A_STOP: actionAsText = "A_STOP "; break; case A_EXIT_0: actionAsText = "A_EXIT_0"; break; case A_EXIT_1: actionAsText = "A_EXIT_1"; break; case A_CCM_CONNECT: actionAsText = "A_CCM_CONNECT"; break; case A_CCM_DISCONNECT: actionAsText = "A_CCM_DISCONNECT"; break; case O_CIB_RESTART: actionAsText = "O_CIB_RESTART"; break; case A_CIB_START: actionAsText = "A_CIB_START"; break; case A_CIB_STOP: actionAsText = "A_CIB_STOP"; break; case A_TE_INVOKE: actionAsText = "A_TE_INVOKE"; break; case O_TE_RESTART: actionAsText = "O_TE_RESTART"; break; case A_TE_START: actionAsText = "A_TE_START"; break; case A_TE_STOP: actionAsText = "A_TE_STOP"; break; case A_TE_HALT: actionAsText = "A_TE_HALT"; break; case A_TE_CANCEL: actionAsText = "A_TE_CANCEL"; break; case A_PE_INVOKE: actionAsText = "A_PE_INVOKE"; break; case O_PE_RESTART: actionAsText = "O_PE_RESTART"; break; case A_PE_START: actionAsText = "A_PE_START"; break; case A_PE_STOP: actionAsText = "A_PE_STOP"; break; case A_NODE_BLOCK: actionAsText = "A_NODE_BLOCK"; break; case A_UPDATE_NODESTATUS: actionAsText = "A_UPDATE_NODESTATUS"; break; case A_LOG: actionAsText = "A_LOG "; break; case A_ERROR: actionAsText = "A_ERROR "; break; case A_WARN: actionAsText = "A_WARN "; break; /* Composite actions */ case A_DC_TIMER_START | A_CL_JOIN_QUERY: actionAsText = "A_DC_TIMER_START|A_CL_JOIN_QUERY"; break; } if (actionAsText == NULL) { crm_err("Action %.16llx is unknown", action); actionAsText = ""; } return actionAsText; } void fsa_dump_inputs(int log_level, const char *text, long long input_register) { if (input_register == A_NOTHING) { return; } if (text == NULL) { text = "Input register contents:"; } if (is_set(input_register, R_THE_DC)) { crm_trace("%s %.16llx (R_THE_DC)", text, R_THE_DC); } if (is_set(input_register, R_STARTING)) { crm_trace("%s %.16llx (R_STARTING)", text, R_STARTING); } if (is_set(input_register, R_SHUTDOWN)) { crm_trace("%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN); } if (is_set(input_register, R_STAYDOWN)) { crm_trace("%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN); } if (is_set(input_register, R_JOIN_OK)) { crm_trace("%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK); } if (is_set(input_register, R_READ_CONFIG)) { crm_trace("%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG); } if (is_set(input_register, R_INVOKE_PE)) { crm_trace("%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE); } if (is_set(input_register, R_CIB_CONNECTED)) { crm_trace("%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED); } if (is_set(input_register, R_PE_CONNECTED)) { crm_trace("%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED); } if (is_set(input_register, R_TE_CONNECTED)) { crm_trace("%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED); } if (is_set(input_register, R_LRM_CONNECTED)) { crm_trace("%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED); } if (is_set(input_register, R_CIB_REQUIRED)) { crm_trace("%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED); } if (is_set(input_register, R_PE_REQUIRED)) { crm_trace("%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED); } if (is_set(input_register, R_TE_REQUIRED)) { crm_trace("%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED); } if (is_set(input_register, R_REQ_PEND)) { crm_trace("%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND); } if (is_set(input_register, R_PE_PEND)) { crm_trace("%s %.16llx (R_PE_PEND)", text, R_PE_PEND); } if (is_set(input_register, R_TE_PEND)) { crm_trace("%s %.16llx (R_TE_PEND)", text, R_TE_PEND); } if (is_set(input_register, R_RESP_PEND)) { crm_trace("%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND); } if (is_set(input_register, R_CIB_DONE)) { crm_trace("%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE); } if (is_set(input_register, R_HAVE_CIB)) { crm_trace("%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB); } if (is_set(input_register, R_CIB_ASKED)) { crm_trace("%s %.16llx (R_CIB_ASKED)", text, R_CIB_ASKED); } if (is_set(input_register, R_MEMBERSHIP)) { crm_trace("%s %.16llx (R_MEMBERSHIP)", text, R_MEMBERSHIP); } if (is_set(input_register, R_PEER_DATA)) { crm_trace("%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA); } if (is_set(input_register, R_IN_RECOVERY)) { crm_trace("%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY); } } void fsa_dump_actions(long long action, const char *text) { if (is_set(action, A_READCONFIG)) { crm_trace("Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text); } if (is_set(action, A_STARTUP)) { crm_trace("Action %.16llx (A_STARTUP) %s", A_STARTUP, text); } if (is_set(action, A_STARTED)) { crm_trace("Action %.16llx (A_STARTED) %s", A_STARTED, text); } if (is_set(action, A_HA_CONNECT)) { crm_trace("Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text); } if (is_set(action, A_HA_DISCONNECT)) { crm_trace("Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text); } if (is_set(action, A_LRM_CONNECT)) { crm_trace("Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text); } if (is_set(action, A_LRM_EVENT)) { crm_trace("Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text); } if (is_set(action, A_LRM_INVOKE)) { crm_trace("Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text); } if (is_set(action, A_LRM_DISCONNECT)) { crm_trace("Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text); } if (is_set(action, A_DC_TIMER_STOP)) { crm_trace("Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text); } if (is_set(action, A_DC_TIMER_START)) { crm_trace("Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text); } if (is_set(action, A_INTEGRATE_TIMER_START)) { crm_trace("Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text); } if (is_set(action, A_INTEGRATE_TIMER_STOP)) { crm_trace("Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text); } if (is_set(action, A_FINALIZE_TIMER_START)) { crm_trace("Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text); } if (is_set(action, A_FINALIZE_TIMER_STOP)) { crm_trace("Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text); } if (is_set(action, A_ELECTION_COUNT)) { crm_trace("Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text); } if (is_set(action, A_ELECTION_VOTE)) { crm_trace("Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text); } if (is_set(action, A_ELECTION_CHECK)) { crm_trace("Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text); } if (is_set(action, A_CL_JOIN_ANNOUNCE)) { crm_trace("Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text); } if (is_set(action, A_CL_JOIN_REQUEST)) { crm_trace("Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text); } if (is_set(action, A_CL_JOIN_RESULT)) { crm_trace("Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text); } if (is_set(action, A_DC_JOIN_OFFER_ALL)) { crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text); } if (is_set(action, A_DC_JOIN_OFFER_ONE)) { crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text); } if (is_set(action, A_DC_JOIN_PROCESS_REQ)) { crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text); } if (is_set(action, A_DC_JOIN_PROCESS_ACK)) { crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text); } if (is_set(action, A_DC_JOIN_FINALIZE)) { crm_trace("Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text); } if (is_set(action, A_MSG_PROCESS)) { crm_trace("Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text); } if (is_set(action, A_MSG_ROUTE)) { crm_trace("Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text); } if (is_set(action, A_RECOVER)) { crm_trace("Action %.16llx (A_RECOVER) %s", A_RECOVER, text); } if (is_set(action, A_DC_RELEASE)) { crm_trace("Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text); } if (is_set(action, A_DC_RELEASED)) { crm_trace("Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text); } if (is_set(action, A_DC_TAKEOVER)) { crm_trace("Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text); } if (is_set(action, A_SHUTDOWN)) { crm_trace("Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text); } if (is_set(action, A_SHUTDOWN_REQ)) { crm_trace("Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text); } if (is_set(action, A_STOP)) { crm_trace("Action %.16llx (A_STOP ) %s", A_STOP, text); } if (is_set(action, A_EXIT_0)) { crm_trace("Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text); } if (is_set(action, A_EXIT_1)) { crm_trace("Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text); } if (is_set(action, A_CCM_CONNECT)) { crm_trace("Action %.16llx (A_CCM_CONNECT) %s", A_CCM_CONNECT, text); } if (is_set(action, A_CCM_DISCONNECT)) { crm_trace("Action %.16llx (A_CCM_DISCONNECT) %s", A_CCM_DISCONNECT, text); } if (is_set(action, A_CIB_START)) { crm_trace("Action %.16llx (A_CIB_START) %s", A_CIB_START, text); } if (is_set(action, A_CIB_STOP)) { crm_trace("Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text); } if (is_set(action, A_TE_INVOKE)) { crm_trace("Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text); } if (is_set(action, A_TE_START)) { crm_trace("Action %.16llx (A_TE_START) %s", A_TE_START, text); } if (is_set(action, A_TE_STOP)) { crm_trace("Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text); } if (is_set(action, A_TE_CANCEL)) { crm_trace("Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text); } if (is_set(action, A_PE_INVOKE)) { crm_trace("Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text); } if (is_set(action, A_PE_START)) { crm_trace("Action %.16llx (A_PE_START) %s", A_PE_START, text); } if (is_set(action, A_PE_STOP)) { crm_trace("Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text); } if (is_set(action, A_NODE_BLOCK)) { crm_trace("Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text); } if (is_set(action, A_UPDATE_NODESTATUS)) { crm_trace("Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text); } if (is_set(action, A_LOG)) { crm_trace("Action %.16llx (A_LOG ) %s", A_LOG, text); } if (is_set(action, A_ERROR)) { crm_trace("Action %.16llx (A_ERROR ) %s", A_ERROR, text); } if (is_set(action, A_WARN)) { crm_trace("Action %.16llx (A_WARN ) %s", A_WARN, text); } } gboolean update_dc(xmlNode * msg) { char *last_dc = fsa_our_dc; const char *dc_version = NULL; const char *welcome_from = NULL; if (msg != NULL) { gboolean invalid = FALSE; dc_version = crm_element_value(msg, F_CRM_VERSION); welcome_from = crm_element_value(msg, F_CRM_HOST_FROM); CRM_CHECK(dc_version != NULL, return FALSE); CRM_CHECK(welcome_from != NULL, return FALSE); if (AM_I_DC && safe_str_neq(welcome_from, fsa_our_uname)) { invalid = TRUE; } else if (fsa_our_dc && safe_str_neq(welcome_from, fsa_our_dc)) { invalid = TRUE; } if (invalid) { CRM_CHECK(fsa_our_dc != NULL, crm_err("We have no DC")); if (AM_I_DC) { crm_err("Not updating DC to %s (%s): we are also a DC", welcome_from, dc_version); } else { crm_warn("New DC %s is not %s", welcome_from, fsa_our_dc); } register_fsa_action(A_CL_JOIN_QUERY | A_DC_TIMER_START); return FALSE; } } free(fsa_our_dc_version); fsa_our_dc_version = NULL; fsa_our_dc = NULL; /* Free'd as last_dc */ if (welcome_from != NULL) { fsa_our_dc = strdup(welcome_from); } if (dc_version != NULL) { fsa_our_dc_version = strdup(dc_version); } if (safe_str_eq(fsa_our_dc, last_dc)) { /* do nothing */ } else if (fsa_our_dc != NULL) { crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc); crm_info("Set DC to %s (%s)", crm_str(fsa_our_dc), crm_str(fsa_our_dc_version)); crm_update_peer_expected(__FUNCTION__, dc_node, CRMD_JOINSTATE_MEMBER); } else if (last_dc != NULL) { crm_info("Unset DC. Was %s", crm_str(last_dc)); } free(last_dc); return TRUE; } #define STATUS_PATH_MAX 512 static void erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *xpath = user_data; do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Deletion of \"%s\": %s (rc=%d)", xpath, pcmk_strerror(rc), rc); } void erase_status_tag(const char *uname, const char *tag, int options) { int rc = pcmk_ok; char xpath[STATUS_PATH_MAX]; int cib_opts = cib_quorum_override | cib_xpath | options; if (fsa_cib_conn && uname) { snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", uname, tag); crm_info("Deleting xpath: %s", xpath); rc = fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL, cib_opts); fsa_register_cib_callback(rc, FALSE, strdup(xpath), erase_xpath_callback); } } -crm_ipc_t *attrd_ipc = NULL; - -#if !HAVE_ATOMIC_ATTRD -static int -update_without_attrd(const char * host_uuid, const char * name, const char * value, - const char * user_name, gboolean is_remote_node, char command) -{ - int call_opt = cib_none; - - if (fsa_cib_conn == NULL) { - return -1; - } - - call_opt = crmd_cib_smart_opt(); - - if (command == 'C') { - erase_status_tag(host_uuid, XML_TAG_TRANSIENT_NODEATTRS, call_opt); - return pcmk_ok; - } - - crm_trace("updating status for host_uuid %s, %s=%s", host_uuid, name ? name : "", value ? value : ""); - if (value) { - return update_attr_delegate(fsa_cib_conn, call_opt, XML_CIB_TAG_STATUS, host_uuid, NULL, NULL, - NULL, name, value, FALSE, user_name, is_remote_node ? "remote" : NULL); - } else { - return delete_attr_delegate(fsa_cib_conn, call_opt, XML_CIB_TAG_STATUS, host_uuid, NULL, NULL, - NULL, name, NULL, FALSE, user_name); - } -} -#endif - -static void -log_attrd_error(const char *host, const char *name, const char *value, - gboolean is_remote, char command, int rc) -{ - const char *display_command; /* for commands without name/value */ - const char *node_type = (is_remote? "Pacemaker Remote" : "cluster"); - gboolean shutting_down = is_set(fsa_input_register, R_SHUTDOWN); - const char *when = (shutting_down? " at shutdown" : ""); - - switch (command) { - case 'R': - display_command = "refresh"; - break; - case 'C': - display_command = "purge"; - break; - default: - display_command = NULL; - } - - if (display_command) { - crm_err("Could not request %s of %s node %s%s: %s (%d)", - display_command, node_type, host, when, pcmk_strerror(rc), rc); - } else { - crm_err("Could not request update of %s=%s for %s node %s%s: %s (%d)", - name, value, node_type, host, when, pcmk_strerror(rc), rc); - } - - /* If we can't request shutdown via attribute, fast-track it */ - if ((command == 'U') && shutting_down) { - register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL); - } -} - -static void -update_attrd_helper(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node, char command) -{ - gboolean rc; - int max = 5; - int attrd_opts = attrd_opt_none; - - if (is_remote_node) { -#if HAVE_ATOMIC_ATTRD - attrd_opts |= attrd_opt_remote; -#else - /* Talk directly to cib for remote nodes if it's legacy attrd */ - int rc; - - /* host is required for updating a remote node */ - CRM_CHECK(host != NULL, return;); - /* remote node uname and uuid are equal */ - rc = update_without_attrd(host, name, value, user_name, is_remote_node, - command); - if (rc < pcmk_ok) { - log_attrd_error(host, name, value, is_remote_node, command, rc); - } - return; -#endif - } - - if (attrd_ipc == NULL) { - attrd_ipc = crm_ipc_new(T_ATTRD, 0); - } - - do { - if (crm_ipc_connected(attrd_ipc) == FALSE) { - crm_ipc_close(attrd_ipc); - crm_info("Connecting to attribute manager ... %d retries remaining", max); - if (crm_ipc_connect(attrd_ipc) == FALSE) { - crm_perror(LOG_INFO, "Connection to attribute manager failed"); - } - } - - rc = attrd_update_delegate(attrd_ipc, command, host, name, value, XML_CIB_TAG_STATUS, NULL, - NULL, user_name, attrd_opts); - if (rc == pcmk_ok) { - break; - - } else if (rc != -EAGAIN && rc != -EALREADY) { - crm_info("Disconnecting from attribute manager: %s (%d)", pcmk_strerror(rc), rc); - crm_ipc_close(attrd_ipc); - } - - sleep(5 - max); - - } while (max--); - - if (rc != pcmk_ok) { - log_attrd_error(host, name, value, is_remote_node, command, rc); - } -} - -void -update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node) -{ - update_attrd_helper(host, name, value, user_name, is_remote_node, 'U'); -} - -void -update_attrd_remote_node_removed(const char *host, const char *user_name) -{ - crm_trace("Asking attrd to purge Pacemaker Remote node %s", host); - update_attrd_helper(host, NULL, NULL, user_name, TRUE, 'C'); -} - void crmd_peer_down(crm_node_t *peer, bool full) { if(full && peer->state == NULL) { crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL); } crm_update_peer_join(__FUNCTION__, peer, crm_join_none); crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); } diff --git a/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt b/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt index 05dc42e5f1..733f870713 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Constraints.txt @@ -1,846 +1,850 @@ = Resource Constraints = indexterm:[Resource,Constraints] == Scores == Scores of all kinds are integral to how the cluster works. Practically everything from moving a resource to deciding which resource to stop in a degraded cluster is achieved by manipulating scores in some way. Scores are calculated per resource and node. Any node with a negative score for a resource can't run that resource. The cluster places a resource on the node with the highest score for it. === Infinity Math === Pacemaker implements +INFINITY+ (or equivalently, ++INFINITY+) internally as a score of 1,000,000. Addition and subtraction with it follow these three basic rules: * Any value + +INFINITY+ = +INFINITY+ * Any value - +INFINITY+ = +-INFINITY+ * +INFINITY+ - +INFINITY+ = +-INFINITY+ [NOTE] ====== What if you want to use a score higher than 1,000,000? Typically this possibility arises when someone wants to base the score on some external metric that might go above 1,000,000. The short answer is you can't. The long answer is it is sometimes possible work around this limitation creatively. You may be able to set the score to some computed value based on the external metric rather than use the metric directly. For nodes, you can store the metric as a node attribute, and query the attribute when computing the score (possibly as part of a custom resource agent). ====== == Deciding Which Nodes a Resource Can Run On == indexterm:[Location Constraints] indexterm:[Resource,Constraints,Location] 'Location constraints' tell the cluster which nodes a resource can run on. There are two alternative strategies. One way is to say that, by default, resources can run anywhere, and then the location constraints specify nodes that are not allowed (an 'opt-out' cluster). The other way is to start with nothing able to run anywhere, and use location constraints to selectively enable allowed nodes (an 'opt-in' cluster). Whether you should choose opt-in or opt-out depends on your personal preference and the make-up of your cluster. If most of your resources can run on most of the nodes, then an opt-out arrangement is likely to result in a simpler configuration. On the other-hand, if most resources can only run on a small subset of nodes, an opt-in configuration might be simpler. === Location Properties === .Properties of a rsc_location Constraint [width="95%",cols="2m,1,5>), the submatches can be referenced as +%0+ through +%9+ in the rule's +score-attribute+ or a rule expression's +attribute+ '(since 1.1.16)' indexterm:[rsc-pattern,Location Constraints] indexterm:[Constraints,Location,rsc-pattern] |node | |A node's name indexterm:[node,Location Constraints] indexterm:[Constraints,Location,node] |score | |Positive values indicate the resource should run on this node. Negative values indicate the resource should not run on this node. Values of \+/- +INFINITY+ change "should"/"should not" to "must"/"must not". indexterm:[score,Location Constraints] indexterm:[Constraints,Location,score] |resource-discovery |always |Whether Pacemaker should perform resource discovery (that is, check whether the resource is already running) for this resource on this node. This should normally be left as the default, so that rogue instances of a service can be stopped when they are running where they are not supposed to be. However, there are two situations where disabling resource discovery is a good idea: when a service is not installed on a node, discovery might return an error (properly written OCF agents will not, so this is usually only seen with other agent types); and when Pacemaker Remote is used to scale a cluster to hundreds of nodes, limiting resource discovery to allowed nodes can significantly boost performance. '(since 1.1.13)' * +always:+ Always perform resource discovery for the specified resource on this node. * +never:+ Never perform resource discovery for the specified resource on this node. This option should generally be used with a -INFINITY score, although that is not strictly required. * +exclusive:+ Perform resource discovery for the specified resource only on this node (and other nodes similarly marked as +exclusive+). Multiple location constraints using +exclusive+ discovery for the same resource across different nodes creates a subset of nodes resource-discovery is exclusive to. If a resource is marked for +exclusive+ discovery on one or more nodes, that resource is only allowed to be placed within that subset of nodes. indexterm:[Resource Discovery,Location Constraints] indexterm:[Constraints,Location,Resource Discovery] |========================================================= [WARNING] ========= Setting resource-discovery to +never+ or +exclusive+ removes Pacemaker's ability to detect and stop unwanted instances of a service running where it's not supposed to be. It is up to the system administrator (you!) to make sure that the service can 'never' be active on nodes without resource-discovery (such as by leaving the relevant software uninstalled). ========= === Asymmetrical "Opt-In" Clusters === indexterm:[Asymmetrical Opt-In Clusters] indexterm:[Cluster Type,Asymmetrical Opt-In] To create an opt-in cluster, start by preventing resources from running anywhere by default: ---- # crm_attribute --name symmetric-cluster --update false ---- Then start enabling nodes. The following fragment says that the web server prefers *sles-1*, the database prefers *sles-2* and both can fail over to *sles-3* if their most preferred node fails. .Opt-in location constraints for two resources ====== [source,XML] ------- ------- ====== === Symmetrical "Opt-Out" Clusters === indexterm:[Symmetrical Opt-Out Clusters] indexterm:[Cluster Type,Symmetrical Opt-Out] To create an opt-out cluster, start by allowing resources to run anywhere by default: ---- # crm_attribute --name symmetric-cluster --update true ---- Then start disabling nodes. The following fragment is the equivalent of the above opt-in configuration. .Opt-out location constraints for two resources ====== [source,XML] ------- ------- ====== [[node-score-equal]] === What if Two Nodes Have the Same Score === If two nodes have the same score, then the cluster will choose one. This choice may seem random and may not be what was intended, however the cluster was not given enough information to know any better. .Constraints where a resource prefers two nodes equally ====== [source,XML] ------- ------- ====== In the example above, assuming no other constraints and an inactive cluster, +Webserver+ would probably be placed on +sles-1+ and +Database+ on +sles-2+. It would likely have placed +Webserver+ based on the node's uname and +Database+ based on the desire to spread the resource load evenly across the cluster. However other factors can also be involved in more complex configurations. [[s-resource-ordering]] == Specifying the Order in which Resources Should Start/Stop == indexterm:[Resource,Constraints,Ordering] indexterm:[Resource,Start Order] indexterm:[Ordering Constraints] 'Ordering constraints' tell the cluster the order in which resources should start. [IMPORTANT] ==== Ordering constraints affect 'only' the ordering of resources; they do 'not' require that the resources be placed on the same node. If you want resources to be started on the same node 'and' in a specific order, you need both an ordering constraint 'and' a colocation constraint (see <>), or alternatively, a group (see <>). ==== === Ordering Properties === .Properties of a rsc_order Constraint [width="95%",cols="1m,1,4> resources. === Optional and mandatory ordering === Here is an example of ordering constraints where +Database+ 'must' start before +Webserver+, and +IP+ 'should' start before +Webserver+ if they both need to be started: .Optional and mandatory ordering constraints ====== [source,XML] ------- ------- ====== Because the above example lets +symmetrical+ default to TRUE, +Webserver+ must be stopped before +Database+ can be stopped, and +Webserver+ should be stopped before +IP+ if they both need to be stopped. [[s-resource-colocation]] == Placing Resources Relative to other Resources == indexterm:[Resource,Constraints,Colocation] indexterm:[Resource,Location Relative to other Resources] 'Colocation constraints' tell the cluster that the location of one resource depends on the location of another one. Colocation has an important side-effect: it affects the order in which resources are assigned to a node. Think about it: You can't place A relative to B unless you know where B is. footnote:[ While the human brain is sophisticated enough to read the constraint in any order and choose the correct one depending on the situation, the cluster is not quite so smart. Yet. ] So when you are creating colocation constraints, it is important to consider whether you should colocate A with B, or B with A. Another thing to keep in mind is that, assuming A is colocated with B, the cluster will take into account A's preferences when deciding which node to choose for B. For a detailed look at exactly how this occurs, see http://clusterlabs.org/doc/Colocation_Explained.pdf[Colocation Explained]. [IMPORTANT] ==== Colocation constraints affect 'only' the placement of resources; they do 'not' require that the resources be started in a particular order. If you want resources to be started on the same node 'and' in a specific order, you need both an ordering constraint (see <>) 'and' a colocation constraint, or alternatively, a group (see <>). ==== === Colocation Properties === .Properties of a rsc_colocation Constraint [width="95%",cols="2m,5<",options="header",align="center"] |========================================================= |Field |Description |id |A unique name for the constraint. indexterm:[id,Colocation Constraints] indexterm:[Constraints,Colocation,id] |rsc |The name of a resource that should be located relative to +with-rsc+. indexterm:[rsc,Colocation Constraints] indexterm:[Constraints,Colocation,rsc] |with-rsc |The name of the resource used as the colocation target. The cluster will decide where to put this resource first and then decide where to put +rsc+. indexterm:[with-rsc,Colocation Constraints] indexterm:[Constraints,Colocation,with-rsc] |score |Positive values indicate the resources should run on the same node. Negative values indicate the resources should run on different nodes. Values of \+/- +INFINITY+ change "should" to "must". indexterm:[score,Colocation Constraints] indexterm:[Constraints,Colocation,score] |========================================================= === Mandatory Placement === Mandatory placement occurs when the constraint's score is ++INFINITY+ or +-INFINITY+. In such cases, if the constraint can't be satisfied, then the +rsc+ resource is not permitted to run. For +score=INFINITY+, this includes cases where the +with-rsc+ resource is not active. If you need resource +A+ to always run on the same machine as resource +B+, you would add the following constraint: .Mandatory colocation constraint for two resources ==== [source,XML] ==== Remember, because +INFINITY+ was used, if +B+ can't run on any of the cluster nodes (for whatever reason) then +A+ will not be allowed to run. Whether +A+ is running or not has no effect on +B+. Alternatively, you may want the opposite -- that +A+ 'cannot' run on the same machine as +B+. In this case, use +score="-INFINITY"+. .Mandatory anti-colocation constraint for two resources ==== [source,XML] ==== Again, by specifying +-INFINITY+, the constraint is binding. So if the only place left to run is where +B+ already is, then +A+ may not run anywhere. As with +INFINITY+, +B+ can run even if +A+ is stopped. However, in this case +A+ also can run if +B+ is stopped, because it still meets the constraint of +A+ and +B+ not running on the same node. === Advisory Placement === If mandatory placement is about "must" and "must not", then advisory placement is the "I'd prefer if" alternative. For constraints with scores greater than +-INFINITY+ and less than +INFINITY+, the cluster will try to accommodate your wishes but may ignore them if the alternative is to stop some of the cluster resources. As in life, where if enough people prefer something it effectively becomes mandatory, advisory colocation constraints can combine with other elements of the configuration to behave as if they were mandatory. .Advisory colocation constraint for two resources ==== [source,XML] ==== [[s-resource-sets]] == Resource Sets == 'Resource sets' allow multiple resources to be affected by a single constraint. .A set of 3 resources ==== [source,XML] ---- ---- ==== Resource sets are valid inside +rsc_location+, +rsc_order+ (see <>), +rsc_colocation+ (see <>), and +rsc_ticket+ (see <>) constraints. A resource set has a number of properties that can be set, though not all have an effect in all contexts. .Properties of a resource_set [width="95%",cols="2m,1,5 ------- ====== .Visual representation of the four resources' start order for the above constraints image::images/resource-set.png["Ordered set",width="16cm",height="2.5cm",align="center"] === Ordered Set === To simplify this situation, resource sets (see <>) can be used within ordering constraints: .A chain of ordered resources expressed as a set ====== [source,XML] ------- ------- ====== While the set-based format is not less verbose, it is significantly easier to get right and maintain. [IMPORTANT] ========= If you use a higher-level tool, pay attention to how it exposes this functionality. Depending on the tool, creating a set +A B+ may be equivalent to +A then B+, or +B then A+. ========= === Ordering Multiple Sets === The syntax can be expanded to allow sets of resources to be ordered relative to each other, where the members of each individual set may be ordered or unordered (controlled by the +sequential+ property). In the example below, +A+ and +B+ can both start in parallel, as can +C+ and +D+, however +C+ and +D+ can only start once _both_ +A+ _and_ +B+ are active. .Ordered sets of unordered resources ====== [source,XML] ------- ------- ====== .Visual representation of the start order for two ordered sets of unordered resources image::images/two-sets.png["Two ordered sets",width="13cm",height="7.5cm",align="center"] Of course either set -- or both sets -- of resources can also be internally ordered (by setting +sequential="true"+) and there is no limit to the number of sets that can be specified. .Advanced use of set ordering - Three ordered sets, two of which are internally unordered ====== [source,XML] ------- ------- ====== .Visual representation of the start order for the three sets defined above image::images/three-sets.png["Three ordered sets",width="16cm",height="7.5cm",align="center"] [IMPORTANT] ==== An ordered set with +sequential=false+ makes sense only if there is another set in the constraint. Otherwise, the constraint has no effect. ==== === Resource Set OR Logic === The unordered set logic discussed so far has all been "AND" logic. To illustrate this take the 3 resource set figure in the previous section. Those sets can be expressed, +(A and B) then \(C) then (D) then (E and F)+. Say for example we want to change the first set, +(A and B)+, to use "OR" logic so the sets look like this: +(A or B) then \(C) then (D) then (E and F)+. This functionality can be achieved through the use of the +require-all+ option. This option defaults to TRUE which is why the "AND" logic is used by default. Setting +require-all=false+ means only one resource in the set needs to be started before continuing on to the next set. .Resource Set "OR" logic: Three ordered sets, where the first set is internally unordered with "OR" logic ====== [source,XML] ------- ------- ====== [IMPORTANT] ==== An ordered set with +require-all=false+ makes sense only in conjunction with +sequential=false+. Think of it like this: +sequential=false+ modifies the set to be an unordered set using "AND" logic by default, and adding +require-all=false+ flips the unordered set's "AND" logic to "OR" logic. ==== [[s-resource-sets-colocation]] == Colocating Sets of Resources == Another common situation is for an administrator to create a set of colocated resources. One way to do this would be to define a resource group (see <>), but that cannot always accurately express the desired state. Another way would be to define each relationship as an individual constraint, but that causes a constraint explosion as the number of resources and combinations grow. An example of this approach: .Chain of colocated resources ====== [source,XML] ------- ------- ====== To make things easier, resource sets (see <>) can be used within colocation constraints. As with the chained version, a resource that can't be active prevents any resource that must be colocated with it from being active. For example, if +B+ is not able to run, then both +C+ and by inference +D+ must also remain stopped. Here is an example +resource_set+: .Equivalent colocation chain expressed using +resource_set+ ====== [source,XML] ------- ------- ====== [IMPORTANT] ========= If you use a higher-level tool, pay attention to how it exposes this functionality. Depending on the tool, creating a set +A B+ may be equivalent to +A with B+, or +B with A+. ========= This notation can also be used to tell the cluster that sets of resources must be colocated relative to each other, where the individual members of each set may or may not depend on each other being active (controlled by the +sequential+ property). In this example, +A+, +B+, and +C+ will each be colocated with +D+. +D+ must be active, but any of +A+, +B+, or +C+ may be inactive without affecting any other resources. .Using colocated sets to specify a common peer ====== [source,XML] ------- ------- ====== [IMPORTANT] ==== A colocated set with +sequential=false+ makes sense only if there is another set in the constraint. Otherwise, the constraint has no effect. ==== There is no inherent limit to the number and size of the sets used. The only thing that matters is that in order for any member of one set in the constraint to be active, all members of sets listed after it must also be active (and naturally on the same node); and if a set has +sequential="true"+, then in order for one member of that set to be active, all members listed before it must also be active. If desired, you can restrict the dependency to instances of multistate resources that are in a specific role, using the set's +role+ property. .Colocation chain in which the members of the middle set have no interdependencies, and the last listed set (which the cluster places first) is restricted to instances in master status. ====== [source,XML] ------- ------- ====== .Visual representation the above example (resources to the left are placed first) image::images/three-sets-complex.png["Colocation chain",width="16cm",height="9cm",align="center"] [NOTE] ==== Pay close attention to the order in which resources and sets are listed. While the colocation dependency for members of any one set is last-to-first, the colocation dependency for multiple sets is first-to-last. In the above example, +B+ is colocated with +A+, but +colocated-set-1+ is colocated with +colocated-set-2+. Unlike ordered sets, colocated sets do not use the +require-all+ option. ==== diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index cef01851ba..710ee8e566 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -1,79 +1,114 @@ /* * Copyright (C) 2015 * Andrew Beekhof * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /*! * \file * \brief internal common utilities * \ingroup core * \note Public APIs are declared in util.h */ #ifndef CRM_COMMON_INTERNAL__H #define CRM_COMMON_INTERNAL__H #include /* for gboolean */ #include /* for struct dirent */ #include /* for uid_t and gid_t */ +#include + /* internal I/O utilities (from io.c) */ char *generate_series_filename(const char *directory, const char *series, int sequence, gboolean bzip); int get_last_sequence(const char *directory, const char *series); void write_last_sequence(const char *directory, const char *series, int sequence, int max); int crm_chown_last_sequence(const char *directory, const char *series, uid_t uid, gid_t gid); gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group, gboolean need_both); void crm_sync_directory(const char *name); char *crm_read_contents(const char *filename); int crm_write_sync(int fd, const char *contents); /* internal procfs utilities (from procfs.c) */ int crm_procfs_process_info(struct dirent *entry, char *name, int *pid); int crm_procfs_pid_of(const char *name); /* internal XML schema functions (from xml.c) */ void crm_schema_init(void); void crm_schema_cleanup(void); /* internal generic string functions (from strings.c) */ char *crm_concat(const char *prefix, const char *suffix, char join); void g_hash_destroy_str(gpointer data); long long crm_int_helper(const char *text, char **end_text); gboolean crm_ends_with(const char *s, const char *match); char *add_list_element(char *list, const char *value); bool crm_compress_string(const char *data, int length, int max, char **result, unsigned int *result_len); static inline int crm_strlen_zero(const char *s) { return !s || *s == '\0'; } +/* convenience functions for failure-related node attributes */ + +#define CRM_FAIL_COUNT_PREFIX "fail-count" +#define CRM_LAST_FAILURE_PREFIX "last-failure" + +/*! + * \internal + * \brief Generate a failure-related node attribute name for a resource + * + * \param[in] prefix Start of attribute name + * \param[in] rsc_id Resource name + * + * \return Newly allocated string with attribute name + */ +static inline char * +crm_fail_attr_name(const char *prefix, const char *rsc_id) +{ + CRM_CHECK(prefix && rsc_id, return NULL); + return crm_strdup_printf("%s-%s", prefix, rsc_id); +} + +static inline char * +crm_failcount_name(const char *rsc_id) +{ + return crm_fail_attr_name(CRM_FAIL_COUNT_PREFIX, rsc_id); +} + +static inline char * +crm_lastfailure_name(const char *rsc_id) +{ + return crm_fail_attr_name(CRM_LAST_FAILURE_PREFIX, rsc_id); +} + #endif /* CRM_COMMON_INTERNAL__H */ diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 8adae7ca10..6b25da1b05 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,283 +1,285 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # include # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) { was_processing_error = TRUE; crm_config_error = TRUE; crm_err(fmt); } # define pe_warn(fmt...) { was_processing_warning = TRUE; crm_config_warning = TRUE; crm_warn(fmt); } # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } # define pe_set_action_bit(action, bit) action->flags = crm_set_bit(__FUNCTION__, action->uuid, action->flags, bit) # define pe_clear_action_bit(action, bit) action->flags = crm_clear_bit(__FUNCTION__, action->uuid, action->flags, bit) typedef struct notify_data_s { GHashTable *keys; const char *action; action_t *pre; action_t *post; action_t *pre_done; action_t *post_done; GListPtr active; /* notify_entry_t* */ GListPtr inactive; /* notify_entry_t* */ GListPtr start; /* notify_entry_t* */ GListPtr stop; /* notify_entry_t* */ GListPtr demote; /* notify_entry_t* */ GListPtr promote; /* notify_entry_t* */ GListPtr master; /* notify_entry_t* */ GListPtr slave; /* notify_entry_t* */ GHashTable *allowed_nodes; } notify_data_t; bool pe_can_fence(pe_working_set_t *data_set, node_t *node); int merge_weights(int w1, int w2); void add_hash_param(GHashTable * hash, const char *name, const char *value); void append_hashtable(gpointer key, gpointer value, gpointer user_data); char *native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); node_t *native_location(resource_t * rsc, GListPtr * list, gboolean current); void pe_metadata(void); void verify_pe_options(GHashTable * options); void common_update_score(resource_t * rsc, const char *id, int score); void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set); node_t *rsc_known_on(resource_t * rsc, GListPtr * list); gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean master_unpack(resource_t * rsc, pe_working_set_t * data_set); resource_t *native_find_rsc(resource_t * rsc, const char *id, node_t * node, int flags); gboolean native_active(resource_t * rsc, gboolean all); gboolean group_active(resource_t * rsc, gboolean all); gboolean clone_active(resource_t * rsc, gboolean all); gboolean master_active(resource_t * rsc, gboolean all); void native_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void group_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void clone_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void master_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void native_free(resource_t * rsc); void group_free(resource_t * rsc); void clone_free(resource_t * rsc); void master_free(resource_t * rsc); enum rsc_role_e native_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e group_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e master_resource_state(const resource_t * rsc, gboolean current); gboolean common_unpack(xmlNode * xml_obj, resource_t ** rsc, resource_t * parent, pe_working_set_t * data_set); void common_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void common_free(resource_t * rsc); extern pe_working_set_t *pe_dataset; extern node_t *node_copy(const node_t *this_node); extern time_t get_effective_time(pe_working_set_t * data_set); + +/* Failure handling utilities (from failcounts.c) */ extern int get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set); extern int get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, bool effective, xmlNode * xml_op, pe_working_set_t * data_set); extern int get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set); /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores); extern GListPtr node_list_dup(GListPtr list, gboolean reset, gboolean filter); extern GListPtr node_list_from_hash(GHashTable * hash, gboolean reset, gboolean filter); extern GHashTable *node_hash_from_list(GListPtr list); static inline gpointer pe_hash_table_lookup(GHashTable * hash, gconstpointer key) { if (hash) { return g_hash_table_lookup(hash, key); } return NULL; } extern action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order); GHashTable *node_hash_dup(GHashTable * hash); extern GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter); extern GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter); extern GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter); extern void pe_free_shallow(GListPtr alist); extern void pe_free_shallow_adv(GListPtr alist, gboolean with_data); /* Printing functions for debug */ extern void print_node(const char *pre_text, node_t * node, gboolean details); extern void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details); extern void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes); extern void dump_node_capacity(int level, const char *comment, node_t * node); extern void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node); # define dump_node_scores(level, rsc, text, nodes) do { \ dump_node_scores_worker(level, __FILE__, __FUNCTION__, __LINE__, rsc, text, nodes); \ } while(0) /* Sorting functions */ extern gint sort_rsc_priority(gconstpointer a, gconstpointer b); extern gint sort_rsc_index(gconstpointer a, gconstpointer b); extern xmlNode *find_rsc_op_entry(resource_t * rsc, const char *key); extern action_t *custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean foo, pe_working_set_t * data_set); # define delete_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, data_set); # define stopped_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, data_set); # define stop_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, data_set); # define reload_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_RELOAD, 0) # define start_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, data_set) # define started_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, data_set) # define promote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, data_set) # define promoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, data_set) # define demote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, data_set) # define demoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, data_set) extern action_t *find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node); extern enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic); extern GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node); extern GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node); extern GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node); extern void pe_free_action(action_t * action); extern void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); extern gboolean get_target_role(resource_t * rsc, enum rsc_role_e *role); extern resource_t *find_clone_instance(resource_t * rsc, const char *sub_id, pe_working_set_t * data_set); extern void destroy_ticket(gpointer data); extern ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); char *clone_strip(const char *last_rsc_id); char *clone_zero(const char *last_rsc_id); int get_target_rc(xmlNode * xml_op); gint sort_node_uname(gconstpointer a, gconstpointer b); bool is_set_recursive(resource_t * rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. */ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_secure; xmlNode *params_restart; char *digest_all_calc; char *digest_secure_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set); action_t *pe_fence_op(node_t * node, const char *op, bool optional, pe_working_set_t * data_set); void trigger_unfencing( resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set); void set_bit_recursive(resource_t * rsc, unsigned long long flag); void clear_bit_recursive(resource_t * rsc, unsigned long long flag); gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref); void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason); #endif diff --git a/lib/common/schemas.c b/lib/common/schemas.c index b8280b2d12..f1a0238024 100644 --- a/lib/common/schemas.c +++ b/lib/common/schemas.c @@ -1,935 +1,937 @@ /* * Copyright (C) 2004-2016 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #if HAVE_LIBXML2 # include #endif #if HAVE_LIBXSLT # include # include #endif #include #include typedef struct { xmlRelaxNGPtr rng; xmlRelaxNGValidCtxtPtr valid; xmlRelaxNGParserCtxtPtr parser; } relaxng_ctx_cache_t; struct schema_s { int type; float version; char *name; char *location; char *transform; int after_transform; void *cache; }; static struct schema_s *known_schemas = NULL; static int xml_schema_max = 0; void xml_log(int priority, const char *fmt, ...) G_GNUC_PRINTF(2, 3); void xml_log(int priority, const char *fmt, ...) { va_list ap; va_start(ap, fmt); qb_log_from_external_source_va(__FUNCTION__, __FILE__, fmt, priority, __LINE__, 0, ap); va_end(ap); } static int xml_latest_schema_index(void) { return xml_schema_max - 4; } static int xml_minimum_schema_index(void) { static int best = 0; if (best == 0) { int lpc = 0; float target = 0.0; best = xml_latest_schema_index(); target = floor(known_schemas[best].version); for (lpc = best; lpc > 0; lpc--) { if (known_schemas[lpc].version < target) { return best; } else { best = lpc; } } best = xml_latest_schema_index(); } return best; } const char * xml_latest_schema(void) { return get_schema_name(xml_latest_schema_index()); } static const char * get_schema_root(void) { static const char *base = NULL; if (base == NULL) { base = getenv("PCMK_schema_directory"); } if (base == NULL || strlen(base) == 0) { base = CRM_DTD_DIRECTORY; } return base; } static char * get_schema_path(const char *name, const char *file) { const char *base = get_schema_root(); if (file) { return crm_strdup_printf("%s/%s", base, file); } return crm_strdup_printf("%s/%s.rng", base, name); } static int schema_filter(const struct dirent *a) { int rc = 0; float version = 0; if (strstr(a->d_name, "pacemaker-") != a->d_name) { /* crm_trace("%s - wrong prefix", a->d_name); */ } else if (!crm_ends_with(a->d_name, ".rng")) { /* crm_trace("%s - wrong suffix", a->d_name); */ } else if (sscanf(a->d_name, "pacemaker-%f.rng", &version) == 0) { /* crm_trace("%s - wrong format", a->d_name); */ } else if (strcmp(a->d_name, "pacemaker-1.1.rng") == 0) { /* "-1.1" was used for what later became "-next" */ /* crm_trace("%s - hack", a->d_name); */ } else { /* crm_debug("%s - candidate", a->d_name); */ rc = 1; } return rc; } static int schema_sort(const struct dirent **a, const struct dirent **b) { int rc = 0; float a_version = 0.0; float b_version = 0.0; sscanf(a[0]->d_name, "pacemaker-%f.rng", &a_version); sscanf(b[0]->d_name, "pacemaker-%f.rng", &b_version); if (a_version > b_version) { rc = 1; } else if(a_version < b_version) { rc = -1; } /* crm_trace("%s (%f) vs. %s (%f) : %d", a[0]->d_name, a_version, b[0]->d_name, b_version, rc); */ return rc; } static void __xml_schema_add(int type, float version, const char *name, const char *location, const char *transform, int after_transform) { int last = xml_schema_max; xml_schema_max++; known_schemas = realloc_safe(known_schemas, xml_schema_max * sizeof(struct schema_s)); CRM_ASSERT(known_schemas != NULL); memset(known_schemas+last, 0, sizeof(struct schema_s)); known_schemas[last].type = type; known_schemas[last].after_transform = after_transform; if (version > 0.0) { known_schemas[last].version = version; known_schemas[last].name = crm_strdup_printf("pacemaker-%.1f", version); known_schemas[last].location = crm_strdup_printf("%s.rng", known_schemas[last].name); } else { char dummy[1024]; CRM_ASSERT(name); CRM_ASSERT(location); sscanf(name, "%[^-]-%f", dummy, &version); known_schemas[last].version = version; known_schemas[last].name = strdup(name); known_schemas[last].location = strdup(location); } if (transform) { known_schemas[last].transform = strdup(transform); } if (after_transform == 0) { after_transform = xml_schema_max; /* upgrade is a one-way */ } known_schemas[last].after_transform = after_transform; if (known_schemas[last].after_transform < 0) { crm_debug("Added supported schema %d: %s (%s)", last, known_schemas[last].name, known_schemas[last].location); } else if (known_schemas[last].transform) { crm_debug("Added supported schema %d: %s (%s upgrades to %d with %s)", last, known_schemas[last].name, known_schemas[last].location, known_schemas[last].after_transform, known_schemas[last].transform); } else { crm_debug("Added supported schema %d: %s (%s upgrades to %d)", last, known_schemas[last].name, known_schemas[last].location, known_schemas[last].after_transform); } } /*! * \internal * \brief Load pacemaker schemas into cache */ void crm_schema_init(void) { int lpc, max; const char *base = get_schema_root(); struct dirent **namelist = NULL; max = scandir(base, &namelist, schema_filter, schema_sort); __xml_schema_add(1, 0.0, "pacemaker-0.6", "crm.dtd", "upgrade06.xsl", 3); __xml_schema_add(1, 0.0, "transitional-0.6", "crm-transitional.dtd", "upgrade06.xsl", 3); __xml_schema_add(2, 0.0, "pacemaker-0.7", "pacemaker-1.0.rng", NULL, 0); if (max < 0) { crm_notice("scandir(%s) failed: %s (%d)", base, strerror(errno), errno); } else { for (lpc = 0; lpc < max; lpc++) { int next = 0; float version = 0.0; char *transform = NULL; sscanf(namelist[lpc]->d_name, "pacemaker-%f.rng", &version); if ((lpc + 1) < max) { float next_version = 0.0; sscanf(namelist[lpc+1]->d_name, "pacemaker-%f.rng", &next_version); if (floor(version) < floor(next_version)) { struct stat s; char *xslt = NULL; transform = crm_strdup_printf("upgrade-%.1f.xsl", version); xslt = get_schema_path(NULL, transform); if (stat(xslt, &s) != 0) { crm_err("Transform %s not found", xslt); free(xslt); __xml_schema_add(2, version, NULL, NULL, NULL, -1); break; } else { free(xslt); } } } else { next = -1; } __xml_schema_add(2, version, NULL, NULL, transform, next); free(namelist[lpc]); free(transform); } } /* 1.1 was the old name for -next */ __xml_schema_add(2, 0.0, "pacemaker-1.1", "pacemaker-next.rng", NULL, 0); __xml_schema_add(2, 0.0, "pacemaker-next", "pacemaker-next.rng", NULL, -1); __xml_schema_add(0, 0.0, "none", "N/A", NULL, -1); free(namelist); } static gboolean validate_with_dtd(xmlDocPtr doc, gboolean to_logs, const char *dtd_file) { gboolean valid = TRUE; xmlDtdPtr dtd = NULL; xmlValidCtxtPtr cvp = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(dtd_file != NULL, return FALSE); dtd = xmlParseDTD(NULL, (const xmlChar *)dtd_file); if (dtd == NULL) { crm_err("Could not locate/parse DTD: %s", dtd_file); return TRUE; } cvp = xmlNewValidCtxt(); if (cvp) { if (to_logs) { cvp->userData = (void *)LOG_ERR; cvp->error = (xmlValidityErrorFunc) xml_log; cvp->warning = (xmlValidityWarningFunc) xml_log; } else { cvp->userData = (void *)stderr; cvp->error = (xmlValidityErrorFunc) fprintf; cvp->warning = (xmlValidityWarningFunc) fprintf; } if (!xmlValidateDtd(cvp, doc, dtd)) { valid = FALSE; } xmlFreeValidCtxt(cvp); } else { crm_err("Internal error: No valid context"); } xmlFreeDtd(dtd); return valid; } #if 0 static void relaxng_invalid_stderr(void *userData, xmlErrorPtr error) { /* Structure xmlError struct _xmlError { int domain : What part of the library raised this er int code : The error code, e.g. an xmlParserError char * message : human-readable informative error messag xmlErrorLevel level : how consequent is the error char * file : the filename int line : the line number if available char * str1 : extra string information char * str2 : extra string information char * str3 : extra string information int int1 : extra number information int int2 : column number of the error or 0 if N/A void * ctxt : the parser context if available void * node : the node in the tree } */ crm_err("Structured error: line=%d, level=%d %s", error->line, error->level, error->message); } #endif static gboolean validate_with_relaxng(xmlDocPtr doc, gboolean to_logs, const char *relaxng_file, relaxng_ctx_cache_t **cached_ctx) { int rc = 0; gboolean valid = TRUE; relaxng_ctx_cache_t *ctx = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(relaxng_file != NULL, return FALSE); if (cached_ctx && *cached_ctx) { ctx = *cached_ctx; } else { crm_info("Creating RNG parser context"); ctx = calloc(1, sizeof(relaxng_ctx_cache_t)); xmlLoadExtDtdDefaultValue = 1; ctx->parser = xmlRelaxNGNewParserCtxt(relaxng_file); CRM_CHECK(ctx->parser != NULL, goto cleanup); if (to_logs) { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) xml_log, (xmlRelaxNGValidityWarningFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } ctx->rng = xmlRelaxNGParse(ctx->parser); CRM_CHECK(ctx->rng != NULL, crm_err("Could not find/parse %s", relaxng_file); goto cleanup); ctx->valid = xmlRelaxNGNewValidCtxt(ctx->rng); CRM_CHECK(ctx->valid != NULL, goto cleanup); if (to_logs) { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) xml_log, (xmlRelaxNGValidityWarningFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } } /* xmlRelaxNGSetValidStructuredErrors( */ /* valid, relaxng_invalid_stderr, valid); */ xmlLineNumbersDefault(1); rc = xmlRelaxNGValidateDoc(ctx->valid, doc); if (rc > 0) { valid = FALSE; } else if (rc < 0) { crm_err("Internal libxml error during validation"); } cleanup: if (cached_ctx) { *cached_ctx = ctx; } else { if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); } return valid; } /*! * \internal * \brief Clean up global memory associated with XML schemas */ void crm_schema_cleanup(void) { int lpc; relaxng_ctx_cache_t *ctx = NULL; for (lpc = 0; lpc < xml_schema_max; lpc++) { switch (known_schemas[lpc].type) { case 0: /* None */ break; case 1: /* DTD - Not cached */ break; case 2: /* RNG - Cached */ ctx = (relaxng_ctx_cache_t *) known_schemas[lpc].cache; if (ctx == NULL) { break; } if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); known_schemas[lpc].cache = NULL; break; default: break; } free(known_schemas[lpc].name); free(known_schemas[lpc].location); free(known_schemas[lpc].transform); } free(known_schemas); known_schemas = NULL; } static gboolean validate_with(xmlNode *xml, int method, gboolean to_logs) { xmlDocPtr doc = NULL; gboolean valid = FALSE; int type = 0; char *file = NULL; if (method < 0) { return FALSE; } type = known_schemas[method].type; if(type == 0) { return TRUE; } CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); file = get_schema_path(known_schemas[method].name, known_schemas[method].location); crm_trace("Validating with: %s (type=%d)", crm_str(file), type); switch (type) { case 1: valid = validate_with_dtd(doc, to_logs, file); break; case 2: valid = validate_with_relaxng(doc, to_logs, file, (relaxng_ctx_cache_t **) & (known_schemas[method].cache)); break; default: crm_err("Unknown validator type: %d", type); break; } free(file); return valid; } static void dump_file(const char *filename) { FILE *fp = NULL; int ch, line = 0; CRM_CHECK(filename != NULL, return); fp = fopen(filename, "r"); CRM_CHECK(fp != NULL, return); fprintf(stderr, "%4d ", ++line); do { ch = getc(fp); if (ch == EOF) { putc('\n', stderr); break; } else if (ch == '\n') { fprintf(stderr, "\n%4d ", ++line); } else { putc(ch, stderr); } } while (1); fclose(fp); } gboolean validate_xml_verbose(xmlNode *xml_blob) { int fd = 0; xmlDoc *doc = NULL; xmlNode *xml = NULL; gboolean rc = FALSE; char *filename = strdup(CRM_STATE_DIR "/cib-invalid.XXXXXX"); + CRM_CHECK(filename != NULL, return FALSE); + umask(S_IWGRP | S_IWOTH | S_IROTH); fd = mkstemp(filename); write_xml_fd(xml_blob, filename, fd, FALSE); dump_file(filename); doc = xmlParseFile(filename); xml = xmlDocGetRootElement(doc); rc = validate_xml(xml, NULL, FALSE); free_xml(xml); unlink(filename); free(filename); return rc; } gboolean validate_xml(xmlNode *xml_blob, const char *validation, gboolean to_logs) { int version = 0; if (validation == NULL) { validation = crm_element_value(xml_blob, XML_ATTR_VALIDATION); } if (validation == NULL) { int lpc = 0; bool valid = FALSE; validation = crm_element_value(xml_blob, "ignore-dtd"); if (crm_is_true(validation)) { /* Legacy compatibilty */ crm_xml_add(xml_blob, XML_ATTR_VALIDATION, "none"); return TRUE; } /* Work it out */ for (lpc = 0; lpc < xml_schema_max; lpc++) { if (validate_with(xml_blob, lpc, FALSE)) { valid = TRUE; crm_xml_add(xml_blob, XML_ATTR_VALIDATION, known_schemas[lpc].name); crm_info("XML validated against %s", known_schemas[lpc].name); if(known_schemas[lpc].after_transform == 0) { break; } } } return valid; } version = get_schema_version(validation); if (strcmp(validation, "none") == 0) { return TRUE; } else if (version < xml_schema_max) { return validate_with(xml_blob, version, to_logs); } crm_err("Unknown validator: %s", validation); return FALSE; } #if HAVE_LIBXSLT static xmlNode * apply_transformation(xmlNode *xml, const char *transform) { char *xform = NULL; xmlNode *out = NULL; xmlDocPtr res = NULL; xmlDocPtr doc = NULL; xsltStylesheet *xslt = NULL; CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); xform = get_schema_path(NULL, transform); xmlLoadExtDtdDefaultValue = 1; xmlSubstituteEntitiesDefault(1); xslt = xsltParseStylesheetFile((const xmlChar *)xform); CRM_CHECK(xslt != NULL, goto cleanup); res = xsltApplyStylesheet(xslt, doc, NULL); CRM_CHECK(res != NULL, goto cleanup); out = xmlDocGetRootElement(res); cleanup: if (xslt) { xsltFreeStylesheet(xslt); } free(xform); return out; } #endif const char * get_schema_name(int version) { if (version < 0 || version >= xml_schema_max) { return "unknown"; } return known_schemas[version].name; } int get_schema_version(const char *name) { int lpc = 0; if (name == NULL) { name = "none"; } for (; lpc < xml_schema_max; lpc++) { if (safe_str_eq(name, known_schemas[lpc].name)) { return lpc; } } return -1; } /* set which validation to use */ int update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, gboolean to_logs) { xmlNode *xml = NULL; char *value = NULL; int max_stable_schemas = xml_latest_schema_index(); int lpc = 0, match = -1, rc = pcmk_ok; int next = -1; /* -1 denotes "inactive" value */ CRM_CHECK(best != NULL, return -EINVAL); *best = 0; CRM_CHECK(xml_blob != NULL, return -EINVAL); CRM_CHECK(*xml_blob != NULL, return -EINVAL); xml = *xml_blob; value = crm_element_value_copy(xml, XML_ATTR_VALIDATION); if (value != NULL) { match = get_schema_version(value); lpc = match; if (lpc >= 0 && transform == FALSE) { *best = lpc++; } else if (lpc < 0) { crm_debug("Unknown validation type"); lpc = 0; } } if (match >= max_stable_schemas) { /* nothing to do */ free(value); *best = match; return pcmk_ok; } while (lpc <= max_stable_schemas) { crm_debug("Testing '%s' validation (%d of %d)", known_schemas[lpc].name ? known_schemas[lpc].name : "", lpc, max_stable_schemas); if (validate_with(xml, lpc, to_logs) == FALSE) { if (next != -1) { crm_info("Configuration not valid for schema: %s", known_schemas[lpc].name); next = -1; } else { crm_trace("%s validation failed", known_schemas[lpc].name ? known_schemas[lpc].name : ""); } if (*best) { /* we've satisfied the validation, no need to check further */ break; } rc = -pcmk_err_schema_validation; } else { if (next != -1) { crm_debug("Configuration valid for schema: %s", known_schemas[next].name); next = -1; } rc = pcmk_ok; } if (rc == pcmk_ok) { *best = lpc; } if (rc == pcmk_ok && transform) { xmlNode *upgrade = NULL; next = known_schemas[lpc].after_transform; if (next <= lpc) { /* There is no next version, or next would regress */ crm_trace("Stopping at %s", known_schemas[lpc].name); break; } else if (max > 0 && (lpc == max || next > max)) { crm_trace("Upgrade limit reached at %s (lpc=%d, next=%d, max=%d)", known_schemas[lpc].name, lpc, next, max); break; } else if (known_schemas[lpc].transform == NULL) { crm_debug("%s-style configuration is also valid for %s", known_schemas[lpc].name, known_schemas[next].name); lpc = next; } else { crm_debug("Upgrading %s-style configuration to %s with %s", known_schemas[lpc].name, known_schemas[next].name, known_schemas[lpc].transform ? known_schemas[lpc].transform : "no-op"); #if HAVE_LIBXSLT upgrade = apply_transformation(xml, known_schemas[lpc].transform); #endif if (upgrade == NULL) { crm_err("Transformation %s failed", known_schemas[lpc].transform); rc = -pcmk_err_transform_failed; } else if (validate_with(upgrade, next, to_logs)) { crm_info("Transformation %s successful", known_schemas[lpc].transform); lpc = next; *best = next; free_xml(xml); xml = upgrade; rc = pcmk_ok; } else { crm_err("Transformation %s did not produce a valid configuration", known_schemas[lpc].transform); crm_log_xml_info(upgrade, "transform:bad"); free_xml(upgrade); rc = -pcmk_err_schema_validation; } next = -1; } } if (transform == FALSE || rc != pcmk_ok) { /* we need some progress! */ lpc++; } } if (*best > match && *best) { crm_info("%s the configuration from %s to %s", transform?"Transformed":"Upgraded", value ? value : "", known_schemas[*best].name); crm_xml_add(xml, XML_ATTR_VALIDATION, known_schemas[*best].name); } *xml_blob = xml; free(value); return rc; } gboolean cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) { gboolean rc = TRUE; const char *value = crm_element_value(*xml, XML_ATTR_VALIDATION); char *const orig_value = strdup(value == NULL ? "(none)" : value); int version = get_schema_version(value); int orig_version = version; int min_version = xml_minimum_schema_index(); if (version < min_version) { xmlNode *converted = NULL; converted = copy_xml(*xml); update_validation(&converted, &version, 0, TRUE, to_logs); value = crm_element_value(converted, XML_ATTR_VALIDATION); if (version < min_version) { if (version < orig_version || orig_version == -1) { if (to_logs) { crm_config_err("Your current configuration %s could not" " validate with any schema in range [%s, %s]," " cannot upgrade to %s.", orig_value, get_schema_name(orig_version), xml_latest_schema(), get_schema_name(min_version)); } else { fprintf(stderr, "Your current configuration %s could not" " validate with any schema in range [%s, %s]," " cannot upgrade to %s.\n", orig_value, get_schema_name(orig_version), xml_latest_schema(), get_schema_name(min_version)); } } else if (to_logs) { crm_config_err("Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.", crm_str(value), get_schema_name(min_version)); } else { fprintf(stderr, "Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.\n", crm_str(value), get_schema_name(min_version)); } free_xml(converted); converted = NULL; rc = FALSE; } else { free_xml(*xml); *xml = converted; if (version < xml_latest_schema_index()) { crm_config_warn("Your configuration was internally updated to %s... " "which is acceptable but not the most recent", get_schema_name(version)); } else if (to_logs) { crm_info("Your configuration was internally updated to the latest version (%s)", get_schema_name(version)); } } } else if (version >= get_schema_version("none")) { if (to_logs) { crm_config_warn("Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues."); } else { fprintf(stderr, "Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues.\n"); } } if (best_version) { *best_version = version; } free(orig_value); return rc; } diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am index a4f3558b44..59aa3836ec 100644 --- a/lib/pengine/Makefile.am +++ b/lib/pengine/Makefile.am @@ -1,44 +1,44 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common ## libraries lib_LTLIBRARIES = libpe_rules.la libpe_status.la ## SOURCES noinst_HEADERS = unpack.h variant.h libpe_rules_la_LDFLAGS = -version-info 3:0:1 libpe_rules_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_rules_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la libpe_rules_la_SOURCES = rules.c common.c libpe_status_la_LDFLAGS = -version-info 12:0:2 libpe_status_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_status_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c \ - group.c clone.c rules.c common.c remote.c + group.c clone.c rules.c common.c failcounts.c remote.c clean-generic: rm -f *.log *.debug *~ diff --git a/lib/pengine/failcounts.c b/lib/pengine/failcounts.c new file mode 100644 index 0000000000..6579228ecd --- /dev/null +++ b/lib/pengine/failcounts.c @@ -0,0 +1,303 @@ +/* + * Copyright (C) 2008-2017 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include + +#include +#include +#include +#include +#include + +struct fail_search { + resource_t *rsc; + pe_working_set_t *data_set; + int count; + long long last; + char *key; +}; + +static void +get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data) +{ + struct fail_search *search = user_data; + const char *attr_id = key_p; + const char *match = strstr(attr_id, search->key); + resource_t *parent = NULL; + + if (match == NULL) { + return; + } + + /* we are only incrementing the failcounts here if the rsc + * that matches our prefix has the same uber parent as the rsc we're + * calculating the failcounts for. This prevents false positive matches + * where unrelated resources may have similar prefixes in their names. + * + * search->rsc is already set to be the uber parent. */ + parent = uber_parent(pe_find_resource(search->data_set->resources, match)); + if (parent == NULL || parent != search->rsc) { + return; + } + if (strstr(attr_id, CRM_LAST_FAILURE_PREFIX "-") == attr_id) { + search->last = crm_int_helper(value, NULL); + + } else if (strstr(attr_id, CRM_FAIL_COUNT_PREFIX "-") == attr_id) { + search->count += char2score(value); + } +} + +int +get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, + pe_working_set_t *data_set) +{ + return get_failcount_full(node, rsc, last_failure, TRUE, NULL, data_set); +} + +static gboolean +is_matched_failure(const char *rsc_id, xmlNode *conf_op_xml, + xmlNode *lrm_op_xml) +{ + gboolean matched = FALSE; + const char *conf_op_name = NULL; + int conf_op_interval = 0; + const char *lrm_op_task = NULL; + int lrm_op_interval = 0; + const char *lrm_op_id = NULL; + char *last_failure_key = NULL; + + if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) { + return FALSE; + } + + conf_op_name = crm_element_value(conf_op_xml, "name"); + conf_op_interval = crm_get_msec(crm_element_value(conf_op_xml, "interval")); + lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK); + crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_INTERVAL, &lrm_op_interval); + + if (safe_str_eq(conf_op_name, lrm_op_task) == FALSE + || conf_op_interval != lrm_op_interval) { + return FALSE; + } + + lrm_op_id = ID(lrm_op_xml); + last_failure_key = generate_op_key(rsc_id, "last_failure", 0); + + if (safe_str_eq(last_failure_key, lrm_op_id)) { + matched = TRUE; + + } else { + char *expected_op_key = generate_op_key(rsc_id, conf_op_name, + conf_op_interval); + + if (safe_str_eq(expected_op_key, lrm_op_id)) { + int rc = 0; + int target_rc = get_target_rc(lrm_op_xml); + + crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc); + if (rc != target_rc) { + matched = TRUE; + } + } + free(expected_op_key); + } + + free(last_failure_key); + return matched; +} + +static gboolean +block_failure(node_t *node, resource_t *rsc, xmlNode *xml_op, + pe_working_set_t *data_set) +{ + char *xml_name = clone_strip(rsc->id); + char *xpath = crm_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", + xml_name); + xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); + gboolean should_block = FALSE; + + free(xpath); + + if (xpathObj) { + int max = numXpathResults(xpathObj); + int lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *pref = getXpathResult(xpathObj, lpc); + + if (xml_op) { + should_block = is_matched_failure(xml_name, pref, xml_op); + if (should_block) { + break; + } + + } else { + const char *conf_op_name = NULL; + int conf_op_interval = 0; + char *lrm_op_xpath = NULL; + xmlXPathObject *lrm_op_xpathObj = NULL; + + conf_op_name = crm_element_value(pref, "name"); + conf_op_interval = crm_get_msec(crm_element_value(pref, "interval")); + + lrm_op_xpath = crm_strdup_printf("//node_state[@uname='%s']" + "//lrm_resource[@id='%s']" + "/lrm_rsc_op[@operation='%s'][@interval='%d']", + node->details->uname, xml_name, + conf_op_name, conf_op_interval); + lrm_op_xpathObj = xpath_search(data_set->input, lrm_op_xpath); + + free(lrm_op_xpath); + + if (lrm_op_xpathObj) { + int max2 = numXpathResults(lrm_op_xpathObj); + int lpc2 = 0; + + for (lpc2 = 0; lpc2 < max2; lpc2++) { + xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj, + lpc2); + + should_block = is_matched_failure(xml_name, pref, + lrm_op_xml); + if (should_block) { + break; + } + } + } + freeXpathObject(lrm_op_xpathObj); + + if (should_block) { + break; + } + } + } + } + + free(xml_name); + freeXpathObject(xpathObj); + + return should_block; +} + +int +get_failcount_full(node_t *node, resource_t *rsc, time_t *last_failure, + bool effective, xmlNode *xml_op, pe_working_set_t *data_set) +{ + char *key = NULL; + const char *value = NULL; + struct fail_search search = { rsc, data_set, 0, 0, NULL }; + + /* Optimize the "normal" case */ + key = crm_failcount_name(rsc->clone_name? rsc->clone_name : rsc->id); + value = g_hash_table_lookup(node->details->attrs, key); + search.count = char2score(value); + crm_trace("%s = %s", key, value); + free(key); + + if (value) { + key = crm_lastfailure_name(rsc->clone_name? rsc->clone_name : rsc->id); + value = g_hash_table_lookup(node->details->attrs, key); + search.last = crm_int_helper(value, NULL); + free(key); + + /* This block is still relevant once we omit anonymous instance numbers + * because stopped clones won't have clone_name set + */ + } else if (is_not_set(rsc->flags, pe_rsc_unique)) { + search.rsc = uber_parent(rsc); + search.key = clone_strip(rsc->id); + + g_hash_table_foreach(node->details->attrs, get_failcount_by_prefix, + &search); + free(search.key); + search.key = NULL; + } + + if (search.count != 0 && search.last != 0 && last_failure) { + *last_failure = search.last; + } + + if (search.count && rsc->failure_timeout) { + /* Never time-out if blocking failures are configured */ + if (block_failure(node, rsc, xml_op, data_set)) { + pe_warn("Setting %s.failure-timeout=%d conflicts with on-fail=block: ignoring timeout", + rsc->id, rsc->failure_timeout); + rsc->failure_timeout = 0; +#if 0 + /* A good idea? */ + } else if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { + /* In this case, stop.on-fail defaults to block in unpack_operation() */ + rsc->failure_timeout = 0; +#endif + } + } + + if (effective && (search.count != 0) && (search.last != 0) + && rsc->failure_timeout) { + + if (search.last > 0) { + time_t now = get_effective_time(data_set); + + if (now > (search.last + rsc->failure_timeout)) { + crm_debug("Failcount for %s on %s has expired (limit was %ds)", + search.rsc->id, node->details->uname, + rsc->failure_timeout); + search.count = 0; + } + } + } + + if (search.count != 0) { + char *score = score2char(search.count); + + crm_info("%s has failed %s times on %s", + search.rsc->id, score, node->details->uname); + free(score); + } + + return search.count; +} + +/* If it's a resource container, get its failcount plus all the failcounts of + * the resources within it + */ +int +get_failcount_all(node_t *node, resource_t *rsc, time_t *last_failure, + pe_working_set_t *data_set) +{ + int failcount_all = 0; + + failcount_all = get_failcount(node, rsc, last_failure, data_set); + + if (rsc->fillers) { + GListPtr gIter = NULL; + + for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { + resource_t *filler = (resource_t *) gIter->data; + time_t filler_last_failure = 0; + + failcount_all += get_failcount(node, filler, &filler_last_failure, + data_set); + + if (last_failure && filler_last_failure > *last_failure) { + *last_failure = filler_last_failure; + } + } + + if (failcount_all != 0) { + char *score = score2char(failcount_all); + + crm_info("Container %s and the resources within it have failed %s times on %s", + rsc->id, score, node->details->uname); + free(score); + } + } + + return failcount_all; +} diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 6be9bb1735..2b53999f9d 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,2175 +1,1904 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include pe_working_set_t *pe_dataset = NULL; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set); static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return TRUE if node can be fenced, FALSE otherwise * * \note This function should only be called for cluster nodes and baremetal * remote nodes; guest nodes are fenced by stopping their container * resource, so fence execution requirements do not apply to them. */ bool pe_can_fence(pe_working_set_t * data_set, node_t *node) { if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return FALSE; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return FALSE; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return TRUE; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return TRUE; } else if(node == NULL) { return FALSE; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return TRUE; } crm_trace("Cannot fence %s", node->details->uname); return FALSE; } node_t * node_copy(const node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); new_node = calloc(1, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = merge_weights(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } GHashTable * node_hash_from_list(GListPtr list) { GListPtr gIter = list; GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *n = node_copy(node); g_hash_table_insert(result, (gpointer) n->details->id, n); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; GListPtr gIter = list1; for (; gIter != NULL; gIter = gIter->next) { node_t *new_node = NULL; node_t *this_node = (node_t *) gIter->data; if (filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if (reset) { new_node->weight = 0; } if (new_node != NULL) { result = g_list_prepend(result, new_node); } } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { const node_t *node_a = a; const node_t *node_b = b; return strcmp(node_a->details->uname, node_b->details->uname); } void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes) { GHashTable *hash = nodes; GHashTableIter iter; node_t *node = NULL; if (rsc) { hash = rsc->allowed_nodes; } if (rsc && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't show the allocation scores for orphans */ return; } if (level == 0) { char score[128]; int len = sizeof(score); /* For now we want this in sorted order to keep the regression tests happy */ GListPtr gIter = NULL; GListPtr list = g_hash_table_get_values(hash); list = g_list_sort(list, sort_node_uname); gIter = list; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } } g_list_free(list); } else if (hash) { char score[128]; int len = sizeof(score); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { do_crm_log_alias(LOG_TRACE, file, function, line, "%s: %s allocation score on %s: %s", comment, rsc->id, node->details->uname, score); } else { do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment, node->details->uname, score); } } } if (rsc && rsc->children) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; dump_node_scores_worker(level, file, function, line, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; int len = 0; char *new_text = NULL; len = strlen(*dump_text) + strlen(" ") + strlen(key) + strlen("=") + strlen(value) + 1; new_text = calloc(1, len); sprintf(new_text, "%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(node->details->uname) + strlen(" capacity:") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(rsc->id) + strlen(" utilization on ") + strlen(node->details->uname) + strlen(":") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, free(key); return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } else if(save_action) { #if 0 action = g_hash_table_lookup(data_set->singletons, key); #else /* More expensive but takes 'node' into account */ possible_matches = find_actions(data_set->actions, key, on_node); #endif } if(data_set->singletons == NULL) { data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); } if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action (%d) %s for %s on %s", action->id, task, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating%s action %d: %s for %s on %s %d", optional ? "" : " mandatory", data_set->action_id, key, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", optional); } action = calloc(1, sizeof(action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = node_copy(on_node); } action->uuid = strdup(key); pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); } /* Implied by calloc()... action->actions_before = NULL; action->actions_after = NULL; action->pseudo = FALSE; action->dumped = FALSE; action->processed = FALSE; action->seen_count = 0; */ action->extra = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); action->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); if(rsc == NULL) { g_hash_table_insert(data_set->singletons, action->uuid, action); } } if (rsc != NULL) { action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); unpack_operation(action, action->op_entry, rsc->container, data_set); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (optional == FALSE) { pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_set_action_bit(action, pe_action_have_node_attrs); unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid); pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (action->node->details->online == FALSE) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { pe_fence_node(data_set, action->node, "because node is unclean"); } } else if (action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid); pe_set_action_bit(action, pe_action_runnable); #if 0 /* * No point checking this * - if we don't have quorum we can't stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { pe_clear_action_bit(action, pe_action_runnable); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static const char * unpack_operation_on_fail(action_t * action) { const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); return NULL; } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval = NULL; const char *enabled = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child(action->rsc->ops_xml); operation && !value; operation = __xml_next_element(operation)) { if (!crm_str_eq((const char *)operation->name, "op", TRUE)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) { continue; } else if (crm_get_interval(interval) <= 0) { continue; } value = on_fail; } } return value; } static xmlNode * find_min_interval_mon(resource_t * rsc, gboolean include_disabled) { int number = 0; int min_interval = -1; const char *name = NULL; const char *value = NULL; const char *interval = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (safe_str_neq(name, RSC_STATUS)) { continue; } number = crm_get_interval(interval); if (number < 0) { continue; } if (min_interval < 0 || number < min_interval) { min_interval = number; op = operation; } } } return op; } void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set) { int value_i = 0; unsigned long long interval = 0; unsigned long long start_delay = 0; char *value_ms = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); if (xml_obj) { xmlAttrPtr xIter = NULL; for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); g_hash_table_remove(action->meta, "id"); field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval = crm_get_interval(value); if (interval > 0) { value_ms = crm_itoa(interval); g_hash_table_replace(action->meta, strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } /* Begin compatibility code ("requires" set on start action not resource) */ value = g_hash_table_lookup(action->meta, "requires"); if (safe_str_neq(action->task, RSC_START) && safe_str_neq(action->task, RSC_PROMOTE)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if (safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if (safe_str_eq(value, "unfencing")) { action->needs = rsc_req_stonith; set_bit(action->rsc->flags, pe_rsc_needs_unfencing); if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires unfencing but fencing is disabled", action->rsc->id); } } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires fencing but fencing is disabled", action->rsc->id); } /* End compatibility code */ } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->task, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (safe_str_eq(value, "standby")) { action->on_fail = action_fail_standby; value = "node standby"; } else if (safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (safe_str_eq(value, "restart-container")) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* for baremetal remote nodes, ensure that any failure that results in * dropping an active connection to a remote node results in fencing of * the remote node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (value == NULL && is_rsc_baremetal_remote_node(action->rsc, data_set) && !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && interval == 0) && (safe_str_neq(action->task, CRMD_ACTION_START))) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence baremetal remote node (default)"; } else { value = "recover baremetal remote node connection (default)"; } if (action->rsc->remote_reconnect_interval) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } else if (interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) { crm_time_t *origin = NULL; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); origin = crm_time_new(value); if (origin == NULL) { crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s", ID(xml_obj), value); } else { crm_time_t *delay = NULL; int rc = crm_time_compare(origin, data_set->now); long long delay_s = 0; int interval_s = (interval / 1000); crm_trace("Origin: %s, interval: %d", value, interval_s); /* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */ while(rc > 0) { crm_time_add_seconds(origin, -interval_s); rc = crm_time_compare(origin, data_set->now); } /* Now find the first "multiple" that occurs after 'now' */ while (rc < 0) { crm_time_add_seconds(origin, interval_s); rc = crm_time_compare(origin, data_set->now); } delay = crm_time_calculate_duration(origin, data_set->now); crm_time_log(LOG_TRACE, "origin", origin, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "now", data_set->now, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration); delay_s = crm_time_get_seconds(delay); CRM_CHECK(delay_s >= 0, delay_s = 0); start_delay = delay_s * 1000; crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj)); g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); crm_time_free(origin); crm_time_free(delay); } } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if (value == NULL && xml_obj == NULL && safe_str_eq(action->task, RSC_STATUS) && interval == 0) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); pe_rsc_trace(action->rsc, "\t%s uses the timeout value '%s' from the minimum interval monitor", action->uuid, value); } } if (value == NULL) { value = pe_pref(data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } static xmlNode * find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled) { unsigned long long number = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } number = crm_get_interval(interval); match_key = generate_op_key(rsc->id, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = generate_op_key(rsc->clone_name, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = generate_op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = generate_op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } CRM_ASSERT(node->details); crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details) { long options = pe_print_log | pe_print_pending; if (rsc == NULL) { do_crm_log(log_level - 1, "%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* action_warpper_t* */ g_list_free_full(action->actions_after, free); /* action_warpper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->cancel_task); free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if (value == NULL) { /* skip */ } else if (safe_str_eq(value, "0")) { /* skip */ } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; break; default: break; } } return task; } action_t * find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (uuid != NULL && safe_str_neq(uuid, action->uuid)) { continue; } else if (task != NULL && safe_str_neq(task, action->task)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (safe_str_neq(key, action->uuid)) { crm_trace("%s does not match action %s", key, action->uuid); continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, on_node->details->uname); action->node = node_copy(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } else { crm_trace("Action %s on node %s does not match requested node %s", key, action->node->details->uname, on_node->details->uname); } } return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { crm_trace("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if (on_node == NULL || action->node == NULL) { crm_trace("on_node=%p, action->node=%p", on_node, action->node); continue; } else if (safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_prepend(result, action); } crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); } return result; } static void resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag) { node_t *match = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = node_copy(node); match->weight = merge_weights(score, node->weight); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node_iter = (node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); if (safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_const_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_const_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ int last_a = -1; int last_b = -1; crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %d vs %d", last_a, last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; int dummy = -1; const char *a_magic = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic a"); } if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (ie. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } -struct fail_search { - resource_t *rsc; - pe_working_set_t * data_set; - - int count; - long long last; - char *key; -}; - -static void -get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data) -{ - struct fail_search *search = user_data; - const char *attr_id = key_p; - const char *match = strstr(attr_id, search->key); - resource_t *parent = NULL; - - if (match == NULL) { - return; - } - - /* we are only incrementing the failcounts here if the rsc - * that matches our prefix has the same uber parent as the rsc we're - * calculating the failcounts for. This prevents false positive matches - * where unrelated resources may have similar prefixes in their names. - * - * search->rsc is already set to be the uber parent. */ - parent = uber_parent(pe_find_resource(search->data_set->resources, match)); - if (parent == NULL || parent != search->rsc) { - return; - } - if (strstr(attr_id, "last-failure-") == attr_id) { - search->last = crm_int_helper(value, NULL); - - } else if (strstr(attr_id, "fail-count-") == attr_id) { - search->count += char2score(value); - } -} - -int -get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) -{ - return get_failcount_full(node, rsc, last_failure, TRUE, NULL, data_set); -} - -static gboolean -is_matched_failure(const char * rsc_id, xmlNode * conf_op_xml, xmlNode * lrm_op_xml) -{ - gboolean matched = FALSE; - const char *conf_op_name = NULL; - int conf_op_interval = 0; - const char *lrm_op_task = NULL; - int lrm_op_interval = 0; - const char *lrm_op_id = NULL; - char *last_failure_key = NULL; - - if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) { - return FALSE; - } - - conf_op_name = crm_element_value(conf_op_xml, "name"); - conf_op_interval = crm_get_msec(crm_element_value(conf_op_xml, "interval")); - lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK); - crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_INTERVAL, &lrm_op_interval); - - if (safe_str_eq(conf_op_name, lrm_op_task) == FALSE - || conf_op_interval != lrm_op_interval) { - return FALSE; - } - - lrm_op_id = ID(lrm_op_xml); - last_failure_key = generate_op_key(rsc_id, "last_failure", 0); - - if (safe_str_eq(last_failure_key, lrm_op_id)) { - matched = TRUE; - - } else { - char *expected_op_key = generate_op_key(rsc_id, conf_op_name, conf_op_interval); - - if (safe_str_eq(expected_op_key, lrm_op_id)) { - int rc = 0; - int target_rc = get_target_rc(lrm_op_xml); - - crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc); - if (rc != target_rc) { - matched = TRUE; - } - } - free(expected_op_key); - } - - free(last_failure_key); - return matched; -} - -static gboolean -block_failure(node_t * node, resource_t * rsc, xmlNode * xml_op, pe_working_set_t * data_set) -{ - char *xml_name = clone_strip(rsc->id); - char *xpath = crm_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name); - xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); - gboolean should_block = FALSE; - - free(xpath); - - if (xpathObj) { - int max = numXpathResults(xpathObj); - int lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *pref = getXpathResult(xpathObj, lpc); - - if (xml_op) { - should_block = is_matched_failure(xml_name, pref, xml_op); - if (should_block) { - break; - } - - } else { - const char *conf_op_name = NULL; - int conf_op_interval = 0; - char *lrm_op_xpath = NULL; - xmlXPathObject *lrm_op_xpathObj = NULL; - - conf_op_name = crm_element_value(pref, "name"); - conf_op_interval = crm_get_msec(crm_element_value(pref, "interval")); - - lrm_op_xpath = crm_strdup_printf("//node_state[@uname='%s']" - "//lrm_resource[@id='%s']" - "/lrm_rsc_op[@operation='%s'][@interval='%d']", - node->details->uname, xml_name, - conf_op_name, conf_op_interval); - lrm_op_xpathObj = xpath_search(data_set->input, lrm_op_xpath); - - free(lrm_op_xpath); - - if (lrm_op_xpathObj) { - int max2 = numXpathResults(lrm_op_xpathObj); - int lpc2 = 0; - - for (lpc2 = 0; lpc2 < max2; lpc2++) { - xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj, lpc2); - - should_block = is_matched_failure(xml_name, pref, lrm_op_xml); - if (should_block) { - break; - } - } - } - freeXpathObject(lrm_op_xpathObj); - - if (should_block) { - break; - } - } - } - } - - free(xml_name); - freeXpathObject(xpathObj); - - return should_block; -} - -int -get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, - bool effective, xmlNode * xml_op, pe_working_set_t * data_set) -{ - char *key = NULL; - const char *value = NULL; - struct fail_search search = { rsc, data_set, 0, 0, NULL }; - - /* Optimize the "normal" case */ - key = crm_concat("fail-count", rsc->clone_name ? rsc->clone_name : rsc->id, '-'); - value = g_hash_table_lookup(node->details->attrs, key); - search.count = char2score(value); - crm_trace("%s = %s", key, value); - free(key); - - if (value) { - key = crm_concat("last-failure", rsc->clone_name ? rsc->clone_name : rsc->id, '-'); - value = g_hash_table_lookup(node->details->attrs, key); - search.last = crm_int_helper(value, NULL); - free(key); - - /* This block is still relevant once we omit anonymous instance numbers - * because stopped clones won't have clone_name set - */ - } else if (is_not_set(rsc->flags, pe_rsc_unique)) { - search.rsc = uber_parent(rsc); - search.key = clone_strip(rsc->id); - - g_hash_table_foreach(node->details->attrs, get_failcount_by_prefix, &search); - free(search.key); - search.key = NULL; - } - - if (search.count != 0 && search.last != 0 && last_failure) { - *last_failure = search.last; - } - - if(search.count && rsc->failure_timeout) { - /* Never time-out if blocking failures are configured */ - if (block_failure(node, rsc, xml_op, data_set)) { - pe_warn("Setting %s.failure-timeout=%d conflicts with on-fail=block: ignoring timeout", rsc->id, rsc->failure_timeout); - rsc->failure_timeout = 0; -#if 0 - /* A good idea? */ - } else if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { - /* In this case, stop.on-fail defaults to block in unpack_operation() */ - rsc->failure_timeout = 0; -#endif - } - } - - if (effective && search.count != 0 && search.last != 0 && rsc->failure_timeout) { - if (search.last > 0) { - time_t now = get_effective_time(data_set); - - if (now > (search.last + rsc->failure_timeout)) { - crm_debug("Failcount for %s on %s has expired (limit was %ds)", - search.rsc->id, node->details->uname, rsc->failure_timeout); - search.count = 0; - } - } - } - - if (search.count != 0) { - char *score = score2char(search.count); - - crm_info("%s has failed %s times on %s", search.rsc->id, score, node->details->uname); - free(score); - } - - return search.count; -} - -/* If it's a resource container, get its failcount plus all the failcounts of the resources within it */ -int -get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) -{ - int failcount_all = 0; - - failcount_all = get_failcount(node, rsc, last_failure, data_set); - - if (rsc->fillers) { - GListPtr gIter = NULL; - - for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { - resource_t *filler = (resource_t *) gIter->data; - time_t filler_last_failure = 0; - - failcount_all += get_failcount(node, filler, &filler_last_failure, data_set); - - if (last_failure && filler_last_failure > *last_failure) { - *last_failure = filler_last_failure; - } - } - - if (failcount_all != 0) { - char *score = score2char(failcount_all); - - crm_info("Container %s and the resources within it have failed %s times on %s", - rsc->id, score, node->details->uname); - free(score); - } - } - - return failcount_all; -} - gboolean get_target_role(resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (value == NULL || safe_str_eq("started", value) || safe_str_eq("default", value)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (uber_parent(rsc)->variant == pe_master) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { crm_config_err("%s is not part of a master/slave resource, a %s of '%s' makes no sense", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... its happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *after = (action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { action_t *op = NULL; if(data_set->singletons) { op = g_hash_table_lookup(data_set->singletons, name); } if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_pseudo); set_bit(op->flags, pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { ticket_t *ticket = NULL; if (ticket_id == NULL || strlen(ticket_id) == 0) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } static void filter_parameters(xmlNode * param_set, const char *param_string, bool need_present) { int len = 0; char *name = NULL; char *match = NULL; if (param_set == NULL) { return; } if (param_set) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; xIter = xIter->next; name = NULL; len = strlen(prop_name) + 3; name = malloc(len); if(name) { sprintf(name, " %s ", prop_name); name[len - 1] = 0; match = strstr(param_string, name); } if (need_present && match == NULL) { crm_trace("%s not found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } else if (need_present == FALSE && match) { crm_trace("%s found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } free(name); } } } op_digest_cache_t * rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; GHashTable *local_rsc_params = NULL; xmlNode *local_versioned_params = NULL; action_t *action = NULL; char *key = NULL; int interval = 0; const char *op_id = ID(xml_op); const char *interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_all; const char *digest_restart; const char *secure_list; const char *restart_list; const char *op_version; data = g_hash_table_lookup(node->details->digest_cache, op_id); if (data) { return data; } data = calloc(1, sizeof(op_digest_cache_t)); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); /* key is freed in custom_action */ interval = crm_parse_int(interval_s, "0"); key = generate_op_key(rsc->id, task, interval); action = custom_action(rsc, key, task, node, TRUE, FALSE, data_set); key = NULL; local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_rsc_params, rsc, node, data_set); local_versioned_params = create_xml_node(NULL, XML_TAG_VER_ATTRS); pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set); data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); filter_action_parameters(data->params_all, op_version); crm_summarize_versioned_params(data->params_all, rsc->versioned_parameters); crm_summarize_versioned_params(data->params_all, local_versioned_params); data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (secure_list && is_set(data_set->flags, pe_flag_sanitized)) { data->params_secure = copy_xml(data->params_all); if (secure_list) { filter_parameters(data->params_secure, secure_list, FALSE); } data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version); } if (digest_restart) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_parameters(data->params_restart, restart_list, TRUE); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } data->rc = RSC_DIGEST_MATCH; if (digest_restart && strcmp(data->digest_restart_calc, digest_restart) != 0) { data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { data->rc = RSC_DIGEST_ALL; } g_hash_table_insert(node->details->digest_cache, strdup(op_id), data); g_hash_table_destroy(local_rsc_params); free_xml(local_versioned_params); pe_free_action(action); return data; } const char *rsc_printable_id(resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void clear_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void set_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; set_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; set_bit_recursive(child_rsc, flag); } } action_t * pe_fence_op(node_t * node, const char *op, bool optional, pe_working_set_t * data_set) { char *key = NULL; action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); if(data_set->singletons) { stonith_op = g_hash_table_lookup(data_set->singletons, key); } if(stonith_op == NULL) { stonith_op = custom_action(NULL, key, CRM_OP_FENCE, node, optional, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); } else { free(key); } if(optional == FALSE) { crm_trace("%s is no longer optional", stonith_op->uuid); pe_clear_action_bit(stonith_op, pe_action_optional); } return stonith_op; } void trigger_unfencing( resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set) { if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { action_t *unfence = pe_fence_op(node, "on", FALSE, data_set); crm_notice("Unfencing %s: %s", node->details->uname, reason); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { tag_t *tag = NULL; GListPtr gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (crm_str_eq(existing_ref, obj_ref, TRUE)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } diff --git a/tools/crm_failcount b/tools/crm_failcount index ed697e961a..872a1f19b2 100755 --- a/tools/crm_failcount +++ b/tools/crm_failcount @@ -1,62 +1,118 @@ #!/bin/bash -resource="" -options="" -target=`crm_node -n` - -TEMP=`getopt -o DGQVN:U:v:i:l:r: --long help,version,resource-id:,node:,uname:,attr-value:,delete-attr,get-value,attr-id:,lifetime:,quiet \ - -n 'crm_failcount' -- "$@"` - -if [ $? != 0 ] ; then echo "crm_failcount - A convenience wrapper for crm_attribute"; echo ""; crm_attribute -?; exit 1 ; fi - -# Note the quotes around `$TEMP': they are essential! -eval set -- "$TEMP" - -function show_help() { - echo "crm_failcount - A convenience wrapper for crm_attribute"; - echo ""; - echo "Set, update or remove the failcount for the specified resource on the named node"; - echo ""; - echo "Usage: crm_failcount -r resource_name command [options]"; - echo "Options:" - echo " --help This text" - echo " --version Version information" - echo " -V, --verbose Increase debug output" - echo " -q, --quiet Print only the value on stdout" - echo "" - echo " -r, --resource-id=value The resource to update." - echo "" - echo "Commands:" - echo " -G, --query Query the current value of the attribute/option" - echo " -v, --update=value Update the value of the attribute/option" - echo " -D, --delete Delete the attribute/option" - echo "" - echo "Additional Options:" - echo " -N, --node=value Set an attribute for the named node (instead of the current one)." - echo " -l, --lifetime=value Until when should the setting take affect." - echo " Valid values: reboot, forever" - echo " -i, --id=value (Advanced) The ID used to identify the attribute" +USAGE_TEXT="Usage: crm_failcount [] +Common options: + --help Display this text, then exit + --version Display version information, then exit + -V, --verbose Specify multiple times to increase debug output + -q, --quiet Print only the value (if querying) + +Commands: + -G, --query Query the current value of the resource's fail count + -D, --delete Delete resource's recorded failures + +Additional Options: + -r, --resource=value Name of the resource to use (required) + -N, --node=value Set an attribute for the named node (instead of the current one)" + + +HELP_TEXT="crm_failcount - Query or delete resource fail counts + +crm_failcount is a convenience wrapper for crm_attribute (if querying) +and crm_resource --cleanup (if deleting). + +$USAGE_TEXT" + + +exit_usage() { + if [ $# -gt 0 ]; then + echo "error: $@" >&2 + fi + echo + echo "$USAGE_TEXT" + exit 1 } +warn() { + echo "warning: $@" >&2 +} + +command="" +options="" +resource="" +target=$(crm_node -n 2>/dev/null) + +LONGOPTS_COMMON="help,version,verbose,quiet" +LONGOPTS_COMMANDS="query,delete" +LONGOPTS_OTHER="resource:,node:" +LONGOPTS_COMPAT="delete-attr,get-value,resource-id:,uname:,lifetime:,attr-value:,attr-id:" + +LONGOPTS="$LONGOPTS_COMMON,$LONGOPTS_COMMANDS,$LONGOPTS_OTHER,$LONGOPTS_COMPAT" + +TEMP=$(getopt -o qDGQVN:U:v:i:l:r: --long $LONGOPTS -n crm_failcount -- "$@") +if [ $? != 0 ]; then + exit_usage +fi +eval set -- "$TEMP" # Quotes around $TEMP are essential + while true ; do - case "$1" in - -N|--node) target="$2"; shift; shift;; - -U|--uname) target="$2"; shift; shift;; - -v|--attr-value|-i|--attr-id) options="$options $1 $2"; shift; shift;; - -Q|--quiet|-D|--delete-attr|-G|--get-value|-V) options="$options $1"; shift;; - -r|--resource-id) options="$options -n fail-count-$2"; resource="$2"; shift; shift;; - --version) crm_attribute --version; exit 0;; - --help) - show_help - exit 0;; - --) shift ; break ;; - *) echo "Unknown option: $1. See --help for details." exit 1;; - esac + case "$1" in + --help) + echo "$HELP_TEXT" + exit 0 + ;; + --version) + crm_attribute --version + exit $? + ;; + -Q|--quiet|-V|--verbose) + options="$options $1" + shift + ;; + -G|--query|--get-value) + command="--query" + shift + ;; + -D|--delete|--delete-attr) + command="--delete" + shift + ;; + -r|--resource|--resource-id) + resource="$2" + shift 2 + ;; + -N|--node|-U|--uname) + target="$2" + shift 2 + ;; + -v|--attr-value) + if [ "$2" = "0" ]; then + command="--delete" + else + warn "ignoring deprecated option '$1' with nonzero value" + fi + shift 2 + ;; + -i|--attr-id|-l|--lifetime) + warn "ignoring deprecated option '$1'" + shift 2 + ;; + --) + shift + break + ;; + *) + exit_usage "unknown option '$1'" + ;; + esac done -if [ "x$resource" = x ]; then - echo "You must supply a resource name to check. See 'crm_failcount --help' for details" - exit 1 -fi +[ -n "$command" ] || exit_usage "must specify a command" +[ -n "$resource" ] || exit_usage "resource name required" +[ -n "$target" ] || exit_usage "node name required" -crm_attribute -N $target $options -t status -d 0 +if [ "$command" = "--query" ]; then + crm_attribute $options $command -N "$target" -n "fail-count-$resource" -t status -d 0 +else + crm_resource --cleanup $options -N "$target" -r "$resource" +fi diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 80872122a7..776aea8b31 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,4218 +1,4221 @@ /* * Copyright (C) 2004-2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include <../lib/pengine/unpack.h> #include <../pengine/pengine.h> #include extern void cleanup_alloc_calculations(pe_working_set_t * data_set); void clean_up(int rc); void crm_diff_update(const char *event, xmlNode * msg); gboolean mon_refresh_display(gpointer user_data); int cib_connect(gboolean full); void mon_st_callback(stonith_t * st, stonith_event_t * e); static char *get_node_display_name(node_t *node); /* * Definitions indicating which items to print */ #define mon_show_times (0x0001U) #define mon_show_stack (0x0002U) #define mon_show_dc (0x0004U) #define mon_show_count (0x0008U) #define mon_show_nodes (0x0010U) #define mon_show_resources (0x0020U) #define mon_show_attributes (0x0040U) #define mon_show_failcounts (0x0080U) #define mon_show_operations (0x0100U) #define mon_show_tickets (0x0200U) #define mon_show_bans (0x0400U) #define mon_show_headers (mon_show_times | mon_show_stack | mon_show_dc | mon_show_count) #define mon_show_default (mon_show_headers | mon_show_nodes | mon_show_resources) #define mon_show_all (mon_show_default | mon_show_attributes | mon_show_failcounts \ | mon_show_operations | mon_show_tickets | mon_show_bans) unsigned int show = mon_show_default; /* * Definitions indicating how to output */ enum mon_output_format_e { mon_output_none, mon_output_monitor, mon_output_plain, mon_output_console, mon_output_xml, mon_output_html, mon_output_cgi } output_format = mon_output_console; char *output_filename = NULL; /* if sending output to a file, its name */ /* other globals */ char *xml_file = NULL; char *pid_file = NULL; char *snmp_target = NULL; char *snmp_community = NULL; gboolean group_by_node = FALSE; gboolean inactive_resources = FALSE; int reconnect_msec = 5000; gboolean daemonize = FALSE; GMainLoop *mainloop = NULL; guint timer_id = 0; GList *attr_list = NULL; const char *crm_mail_host = NULL; const char *crm_mail_prefix = NULL; const char *crm_mail_from = NULL; const char *crm_mail_to = NULL; const char *external_agent = NULL; const char *external_recipient = NULL; cib_t *cib = NULL; stonith_t *st = NULL; xmlNode *current_cib = NULL; gboolean one_shot = FALSE; gboolean has_warnings = FALSE; gboolean print_timing = FALSE; gboolean watch_fencing = FALSE; gboolean print_brief = FALSE; gboolean print_pending = TRUE; gboolean print_clone_detail = FALSE; /* FIXME allow, detect, and correctly interpret glob pattern or regex? */ const char *print_neg_location_prefix = ""; /* Never display node attributes whose name starts with one of these prefixes */ -#define FILTER_STR { "shutdown", "terminate", "standby", "fail-count", \ - "last-failure", "probe_complete", "#", NULL } +#define FILTER_STR { CRM_FAIL_COUNT_PREFIX, CRM_LAST_FAILURE_PREFIX, \ + "shutdown", "terminate", "standby", "probe_complete", \ + "#", NULL } long last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; /* * 1.3.6.1.4.1.32723 has been assigned to the project by IANA * http://www.iana.org/assignments/enterprise-numbers */ #define PACEMAKER_PREFIX "1.3.6.1.4.1.32723" #define PACEMAKER_TRAP_PREFIX PACEMAKER_PREFIX ".1" #define snmp_crm_trap_oid PACEMAKER_TRAP_PREFIX #define snmp_crm_oid_node PACEMAKER_TRAP_PREFIX ".1" #define snmp_crm_oid_rsc PACEMAKER_TRAP_PREFIX ".2" #define snmp_crm_oid_task PACEMAKER_TRAP_PREFIX ".3" #define snmp_crm_oid_desc PACEMAKER_TRAP_PREFIX ".4" #define snmp_crm_oid_status PACEMAKER_TRAP_PREFIX ".5" #define snmp_crm_oid_rc PACEMAKER_TRAP_PREFIX ".6" #define snmp_crm_oid_trc PACEMAKER_TRAP_PREFIX ".7" /* Define exit codes for monitoring-compatible output */ #define MON_STATUS_OK (0) #define MON_STATUS_WARN (1) /* Convenience macro for prettifying output (e.g. "node" vs "nodes") */ #define s_if_plural(i) (((i) == 1)? "" : "s") #if CURSES_ENABLED # define print_dot() if (output_format == mon_output_console) { \ printw("."); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, "."); \ } #else # define print_dot() fprintf(stdout, "."); #endif #if CURSES_ENABLED # define print_as(fmt, args...) if (output_format == mon_output_console) { \ printw(fmt, ##args); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, fmt, ##args); \ } #else # define print_as(fmt, args...) fprintf(stdout, fmt, ##args); #endif static void blank_screen(void) { #if CURSES_ENABLED int lpc = 0; for (lpc = 0; lpc < LINES; lpc++) { move(lpc, 0); clrtoeol(); } move(0, 0); refresh(); #endif } static gboolean mon_timer_popped(gpointer data) { int rc = pcmk_ok; #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif if (timer_id > 0) { g_source_remove(timer_id); } print_as("Reconnecting...\n"); rc = cib_connect(TRUE); if (rc != pcmk_ok) { timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return FALSE; } static void mon_cib_connection_destroy(gpointer user_data) { print_as("Connection to the CIB terminated\n"); if (cib) { cib->cmds->signoff(cib); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return; } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(EX_OK); } #if ON_DARWIN # define sighandler_t sig_t #endif #if CURSES_ENABLED # ifndef HAVE_SIGHANDLER_T typedef void (*sighandler_t) (int); # endif static sighandler_t ncurses_winch_handler; static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); mainloop_set_trigger(refresh_trigger); } not_done--; } #endif int cib_connect(gboolean full) { int rc = pcmk_ok; static gboolean need_pass = TRUE; CRM_CHECK(cib != NULL, return -EINVAL); if (getenv("CIB_passwd") != NULL) { need_pass = FALSE; } if (watch_fencing && st == NULL) { st = stonith_api_new(); } if (watch_fencing && st->state == stonith_disconnected) { crm_trace("Connecting to stonith"); rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback); } } if (cib->state != cib_connected_query && cib->state != cib_connected_command) { crm_trace("Connecting to the CIB"); if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) { need_pass = FALSE; print_as("Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != pcmk_ok) { return rc; } rc = cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); if (rc == pcmk_ok) { mon_refresh_display(NULL); } if (rc == pcmk_ok && full) { if (rc == pcmk_ok) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); if (rc == -EPROTONOSUPPORT) { print_as ("Notification setup not supported, won't be able to reconnect after failure"); if (output_format == mon_output_console) { sleep(2); } rc = pcmk_ok; } } if (rc == pcmk_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != pcmk_ok) { print_as("Notification setup failed, could not monitor CIB actions"); if (output_format == mon_output_console) { sleep(2); } clean_up(-rc); } } } return rc; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\tDisplay only essential output" }, {"-spacer-", 1, 0, '-', "\nModes:"}, {"as-html", 1, 0, 'h', "\tWrite cluster status to the named html file"}, {"as-xml", 0, 0, 'X', "\t\tWrite cluster status as xml to stdout. This will enable one-shot mode."}, {"web-cgi", 0, 0, 'w', "\t\tWeb mode with output suitable for cgi"}, {"simple-status", 0, 0, 's', "\tDisplay the cluster status once as a simple one line output (suitable for nagios)"}, {"snmp-traps", 1, 0, 'S', "\tSend SNMP traps to this station", !ENABLE_SNMP}, {"snmp-community", 1, 0, 'C', "Specify community for SNMP traps(default is NULL)", !ENABLE_SNMP}, {"mail-to", 1, 0, 'T', "\tSend Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix", !ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "\nDisplay Options:"}, {"group-by-node", 0, 0, 'n', "\tGroup resources by node" }, {"inactive", 0, 0, 'r', "\t\tDisplay inactive resources" }, {"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"}, {"operations", 0, 0, 'o', "\tDisplay resource operation history" }, {"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" }, {"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"}, {"watch-fencing", 0, 0, 'W', "\tListen for fencing events. For use with --external-agent, --mail-to and/or --snmp-traps where supported"}, {"neg-locations", 2, 0, 'L', "Display negative location constraints [optionally filtered by id prefix]"}, {"show-node-attributes", 0, 0, 'A', "Display node attributes" }, {"hide-headers", 0, 0, 'D', "\tHide all headers" }, {"show-detail", 0, 0, 'R', "\tShow more details (node IDs, individual clone instances)" }, {"brief", 0, 0, 'b', "\t\tBrief output" }, {"pending", 0, 0, 'j', "\t\tDisplay pending state if 'record-pending' is enabled", pcmk_option_hidden}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"interval", 1, 0, 'i', "\tUpdate frequency in seconds" }, {"one-shot", 0, 0, '1', "\t\tDisplay the cluster status once on the console and exit"}, {"disable-ncurses",0, 0, 'N', "\tDisable the use of ncurses", !CURSES_ENABLED}, {"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"}, {"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"}, {"mail-from", 1, 0, 'F', "\tMail alerts should come from the named user", !ENABLE_ESMTP}, {"mail-host", 1, 0, 'H', "\tMail alerts should be sent via the named host", !ENABLE_ESMTP}, {"mail-prefix", 1, 0, 'P', "Subjects for mail alerts should start with this string", !ENABLE_ESMTP}, {"external-agent", 1, 0, 'E', "A program to run when resource operations take place."}, {"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."}, {"xml-file", 1, 0, 'x', NULL, pcmk_option_hidden}, {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console with updates as they occur:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console just once then exit:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon -1", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display your cluster status, group resources by node, and include inactive resources in the list:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --group-by-node --inactive", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --as-html /path/to/docroot/filename.html", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon and export the current cluster status as xml to stdout, then exit.:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --as-xml", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send email alerts:", pcmk_option_paragraph|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com", pcmk_option_example|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send SNMP alerts:", pcmk_option_paragraph|!ENABLE_SNMP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --snmp-traps snmptrapd.example.com", pcmk_option_example|!ENABLE_SNMP}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ #if CURSES_ENABLED static const char * get_option_desc(char c) { int lpc; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].name[0] == '-') continue; if (long_options[lpc].val == c) { const char * tab = NULL; tab = strrchr(long_options[lpc].desc, '\t'); return tab ? ++tab : long_options[lpc].desc; } } return NULL; } #define print_option_help(option, condition) \ print_as("%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option)); static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused) { int c; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'c': show ^= mon_show_tickets; break; case 'f': show ^= mon_show_failcounts; break; case 'n': group_by_node = ! group_by_node; break; case 'o': show ^= mon_show_operations; if ((show & mon_show_operations) == 0) { print_timing = 0; } break; case 'r': inactive_resources = ! inactive_resources; break; case 'R': print_clone_detail = ! print_clone_detail; break; case 't': print_timing = ! print_timing; if (print_timing) { show |= mon_show_operations; } break; case 'A': show ^= mon_show_attributes; break; case 'L': show ^= mon_show_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (show & mon_show_headers) { show &= ~mon_show_headers; } else { show |= mon_show_headers; } break; case 'b': print_brief = ! print_brief; break; case 'j': print_pending = ! print_pending; break; case '?': config_mode = TRUE; break; default: goto refresh; } if (!config_mode) goto refresh; blank_screen(); print_as("Display option change mode\n"); print_as("\n"); print_option_help('c', show & mon_show_tickets); print_option_help('f', show & mon_show_failcounts); print_option_help('n', group_by_node); print_option_help('o', show & mon_show_operations); print_option_help('r', inactive_resources); print_option_help('t', print_timing); print_option_help('A', show & mon_show_attributes); print_option_help('L', show & mon_show_bans); print_option_help('D', (show & mon_show_headers) == 0); print_option_help('R', print_clone_detail); print_option_help('b', print_brief); print_option_help('j', print_pending); print_as("\n"); print_as("Toggle fields via field letter, type any other key to return"); } refresh: mon_refresh_display(NULL); return TRUE; } #endif int main(int argc, char **argv) { int flag; int argerr = 0; int exit_code = 0; int option_index = 0; pid_file = strdup("/tmp/ClusterMon.pid"); crm_log_cli_init("crm_mon"); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); #if !defined (ON_DARWIN) && !defined (ON_BSD) /* prevent zombies */ signal(SIGCLD, SIG_IGN); #endif if (strcmp(crm_system_name, "crm_mon.cgi") == 0) { output_format = mon_output_cgi; one_shot = TRUE; } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'Q': show &= ~mon_show_times; break; case 'i': reconnect_msec = crm_get_msec(optarg); break; case 'n': group_by_node = TRUE; break; case 'r': inactive_resources = TRUE; break; case 'W': watch_fencing = TRUE; break; case 'd': daemonize = TRUE; break; case 't': print_timing = TRUE; show |= mon_show_operations; break; case 'o': show |= mon_show_operations; break; case 'f': show |= mon_show_failcounts; break; case 'A': show |= mon_show_attributes; break; case 'L': show |= mon_show_bans; print_neg_location_prefix = optarg? optarg : ""; break; case 'D': show &= ~mon_show_headers; break; case 'b': print_brief = TRUE; break; case 'j': print_pending = TRUE; break; case 'R': print_clone_detail = TRUE; break; case 'c': show |= mon_show_tickets; break; case 'p': free(pid_file); if(optarg == NULL) { return crm_help(flag, EX_USAGE); } pid_file = strdup(optarg); break; case 'x': if(optarg == NULL) { return crm_help(flag, EX_USAGE); } xml_file = strdup(optarg); one_shot = TRUE; break; case 'h': if(optarg == NULL) { return crm_help(flag, EX_USAGE); } output_format = mon_output_html; output_filename = strdup(optarg); umask(S_IWGRP | S_IWOTH); break; case 'X': output_format = mon_output_xml; one_shot = TRUE; break; case 'w': output_format = mon_output_cgi; one_shot = TRUE; break; case 's': output_format = mon_output_monitor; one_shot = TRUE; break; case 'S': snmp_target = optarg; break; case 'T': crm_mail_to = optarg; break; case 'F': crm_mail_from = optarg; break; case 'H': crm_mail_host = optarg; break; case 'P': crm_mail_prefix = optarg; break; case 'E': external_agent = optarg; break; case 'e': external_recipient = optarg; break; case '1': one_shot = TRUE; break; case 'N': if (output_format == mon_output_console) { output_format = mon_output_plain; } break; case 'C': snmp_community = optarg; break; case '$': case '?': return crm_help(flag, EX_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { return crm_help('?', EX_USAGE); } /* XML output always prints everything */ if (output_format == mon_output_xml) { show = mon_show_all; print_timing = TRUE; } if (one_shot) { if (output_format == mon_output_console) { output_format = mon_output_plain; } } else if (daemonize) { if ((output_format == mon_output_console) || (output_format == mon_output_plain)) { output_format = mon_output_none; } crm_enable_stderr(FALSE); if ((output_format != mon_output_html) && (output_format != mon_output_xml) && !snmp_target && !crm_mail_to && !external_agent) { printf ("Looks like you forgot to specify one or more of: --as-html, --as-xml, --mail-to, --snmp-target, --external-agent\n"); return crm_help('?', EX_USAGE); } crm_make_daemon(crm_system_name, TRUE, pid_file); } else if (output_format == mon_output_console) { #if CURSES_ENABLED initscr(); cbreak(); noecho(); crm_enable_stderr(FALSE); #else one_shot = TRUE; output_format = mon_output_plain; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } crm_info("Starting %s", crm_system_name); if (xml_file != NULL) { current_cib = filename2xml(xml_file); mon_refresh_display(NULL); return exit_code; } if (current_cib == NULL) { cib = cib_new(); do { if (!one_shot) { print_as("Attempting connection to the cluster...\n"); } exit_code = cib_connect(!one_shot); if (one_shot) { break; } else if (exit_code != pcmk_ok) { sleep(reconnect_msec / 1000); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } } while (exit_code == -ENOTCONN); if (exit_code != pcmk_ok) { if (output_format == mon_output_monitor) { printf("CLUSTER WARN: Connection to cluster failed: %s\n", pcmk_strerror(exit_code)); clean_up(MON_STATUS_WARN); } else { print_as("\nConnection to cluster failed: %s\n", pcmk_strerror(exit_code)); } if (output_format == mon_output_console) { sleep(2); } clean_up(-exit_code); } } if (one_shot) { return exit_code; } mainloop = g_main_new(FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = signal(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; g_io_add_watch(g_io_channel_unix_new(STDIN_FILENO), G_IO_IN, detect_user_input, NULL); } #endif refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_run(mainloop); g_main_destroy(mainloop); crm_info("Exiting %s", crm_system_name); clean_up(0); return 0; /* never reached */ } #define mon_warn(fmt...) do { \ if (!has_warnings) { \ print_as("CLUSTER WARN:"); \ } else { \ print_as(","); \ } \ print_as(fmt); \ has_warnings = TRUE; \ } while(0) static int count_resources(pe_working_set_t * data_set, resource_t * rsc) { int count = 0; GListPtr gIter = NULL; if (rsc == NULL) { gIter = data_set->resources; } else if (rsc->children) { gIter = rsc->children; } else { return is_not_set(rsc->flags, pe_rsc_orphan); } for (; gIter != NULL; gIter = gIter->next) { count += count_resources(data_set, gIter->data); } return count; } /*! * \internal * \brief Print one-line status suitable for use with monitoring software * * \param[in] data_set Working set of CIB state * * \note This function's output (and the return code when the program exits) * should conform to https://www.monitoring-plugins.org/doc/guidelines.html */ static void print_simple_status(pe_working_set_t * data_set) { GListPtr gIter = NULL; int nodes_online = 0; int nodes_standby = 0; int nodes_maintenance = 0; if (data_set->dc_node == NULL) { mon_warn(" No DC"); } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->maintenance && node->details->online) { nodes_maintenance++; } else if (node->details->online) { nodes_online++; } else { mon_warn(" offline node: %s", node->details->uname); } } if (!has_warnings) { int nresources = count_resources(data_set, NULL); print_as("CLUSTER OK: %d node%s online", nodes_online, s_if_plural(nodes_online)); if (nodes_standby > 0) { print_as(", %d standby node%s", nodes_standby, s_if_plural(nodes_standby)); } if (nodes_maintenance > 0) { print_as(", %d maintenance node%s", nodes_maintenance, s_if_plural(nodes_maintenance)); } print_as(", %d resource%s configured", nresources, s_if_plural(nresources)); } print_as("\n"); } /*! * \internal * \brief Print a [name]=[value][units] pair, optionally using time string * * \param[in] stream File stream to display output to * \param[in] name Name to display * \param[in] value Value to display (or NULL to convert time instead) * \param[in] units Units to display (or NULL for no units) * \param[in] epoch_time Epoch time to convert if value is NULL */ static void print_nvpair(FILE *stream, const char *name, const char *value, const char *units, time_t epoch_time) { /* print name= */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as(" %s=", name); break; case mon_output_html: case mon_output_cgi: case mon_output_xml: fprintf(stream, " %s=", name); break; default: break; } /* If we have a value (and optionally units), print it */ if (value) { switch (output_format) { case mon_output_plain: case mon_output_console: print_as("%s%s", value, (units? units : "")); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "%s%s", value, (units? units : "")); break; case mon_output_xml: fprintf(stream, "\"%s%s\"", value, (units? units : "")); break; default: break; } /* Otherwise print user-friendly time string */ } else { char *date_str, *c; date_str = asctime(localtime(&epoch_time)); for (c = date_str; c != '\0'; ++c) { if (*c == '\n') { *c = '\0'; break; } } switch (output_format) { case mon_output_plain: case mon_output_console: print_as("'%s'", date_str); break; case mon_output_html: case mon_output_cgi: case mon_output_xml: fprintf(stream, "\"%s\"", date_str); break; default: break; } } } /*! * \internal * \brief Print whatever is needed to start a node section * * \param[in] stream File stream to display output to * \param[in] node Node to print */ static void print_node_start(FILE *stream, node_t *node) { char *node_name; switch (output_format) { case mon_output_plain: case mon_output_console: node_name = get_node_display_name(node); print_as("* Node %s:\n", node_name); free(node_name); break; case mon_output_html: case mon_output_cgi: node_name = get_node_display_name(node); fprintf(stream, "

Node: %s

\n
    \n", node_name); free(node_name); break; case mon_output_xml: fprintf(stream, " \n", node->details->uname); break; default: break; } } /*! * \internal * \brief Print whatever is needed to end a node section * * \param[in] stream File stream to display output to */ static void print_node_end(FILE *stream) { switch (output_format) { case mon_output_html: case mon_output_cgi: fprintf(stream, "
\n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } /*! * \internal * \brief Print resources section heading appropriate to options * * \param[in] stream File stream to display output to */ static void print_resources_heading(FILE *stream) { const char *heading; if (group_by_node) { /* Active resources have already been printed by node */ heading = (inactive_resources? "Inactive resources" : NULL); } else if (inactive_resources) { heading = "Full list of resources"; } else { heading = "Active resources"; } /* Print section heading */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\n%s:\n\n", heading); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
\n

%s

\n", heading); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } /*! * \internal * \brief Print whatever resource section closing is appropriate * * \param[in] stream File stream to display output to */ static void print_resources_closing(FILE *stream, gboolean printed_heading) { const char *heading; /* What type of resources we did or did not display */ if (group_by_node) { heading = "inactive "; } else if (inactive_resources) { heading = ""; } else { heading = "active "; } switch (output_format) { case mon_output_plain: case mon_output_console: if (!printed_heading) { print_as("\nNo %sresources\n\n", heading); } break; case mon_output_html: case mon_output_cgi: if (!printed_heading) { fprintf(stream, "
\n

No %sresources

\n", heading); } break; case mon_output_xml: fprintf(stream, " %s\n", (printed_heading? "
" : "")); break; default: break; } } /*! * \internal * \brief Print whatever resource section(s) are appropriate * * \param[in] stream File stream to display output to * \param[in] data_set Cluster state to display * \param[in] print_opts Bitmask of pe_print_options */ static void print_resources(FILE *stream, pe_working_set_t *data_set, int print_opts) { GListPtr rsc_iter; const char *prefix = NULL; gboolean printed_heading = FALSE; gboolean brief_output = print_brief; /* If we already showed active resources by node, and * we're not showing inactive resources, we have nothing to do */ if (group_by_node && !inactive_resources) { return; } /* XML uses an indent, and ignores brief option for resources */ if (output_format == mon_output_xml) { prefix = " "; brief_output = FALSE; } /* If we haven't already printed resources grouped by node, * and brief output was requested, print resource summary */ if (brief_output && !group_by_node) { print_resources_heading(stream); printed_heading = TRUE; print_rscs_brief(data_set->resources, NULL, print_opts, stream, inactive_resources); } /* For each resource, display it if appropriate */ for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { resource_t *rsc = (resource_t *) rsc_iter->data; /* Complex resources may have some sub-resources active and some inactive */ gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); /* Skip inactive orphans (deleted but still in CIB) */ if (is_set(rsc->flags, pe_rsc_orphan) && !is_active) { continue; /* Skip active resources if we already displayed them by node */ } else if (group_by_node) { if (is_active) { continue; } /* Skip primitives already counted in a brief summary */ } else if (brief_output && (rsc->variant == pe_native)) { continue; /* Skip resources that aren't at least partially active, * unless we're displaying inactive resources */ } else if (!partially_active && !inactive_resources) { continue; } /* Print this resource */ if (printed_heading == FALSE) { print_resources_heading(stream); printed_heading = TRUE; } rsc->fns->print(rsc, prefix, print_opts, stream); } print_resources_closing(stream, printed_heading); } /*! * \internal * \brief Print heading for resource history * * \param[in] stream File stream to display output to * \param[in] data_set Current state of CIB * \param[in] node Node that ran this resource * \param[in] rsc Resource to print * \param[in] rsc_id ID of resource to print * \param[in] all Whether to print every resource or just failed ones */ static void print_rsc_history_start(FILE *stream, pe_working_set_t *data_set, node_t *node, resource_t *rsc, const char *rsc_id, gboolean all) { time_t last_failure = 0; int failcount = rsc? get_failcount_full(node, rsc, &last_failure, FALSE, NULL, data_set) : 0; if (!all && !failcount && (last_failure <= 0)) { return; } /* Print resource ID */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as(" %s:", rsc_id); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
  • %s:", rsc_id); break; case mon_output_xml: fprintf(stream, " 0)) { /* Print migration threshold */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as(" migration-threshold=%d", rsc->migration_threshold); break; case mon_output_html: case mon_output_cgi: fprintf(stream, " migration-threshold=%d", rsc->migration_threshold); break; case mon_output_xml: fprintf(stream, " orphan=\"false\" migration-threshold=\"%d\"", rsc->migration_threshold); break; default: break; } /* Print fail count if any */ if (failcount > 0) { switch (output_format) { case mon_output_plain: case mon_output_console: - print_as(" fail-count=%d", failcount); + print_as(" " CRM_FAIL_COUNT_PREFIX "=%d", failcount); break; case mon_output_html: case mon_output_cgi: - fprintf(stream, " fail-count=%d", failcount); + fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=%d", failcount); break; case mon_output_xml: - fprintf(stream, " fail-count=\"%d\"", failcount); + fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=\"%d\"", + failcount); break; default: break; } } /* Print last failure time if any */ if (last_failure > 0) { - print_nvpair(stream, "last-failure", NULL, NULL, last_failure); + print_nvpair(stream, CRM_LAST_FAILURE_PREFIX, NULL, NULL, + last_failure); } } /* End the heading */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "\n
      \n"); break; case mon_output_xml: fprintf(stream, ">\n"); break; default: break; } } /*! * \internal * \brief Print closing for resource history * * \param[in] stream File stream to display output to */ static void print_rsc_history_end(FILE *stream) { switch (output_format) { case mon_output_html: case mon_output_cgi: fprintf(stream, "
    \n
  • \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } /*! * \internal * \brief Print operation history * * \param[in] stream File stream to display output to * \param[in] data_set Current state of CIB * \param[in] node Node this operation is for * \param[in] xml_op Root of XML tree describing this operation * \param[in] task Task parsed from this operation's XML * \param[in] interval Interval parsed from this operation's XML * \param[in] rc Return code parsed from this operation's XML */ static void print_op_history(FILE *stream, pe_working_set_t *data_set, node_t *node, xmlNode *xml_op, const char *task, const char *interval, int rc) { const char *value = NULL; const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); /* Begin the operation description */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as(" + (%s) %s:", call, task); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
  • (%s) %s:", call, task); break; case mon_output_xml: fprintf(stream, " 0) { print_nvpair(stream, attr, NULL, NULL, int_value); } } attr = XML_RSC_OP_LAST_RUN; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); if (int_value > 0) { print_nvpair(stream, attr, NULL, NULL, int_value); } } attr = XML_RSC_OP_T_EXEC; value = crm_element_value(xml_op, attr); if (value) { print_nvpair(stream, attr, value, "ms", 0); } attr = XML_RSC_OP_T_QUEUE; value = crm_element_value(xml_op, attr); if (value) { print_nvpair(stream, attr, value, "ms", 0); } } /* End the operation description */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as(" rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc)); break; case mon_output_html: case mon_output_cgi: fprintf(stream, " rc=%d (%s)
  • \n", rc, services_ocf_exitcode_str(rc)); break; case mon_output_xml: fprintf(stream, " rc=\"%d\" rc_text=\"%s\" />\n", rc, services_ocf_exitcode_str(rc)); break; default: break; } } /*! * \internal * \brief Print resource operation/failure history * * \param[in] stream File stream to display output to * \param[in] data_set Current state of CIB * \param[in] node Node that ran this resource * \param[in] rsc_entry Root of XML tree describing resource status * \param[in] operations Whether to print operations or just failcounts */ static void print_rsc_history(FILE *stream, pe_working_set_t *data_set, node_t *node, xmlNode *rsc_entry, gboolean operations) { GListPtr gIter = NULL; GListPtr op_list = NULL; gboolean printed = FALSE; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); xmlNode *rsc_op = NULL; /* If we're not showing operations, just print the resource failure summary */ if (operations == FALSE) { print_rsc_history_start(stream, data_set, node, rsc, rsc_id, FALSE); print_rsc_history_end(stream); return; } /* Create a list of this resource's operations */ for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); /* Print each operation */ for (gIter = op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); int rc = crm_parse_int(op_rc, "0"); /* Display 0-interval monitors as "probe" */ if (safe_str_eq(task, CRMD_ACTION_STATUS) && safe_str_eq(interval, "0")) { task = "probe"; } /* Ignore notifies and some probes */ if (safe_str_eq(task, CRMD_ACTION_NOTIFY) || (safe_str_eq(task, "probe") && (rc == 7))) { continue; } /* If this is the first printed operation, print heading for resource */ if (printed == FALSE) { printed = TRUE; print_rsc_history_start(stream, data_set, node, rsc, rsc_id, TRUE); } /* Print the operation */ print_op_history(stream, data_set, node, xml_op, task, interval, rc); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); /* If we printed anything, close the resource */ if (printed) { print_rsc_history_end(stream); } } /*! * \internal * \brief Print node operation/failure history * * \param[in] stream File stream to display output to * \param[in] data_set Current state of CIB * \param[in] node_state Root of XML tree describing node status * \param[in] operations Whether to print operations or just failcounts */ static void print_node_history(FILE *stream, pe_working_set_t *data_set, xmlNode *node_state, gboolean operations) { node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; if (node && node->details && node->details->online) { print_node_start(stream, node); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); /* Print history of each of the node's resources */ for (rsc_entry = __xml_first_child(lrm_rsc); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { print_rsc_history(stream, data_set, node, rsc_entry, operations); } } print_node_end(stream); } } /*! * \internal * \brief Print extended information about an attribute if appropriate * * \param[in] data_set Working set of CIB state * * \return TRUE if extended information was printed, FALSE otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. */ static gboolean print_attr_msg(FILE *stream, node_t * node, GListPtr rsc_list, const char *attrname, const char *attrvalue) { GListPtr gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); if (rsc->children != NULL) { if (print_attr_msg(stream, node, rsc->children, attrname, attrvalue)) { return TRUE; } } if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) { const char *name = g_hash_table_lookup(rsc->parameters, "name"); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (safe_str_eq(name, attrname)) { int host_list_num = 0; int expected_score = 0; int value = crm_parse_int(attrvalue, "0"); const char *hosts = g_hash_table_lookup(rsc->parameters, "host_list"); const char *multiplier = g_hash_table_lookup(rsc->parameters, "multiplier"); if(hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } /* pingd multiplier is the same as the default value. */ expected_score = host_list_num * crm_parse_int(multiplier, "1"); switch (output_format) { case mon_output_plain: case mon_output_console: if (value <= 0) { print_as("\t: Connectivity is lost"); } else if (value < expected_score) { print_as("\t: Connectivity is degraded (Expected=%d)", expected_score); } break; case mon_output_html: case mon_output_cgi: if (value <= 0) { fprintf(stream, " (connectivity is lost)"); } else if (value < expected_score) { fprintf(stream, " (connectivity is degraded -- expected %d)", expected_score); } break; case mon_output_xml: fprintf(stream, " expected=\"%d\"", expected_score); break; default: break; } return TRUE; } } } return FALSE; } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } static void create_attr_list(gpointer name, gpointer value, gpointer data) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return; } } attr_list = g_list_insert_sorted(attr_list, name, compare_attribute); } /* structure for passing multiple user data to g_list_foreach() */ struct mon_attr_data { FILE *stream; node_t *node; }; static void print_node_attribute(gpointer name, gpointer user_data) { const char *value = NULL; struct mon_attr_data *data = (struct mon_attr_data *) user_data; value = g_hash_table_lookup(data->node->details->attrs, name); /* Print attribute name and value */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as(" + %-32s\t: %-10s", (char *)name, value); break; case mon_output_html: case mon_output_cgi: fprintf(data->stream, "
  • %s: %s", (char *)name, value); break; case mon_output_xml: fprintf(data->stream, " stream, data->node, data->node->details->running_rsc, name, value); /* Close out the attribute */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\n"); break; case mon_output_html: case mon_output_cgi: fprintf(data->stream, "
  • \n"); break; case mon_output_xml: fprintf(data->stream, " />\n"); break; default: break; } } static void print_node_summary(FILE *stream, pe_working_set_t * data_set, gboolean operations) { xmlNode *node_state = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); /* Print heading */ switch (output_format) { case mon_output_plain: case mon_output_console: if (operations) { print_as("\nOperations:\n"); } else { print_as("\nMigration Summary:\n"); } break; case mon_output_html: case mon_output_cgi: if (operations) { fprintf(stream, "
    \n

    Operations

    \n"); } else { fprintf(stream, "
    \n

    Migration Summary

    \n"); } break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } /* Print each node in the CIB status */ for (node_state = __xml_first_child(cib_status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { print_node_history(stream, data_set, node_state, operations); } } /* Close section */ switch (output_format) { case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } static void print_ticket(gpointer name, gpointer value, gpointer data) { ticket_t *ticket = (ticket_t *) value; FILE *stream = (FILE *) data; switch (output_format) { case mon_output_plain: case mon_output_console: print_as("* %s:\t%s%s", ticket->id, (ticket->granted? "granted" : "revoked"), (ticket->standby? " [standby]" : "")); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
  • %s: %s%s", ticket->id, (ticket->granted? "granted" : "revoked"), (ticket->standby? " [standby]" : "")); break; case mon_output_xml: fprintf(stream, " id, (ticket->granted? "granted" : "revoked"), (ticket->standby? "true" : "false")); break; default: break; } if (ticket->last_granted > -1) { print_nvpair(stdout, "last-granted", NULL, NULL, ticket->last_granted); } switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
  • \n"); break; case mon_output_xml: fprintf(stream, " />\n"); break; default: break; } } static void print_cluster_tickets(FILE *stream, pe_working_set_t * data_set) { /* Print section heading */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\nTickets:\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
    \n

    Tickets

    \n
      \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } /* Print each ticket */ g_hash_table_foreach(data_set->tickets, print_ticket, stream); /* Close section */ switch (output_format) { case mon_output_html: case mon_output_cgi: fprintf(stream, "
    \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } /*! * \internal * \brief Return human-friendly string representing node name * * The returned string will be in the format * uname[@hostUname] [(nodeID)] * "@hostUname" will be printed if the node is a guest node. * "(nodeID)" will be printed if the node ID is different from the node uname, * and detailed output has been requested. * * \param[in] node Node to represent * \return Newly allocated string with representation of node name * \note It is the caller's responsibility to free the result with free(). */ static char * get_node_display_name(node_t *node) { char *node_name; const char *node_host = NULL; const char *node_id = NULL; int name_len; CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL)); /* Host is displayed only if this is a guest node */ if (is_container_remote_node(node)) { if (node->details->remote_rsc->running_on) { /* running_on is a list, but guest nodes will have exactly one entry * unless they are in the process of migrating, in which case they * will have two; either way, we can use the first item in the list */ node_t *host_node = (node_t *) node->details->remote_rsc->running_on->data; if (host_node && host_node->details) { node_host = host_node->details->uname; } } if (node_host == NULL) { node_host = ""; /* so we at least get "uname@" to indicate guest */ } } /* Node ID is displayed if different from uname and detail is requested */ if (print_clone_detail && safe_str_neq(node->details->uname, node->details->id)) { node_id = node->details->id; } /* Determine name length */ name_len = strlen(node->details->uname) + 1; if (node_host) { name_len += strlen(node_host) + 1; /* "@node_host" */ } if (node_id) { name_len += strlen(node_id) + 3; /* + " (node_id)" */ } /* Allocate and populate display name */ node_name = malloc(name_len); CRM_ASSERT(node_name != NULL); strcpy(node_name, node->details->uname); if (node_host) { strcat(node_name, "@"); strcat(node_name, node_host); } if (node_id) { strcat(node_name, " ("); strcat(node_name, node_id); strcat(node_name, ")"); } return node_name; } /*! * \internal * \brief Print a negative location constraint * * \param[in] stream File stream to display output to * \param[in] node Node affected by constraint * \param[in] location Constraint to print */ static void print_ban(FILE *stream, node_t *node, rsc_to_node_t *location) { char *node_name = NULL; switch (output_format) { case mon_output_plain: case mon_output_console: node_name = get_node_display_name(node); print_as(" %s\tprevents %s from running %son %s\n", location->id, location->rsc_lh->id, ((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""), node_name); break; case mon_output_html: case mon_output_cgi: node_name = get_node_display_name(node); fprintf(stream, "
  • %s prevents %s from running %son %s
  • \n", location->id, location->rsc_lh->id, ((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""), node_name); break; case mon_output_xml: fprintf(stream, " \n", location->id, location->rsc_lh->id, node->details->uname, node->weight, ((location->role_filter == RSC_ROLE_MASTER)? "true" : "false")); break; default: break; } free(node_name); } /*! * \internal * \brief Print section for negative location constraints * * \param[in] stream File stream to display output to * \param[in] data_set Working set corresponding to CIB status to display */ static void print_neg_locations(FILE *stream, pe_working_set_t *data_set) { GListPtr gIter, gIter2; /* Print section heading */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\nNegative Location Constraints:\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
    \n

    Negative Location Constraints

    \n
      \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } /* Print each ban */ for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { rsc_to_node_t *location = (rsc_to_node_t *) gIter->data; if (!g_str_has_prefix(location->id, print_neg_location_prefix)) continue; for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { node_t *node = (node_t *) gIter2->data; if (node->weight < 0) { print_ban(stream, node, location); } } } /* Close section */ switch (output_format) { case mon_output_cgi: case mon_output_html: fprintf(stream, "
    \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } static void crm_mon_get_parameters(resource_t *rsc, pe_working_set_t * data_set) { get_rsc_attributes(rsc->parameters, rsc, NULL, data_set); crm_trace("Beekhof: unpacked params for %s (%d)", rsc->id, g_hash_table_size(rsc->parameters)); if(rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { crm_mon_get_parameters(gIter->data, data_set); } } } /*! * \internal * \brief Print node attributes section * * \param[in] stream File stream to display output to * \param[in] data_set Working set of CIB state */ static void print_node_attributes(FILE *stream, pe_working_set_t *data_set) { GListPtr gIter = NULL; /* Print section heading */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\nNode Attributes:\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
    \n

    Node Attributes

    \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } /* Unpack all resource parameters (it would be more efficient to do this * only when needed for the first time in print_attr_msg()) */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { crm_mon_get_parameters(gIter->data, data_set); } /* Display each node's attributes */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { struct mon_attr_data data; data.stream = stream; data.node = (node_t *) gIter->data; if (data.node && data.node->details && data.node->details->online) { print_node_start(stream, data.node); g_hash_table_foreach(data.node->details->attrs, create_attr_list, NULL); g_list_foreach(attr_list, print_node_attribute, &data); g_list_free(attr_list); attr_list = NULL; print_node_end(stream); } } /* Print section footer */ switch (output_format) { case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } /*! * \internal * \brief Return resource display options corresponding to command-line choices * * \return Bitmask of pe_print_options suitable for resource print functions */ static int get_resource_display_options(void) { int print_opts; /* Determine basic output format */ switch (output_format) { case mon_output_console: print_opts = pe_print_ncurses; break; case mon_output_html: case mon_output_cgi: print_opts = pe_print_html; break; case mon_output_xml: print_opts = pe_print_xml; break; default: print_opts = pe_print_printf; break; } /* Add optional display elements */ if (print_pending) { print_opts |= pe_print_pending; } if (print_clone_detail) { print_opts |= pe_print_clone_details; } if (!inactive_resources) { print_opts |= pe_print_clone_active; } if (print_brief) { print_opts |= pe_print_brief; } return print_opts; } /*! * \internal * \brief Return human-friendly string representing current time * * \return Current time as string (as by ctime() but without newline) on success * or "Could not determine current time" on error * \note The return value points to a statically allocated string which might be * overwritten by subsequent calls to any of the C library date and time functions. */ static const char * crm_now_string(void) { time_t a_time = time(NULL); char *since_epoch = ctime(&a_time); if ((a_time == (time_t) -1) || (since_epoch == NULL)) { return "Could not determine current time"; } since_epoch[strlen(since_epoch) - 1] = EOS; /* trim newline */ return (since_epoch); } /*! * \internal * \brief Print header for cluster summary if needed * * \param[in] stream File stream to display output to */ static void print_cluster_summary_header(FILE *stream) { switch (output_format) { case mon_output_html: case mon_output_cgi: fprintf(stream, "

    Cluster Summary

    \n

    \n"); break; case mon_output_xml: fprintf(stream, "

    \n"); break; default: break; } } /*! * \internal * \brief Print footer for cluster summary if needed * * \param[in] stream File stream to display output to */ static void print_cluster_summary_footer(FILE *stream) { switch (output_format) { case mon_output_cgi: case mon_output_html: fprintf(stream, "

    \n"); break; case mon_output_xml: fprintf(stream, "
    \n"); break; default: break; } } /*! * \internal * \brief Print times the display was last updated and CIB last changed * * \param[in] stream File stream to display output to * \param[in] data_set Working set of CIB state */ static void print_cluster_times(FILE *stream, pe_working_set_t *data_set) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); switch (output_format) { case mon_output_plain: case mon_output_console: print_as("Last updated: %s", crm_now_string()); print_as((user || client || origin)? "\n" : "\t\t"); print_as("Last change: %s", last_written ? last_written : ""); if (user) { print_as(" by %s", user); } if (client) { print_as(" via %s", client); } if (origin) { print_as(" on %s", origin); } print_as("\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, " Last updated: %s
    \n", crm_now_string()); fprintf(stream, " Last change: %s", last_written ? last_written : ""); if (user) { fprintf(stream, " by %s", user); } if (client) { fprintf(stream, " via %s", client); } if (origin) { fprintf(stream, " on %s", origin); } fprintf(stream, "
    \n"); break; case mon_output_xml: fprintf(stream, " \n", crm_now_string()); fprintf(stream, " \n", last_written ? last_written : "", user ? user : "", client ? client : "", origin ? origin : ""); break; default: break; } } /*! * \internal * \brief Print cluster stack * * \param[in] stream File stream to display output to * \param[in] stack_s Stack name */ static void print_cluster_stack(FILE *stream, const char *stack_s) { switch (output_format) { case mon_output_plain: case mon_output_console: print_as("Stack: %s\n", stack_s); break; case mon_output_html: case mon_output_cgi: fprintf(stream, " Stack: %s
    \n", stack_s); break; case mon_output_xml: fprintf(stream, " \n", stack_s); break; default: break; } } /*! * \internal * \brief Print current DC and its version * * \param[in] stream File stream to display output to * \param[in] data_set Working set of CIB state */ static void print_cluster_dc(FILE *stream, pe_working_set_t *data_set) { node_t *dc = data_set->dc_node; xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : NULL; const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); char *dc_name = dc? get_node_display_name(dc) : NULL; switch (output_format) { case mon_output_plain: case mon_output_console: print_as("Current DC: "); if (dc) { print_as("%s (version %s) - partition %s quorum\n", dc_name, (dc_version_s? dc_version_s : "unknown"), (crm_is_true(quorum) ? "with" : "WITHOUT")); } else { print_as("NONE\n"); } break; case mon_output_html: case mon_output_cgi: fprintf(stream, " Current DC: "); if (dc) { fprintf(stream, "%s (version %s) - partition %s quorum", dc_name, (dc_version_s? dc_version_s : "unknown"), (crm_is_true(quorum)? "with" : "WITHOUT")); } else { fprintf(stream, "NONE"); } fprintf(stream, "
    \n"); break; case mon_output_xml: fprintf(stream, " details->uname, dc->details->id, (crm_is_true(quorum) ? "true" : "false")); } else { fprintf(stream, "present=\"false\""); } fprintf(stream, " />\n"); break; default: break; } free(dc_name); } /*! * \internal * \brief Print counts of configured nodes and resources * * \param[in] stream File stream to display output to * \param[in] data_set Working set of CIB state * \param[in] stack_s Stack name */ static void print_cluster_counts(FILE *stream, pe_working_set_t *data_set, const char *stack_s) { int nnodes = g_list_length(data_set->nodes); int nresources = count_resources(data_set, NULL); xmlNode *quorum_node = get_xpath_object("//nvpair[@name='" XML_ATTR_EXPECTED_VOTES "']", data_set->input, LOG_DEBUG); const char *quorum_votes = quorum_node? crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE) : "unknown"; switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\n%d node%s configured", nnodes, s_if_plural(nnodes)); if (stack_s && strstr(stack_s, "classic openais") != NULL) { print_as(" (%s expected votes)", quorum_votes); } print_as("\n"); print_as("%d resource%s configured", nresources, s_if_plural(nresources)); if(data_set->disabled_resources || data_set->blocked_resources) { print_as(" ("); if (data_set->disabled_resources) { print_as("%d DISABLED", data_set->disabled_resources); } if (data_set->disabled_resources && data_set->blocked_resources) { print_as(", "); } if (data_set->blocked_resources) { print_as("%d BLOCKED from starting due to failure", data_set->blocked_resources); } print_as(")"); } print_as("\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, " %d node%s configured", nnodes, s_if_plural(nnodes)); if (stack_s && strstr(stack_s, "classic openais") != NULL) { fprintf(stream, " (%s expected votes)", quorum_votes); } fprintf(stream, "
    \n"); fprintf(stream, " %d resource%s configured", nresources, s_if_plural(nresources)); if (data_set->disabled_resources || data_set->blocked_resources) { fprintf(stream, " ("); if (data_set->disabled_resources) { fprintf(stream, "%d DISABLED", data_set->disabled_resources); } if (data_set->disabled_resources && data_set->blocked_resources) { fprintf(stream, ", "); } if (data_set->blocked_resources) { fprintf(stream, "%d BLOCKED from starting due to failure", data_set->blocked_resources); } fprintf(stream, ")"); } fprintf(stream, "
    \n"); break; case mon_output_xml: fprintf(stream, " \n", g_list_length(data_set->nodes), quorum_votes); fprintf(stream, " \n", count_resources(data_set, NULL), data_set->disabled_resources, data_set->blocked_resources); break; default: break; } } /*! * \internal * \brief Print cluster-wide options * * \param[in] stream File stream to display output to * \param[in] data_set Working set of CIB state * * \note Currently this is only implemented for HTML and XML output, and * prints only a few options. If there is demand, more could be added. */ static void print_cluster_options(FILE *stream, pe_working_set_t *data_set) { switch (output_format) { case mon_output_plain: case mon_output_console: if (is_set(data_set->flags, pe_flag_maintenance_mode)) { print_as("\n *** Resource management is DISABLED ***"); print_as("\n The cluster will not attempt to start, stop or recover services"); print_as("\n"); } break; case mon_output_html: fprintf(stream, "

    \n

    Config Options

    \n"); fprintf(stream, " \n"); fprintf(stream, " \n", is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled"); fprintf(stream, " \n", is_set(data_set->flags, pe_flag_symmetric_cluster)? "" : "a"); fprintf(stream, " \n"); fprintf(stream, " \n"); fprintf(stream, "
    STONITH of failed nodes%s
    Cluster is%ssymmetric
    No Quorum Policy"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: fprintf(stream, "Freeze resources"); break; case no_quorum_stop: fprintf(stream, "Stop ALL resources"); break; case no_quorum_ignore: fprintf(stream, "Ignore"); break; case no_quorum_suicide: fprintf(stream, "Suicide"); break; } fprintf(stream, "
    Resource management"); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { fprintf(stream, "DISABLED (the cluster will " "not attempt to start, stop or recover services)"); } else { fprintf(stream, "enabled"); } fprintf(stream, "
    \n

    \n"); break; case mon_output_xml: fprintf(stream, " flags, pe_flag_stonith_enabled)? "true" : "false"); fprintf(stream, " symmetric-cluster=\"%s\"", is_set(data_set->flags, pe_flag_symmetric_cluster)? "true" : "false"); fprintf(stream, " no-quorum-policy=\""); switch (data_set->no_quorum_policy) { case no_quorum_freeze: fprintf(stream, "freeze"); break; case no_quorum_stop: fprintf(stream, "stop"); break; case no_quorum_ignore: fprintf(stream, "ignore"); break; case no_quorum_suicide: fprintf(stream, "suicide"); break; } fprintf(stream, "\""); fprintf(stream, " maintenance-mode=\"%s\"", is_set(data_set->flags, pe_flag_maintenance_mode)? "true" : "false"); fprintf(stream, " />\n"); break; default: break; } } /*! * \internal * \brief Get the name of the stack in use (or "unknown" if not available) * * \param[in] data_set Working set of CIB state * * \return String representing stack name */ static const char * get_cluster_stack(pe_working_set_t *data_set) { xmlNode *stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); return stack? crm_element_value(stack, XML_NVPAIR_ATTR_VALUE) : "unknown"; } /*! * \internal * \brief Print a summary of cluster-wide information * * \param[in] stream File stream to display output to * \param[in] data_set Working set of CIB state */ static void print_cluster_summary(FILE *stream, pe_working_set_t *data_set) { const char *stack_s = get_cluster_stack(data_set); gboolean header_printed = FALSE; if (show & mon_show_stack) { if (header_printed == FALSE) { print_cluster_summary_header(stream); header_printed = TRUE; } print_cluster_stack(stream, stack_s); } /* Always print DC if none, even if not requested */ if ((data_set->dc_node == NULL) || (show & mon_show_dc)) { if (header_printed == FALSE) { print_cluster_summary_header(stream); header_printed = TRUE; } print_cluster_dc(stream, data_set); } if (show & mon_show_times) { if (header_printed == FALSE) { print_cluster_summary_header(stream); header_printed = TRUE; } print_cluster_times(stream, data_set); } if (is_set(data_set->flags, pe_flag_maintenance_mode) || data_set->disabled_resources || data_set->blocked_resources || is_set(show, mon_show_count)) { if (header_printed == FALSE) { print_cluster_summary_header(stream); header_printed = TRUE; } print_cluster_counts(stream, data_set, stack_s); } /* There is not a separate option for showing cluster options, so show with * stack for now; a separate option could be added if there is demand */ if (show & mon_show_stack) { print_cluster_options(stream, data_set); } if (header_printed) { print_cluster_summary_footer(stream); } } /*! * \internal * \brief Print a failed action * * \param[in] stream File stream to display output to * \param[in] xml_op Root of XML tree describing failed action */ static void print_failed_action(FILE *stream, xmlNode *xml_op) { const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); const char *op_key_attr = "op_key"; const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); int rc = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), "0"); int status = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), "0"); char *exit_reason_cleaned; /* If no op_key was given, use id instead */ if (op_key == NULL) { op_key = ID(xml_op); op_key_attr = "id"; } /* If no exit reason was given, use "none" */ if (exit_reason == NULL) { exit_reason = "none"; } /* Print common action information */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("* %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'", op_key, node, services_ocf_exitcode_str(rc), rc, call, services_lrm_status_str(status), exit_reason); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "

  • %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'", op_key, node, services_ocf_exitcode_str(rc), rc, call, services_lrm_status_str(status), exit_reason); break; case mon_output_xml: exit_reason_cleaned = crm_xml_escape(exit_reason); fprintf(stream, " \n"); break; case mon_output_xml: fprintf(stream, " />\n"); break; default: break; } } /*! * \internal * \brief Print a section for failed actions * * \param[in] stream File stream to display output to * \param[in] data_set Working set of CIB state */ static void print_failed_actions(FILE *stream, pe_working_set_t *data_set) { xmlNode *xml_op = NULL; /* Print section heading */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\nFailed Actions:\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
    \n

    Failed Actions

    \n
      \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } /* Print each failed action */ for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { print_failed_action(stream, xml_op); } /* End section */ switch (output_format) { case mon_output_plain: case mon_output_console: print_as("\n"); break; case mon_output_html: case mon_output_cgi: fprintf(stream, "
    \n"); break; case mon_output_xml: fprintf(stream, " \n"); break; default: break; } } /*! * \internal * \brief Print cluster status to screen * * This uses the global display preferences set by command-line options * to display cluster status in a human-friendly way. * * \param[in] data_set Working set of CIB state */ static void print_status(pe_working_set_t * data_set) { GListPtr gIter = NULL; int print_opts = get_resource_display_options(); /* space-separated lists of node names */ char *online_nodes = NULL; char *online_remote_nodes = NULL; char *online_guest_nodes = NULL; char *offline_nodes = NULL; char *offline_remote_nodes = NULL; if (output_format == mon_output_console) { blank_screen(); } print_cluster_summary(stdout, data_set); print_as("\n"); /* Gather node information (and print if in bad state or grouping by node) */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_mode = NULL; char *node_name = get_node_display_name(node); /* Get node mode */ if (node->details->unclean) { if (node->details->online) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { node_mode = "maintenance"; } else { node_mode = "OFFLINE (maintenance)"; } } else if (node->details->online) { node_mode = "online"; if (group_by_node == FALSE) { if (is_container_remote_node(node)) { online_guest_nodes = add_list_element(online_guest_nodes, node_name); } else if (is_baremetal_remote_node(node)) { online_remote_nodes = add_list_element(online_remote_nodes, node_name); } else { online_nodes = add_list_element(online_nodes, node_name); } free(node_name); continue; } } else { node_mode = "OFFLINE"; if (group_by_node == FALSE) { if (is_baremetal_remote_node(node)) { offline_remote_nodes = add_list_element(offline_remote_nodes, node_name); } else if (is_container_remote_node(node)) { /* ignore offline guest nodes */ } else { offline_nodes = add_list_element(offline_nodes, node_name); } free(node_name); continue; } } /* If we get here, node is in bad state, or we're grouping by node */ /* Print the node name and status */ if (is_container_remote_node(node)) { print_as("Guest"); } else if (is_baremetal_remote_node(node)) { print_as("Remote"); } print_as("Node %s: %s\n", node_name, node_mode); /* If we're grouping by node, print its resources */ if (group_by_node) { if (print_brief) { print_rscs_brief(node->details->running_rsc, "\t", print_opts | pe_print_rsconly, stdout, FALSE); } else { GListPtr gIter2 = NULL; for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout); } } } free(node_name); } /* If we're not grouping by node, summarize nodes by status */ if (online_nodes) { print_as("Online: [%s ]\n", online_nodes); free(online_nodes); } if (offline_nodes) { print_as("OFFLINE: [%s ]\n", offline_nodes); free(offline_nodes); } if (online_remote_nodes) { print_as("RemoteOnline: [%s ]\n", online_remote_nodes); free(online_remote_nodes); } if (offline_remote_nodes) { print_as("RemoteOFFLINE: [%s ]\n", offline_remote_nodes); free(offline_remote_nodes); } if (online_guest_nodes) { print_as("GuestOnline: [%s ]\n", online_guest_nodes); free(online_guest_nodes); } /* Print resources section, if needed */ print_resources(stdout, data_set, print_opts); /* print Node Attributes section if requested */ if (show & mon_show_attributes) { print_node_attributes(stdout, data_set); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (show & (mon_show_operations | mon_show_failcounts)) { print_node_summary(stdout, data_set, ((show & mon_show_operations)? TRUE : FALSE)); } /* If there were any failed actions, print them */ if (xml_has_children(data_set->failed)) { print_failed_actions(stdout, data_set); } /* Print tickets if requested */ if (show & mon_show_tickets) { print_cluster_tickets(stdout, data_set); } /* Print negative location constraints if requested */ if (show & mon_show_bans) { print_neg_locations(stdout, data_set); } #if CURSES_ENABLED if (output_format == mon_output_console) { refresh(); } #endif } /*! * \internal * \brief Print cluster status in XML format * * \param[in] data_set Working set of CIB state */ static void print_xml_status(pe_working_set_t * data_set) { FILE *stream = stdout; GListPtr gIter = NULL; int print_opts = get_resource_display_options(); fprintf(stream, "\n"); fprintf(stream, "\n", VERSION); print_cluster_summary(stream, data_set); /*** NODES ***/ fprintf(stream, " \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_type = "unknown"; switch (node->details->type) { case node_member: node_type = "member"; break; case node_remote: node_type = "remote"; break; case node_ping: node_type = "ping"; break; } fprintf(stream, " details->uname); fprintf(stream, "id=\"%s\" ", node->details->id); fprintf(stream, "online=\"%s\" ", node->details->online ? "true" : "false"); fprintf(stream, "standby=\"%s\" ", node->details->standby ? "true" : "false"); fprintf(stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false"); fprintf(stream, "maintenance=\"%s\" ", node->details->maintenance ? "true" : "false"); fprintf(stream, "pending=\"%s\" ", node->details->pending ? "true" : "false"); fprintf(stream, "unclean=\"%s\" ", node->details->unclean ? "true" : "false"); fprintf(stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false"); fprintf(stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false"); fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false"); fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc)); fprintf(stream, "type=\"%s\" ", node_type); if (is_container_remote_node(node)) { fprintf(stream, "id_as_resource=\"%s\" ", node->details->remote_rsc->container->id); } if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, ">\n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; rsc->fns->print(rsc, " ", print_opts | pe_print_rsconly, stream); } fprintf(stream, " \n"); } else { fprintf(stream, "/>\n"); } } fprintf(stream, " \n"); /* Print resources section, if needed */ print_resources(stream, data_set, print_opts); /* print Node Attributes section if requested */ if (show & mon_show_attributes) { print_node_attributes(stream, data_set); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (show & (mon_show_operations | mon_show_failcounts)) { print_node_summary(stream, data_set, ((show & mon_show_operations)? TRUE : FALSE)); } /* If there were any failed actions, print them */ if (xml_has_children(data_set->failed)) { print_failed_actions(stream, data_set); } /* Print tickets if requested */ if (show & mon_show_tickets) { print_cluster_tickets(stream, data_set); } /* Print negative location constraints if requested */ if (show & mon_show_bans) { print_neg_locations(stream, data_set); } fprintf(stream, "\n"); fflush(stream); fclose(stream); } /*! * \internal * \brief Print cluster status in HTML format (with HTTP headers if CGI) * * \param[in] data_set Working set of CIB state * \param[in] filename Name of file to write HTML to (ignored if CGI) * * \return 0 on success, -1 on error */ static int print_html_status(pe_working_set_t * data_set, const char *filename) { FILE *stream; GListPtr gIter = NULL; char *filename_tmp = NULL; int print_opts = get_resource_display_options(); if (output_format == mon_output_cgi) { stream = stdout; fprintf(stream, "Content-type: text/html\n\n"); } else { filename_tmp = crm_concat(filename, "tmp", '.'); stream = fopen(filename_tmp, "w"); if (stream == NULL) { crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp); free(filename_tmp); return -1; } } fprintf(stream, "\n"); fprintf(stream, " \n"); fprintf(stream, " Cluster status\n"); fprintf(stream, " \n", reconnect_msec / 1000); fprintf(stream, " \n"); fprintf(stream, "\n"); print_cluster_summary(stream, data_set); /*** NODE LIST ***/ fprintf(stream, "
    \n

    Node List

    \n"); fprintf(stream, "
      \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; char *node_name = get_node_display_name(node); fprintf(stream, "
    • Node: %s: ", node_name); if (node->details->standby_onfail && node->details->online) { fprintf(stream, "standby (on-fail)\n"); } else if (node->details->standby && node->details->online) { fprintf(stream, "standby\n"); } else if (node->details->standby) { fprintf(stream, "OFFLINE (standby)\n"); } else if (node->details->maintenance && node->details->online) { fprintf(stream, "maintenance\n"); } else if (node->details->maintenance) { fprintf(stream, "OFFLINE (maintenance)\n"); } else if (node->details->online) { fprintf(stream, "online\n"); } else { fprintf(stream, "OFFLINE\n"); } if (print_brief && group_by_node) { fprintf(stream, "
        \n"); print_rscs_brief(node->details->running_rsc, NULL, print_opts | pe_print_rsconly, stream, FALSE); fprintf(stream, "
      \n"); } else if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, "
        \n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; fprintf(stream, "
      • "); rsc->fns->print(rsc, NULL, print_opts | pe_print_rsconly, stream); fprintf(stream, "
      • \n"); } fprintf(stream, "
      \n"); } fprintf(stream, "
    • \n"); free(node_name); } fprintf(stream, "
    \n"); /* Print resources section, if needed */ print_resources(stream, data_set, print_opts); /* print Node Attributes section if requested */ if (show & mon_show_attributes) { print_node_attributes(stream, data_set); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (show & (mon_show_operations | mon_show_failcounts)) { print_node_summary(stream, data_set, ((show & mon_show_operations)? TRUE : FALSE)); } /* If there were any failed actions, print them */ if (xml_has_children(data_set->failed)) { print_failed_actions(stream, data_set); } /* Print tickets if requested */ if (show & mon_show_tickets) { print_cluster_tickets(stream, data_set); } /* Print negative location constraints if requested */ if (show & mon_show_bans) { print_neg_locations(stream, data_set); } fprintf(stream, "\n"); fprintf(stream, "\n"); fflush(stream); fclose(stream); if (output_format != mon_output_cgi) { if (rename(filename_tmp, filename) != 0) { crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename); } free(filename_tmp); } return 0; } #if ENABLE_SNMP # include # include # include # include # include # include # define add_snmp_field(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ int s_rc = snmp_add_var(list, name, name_length, 's', (value)); \ if(s_rc != 0) { \ crm_err("Could not add %s=%s rc=%d", oid_string, value, s_rc); \ } else { \ crm_trace("Added %s=%s", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ # define add_snmp_field_int(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ if(NULL == snmp_pdu_add_variable( \ list, name, name_length, ASN_INTEGER, \ (u_char *) & value, sizeof(value))) { \ crm_err("Could not add %s=%d", oid_string, value); \ } else { \ crm_trace("Added %s=%d", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ static int snmp_input(int operation, netsnmp_session * session, int reqid, netsnmp_pdu * pdu, void *magic) { return 1; } static netsnmp_session * crm_snmp_init(const char *target, char *community) { static netsnmp_session *session = NULL; # ifdef NETSNMPV53 char target53[128]; snprintf(target53, sizeof(target53), "%s:162", target); # endif if (session) { return session; } if (target == NULL) { return NULL; } if (get_crm_log_level() > LOG_INFO) { char *debug_tokens = strdup("run:shell,snmptrap,tdomain"); debug_register_tokens(debug_tokens); snmp_set_do_debugging(1); } session = calloc(1, sizeof(netsnmp_session)); snmp_sess_init(session); session->version = SNMP_VERSION_2c; session->callback = snmp_input; session->callback_magic = NULL; if (community) { session->community_len = strlen(community); session->community = (unsigned char *)community; } session = snmp_add(session, # ifdef NETSNMPV53 netsnmp_tdomain_transport(target53, 0, "udp"), # else netsnmp_transport_open_client("snmptrap", target), # endif NULL, NULL); if (session == NULL) { snmp_sess_perror("Could not create snmp transport", session); } return session; } #endif static int send_snmp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { int ret = 1; #if ENABLE_SNMP static oid snmptrap_oid[] = { 1, 3, 6, 1, 6, 3, 1, 1, 4, 1, 0 }; static oid sysuptime_oid[] = { 1, 3, 6, 1, 2, 1, 1, 3, 0 }; netsnmp_pdu *trap_pdu; netsnmp_session *session = crm_snmp_init(snmp_target, snmp_community); trap_pdu = snmp_pdu_create(SNMP_MSG_TRAP2); if (!trap_pdu) { crm_err("Failed to create SNMP notification"); return SNMPERR_GENERR; } if (1) { /* send uptime */ char csysuptime[20]; time_t now = time(NULL); sprintf(csysuptime, "%lld", (long long) now); snmp_add_var(trap_pdu, sysuptime_oid, sizeof(sysuptime_oid) / sizeof(oid), 't', csysuptime); } /* Indicate what the trap is by setting snmpTrapOid.0 */ ret = snmp_add_var(trap_pdu, snmptrap_oid, sizeof(snmptrap_oid) / sizeof(oid), 'o', snmp_crm_trap_oid); if (ret != 0) { crm_err("Failed set snmpTrapOid.0=%s", snmp_crm_trap_oid); return ret; } /* Add extries to the trap */ if (rsc) { add_snmp_field(trap_pdu, snmp_crm_oid_rsc, rsc); } add_snmp_field(trap_pdu, snmp_crm_oid_node, node); add_snmp_field(trap_pdu, snmp_crm_oid_task, task); add_snmp_field(trap_pdu, snmp_crm_oid_desc, desc); add_snmp_field_int(trap_pdu, snmp_crm_oid_rc, rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_trc, target_rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_status, status); /* Send and cleanup */ ret = snmp_send(session, trap_pdu); if (ret == 0) { /* error */ snmp_sess_perror("Could not send SNMP trap", session); snmp_free_pdu(trap_pdu); ret = SNMPERR_GENERR; } else { ret = SNMPERR_SUCCESS; } #else crm_err("Sending SNMP traps is not supported by this installation"); #endif return ret; } #if ENABLE_ESMTP # include # include static void print_recipient_status(smtp_recipient_t recipient, const char *mailbox, void *arg) { const smtp_status_t *status; status = smtp_recipient_status(recipient); printf("%s: %d %s", mailbox, status->code, status->text); } static void event_cb(smtp_session_t session, int event_no, void *arg, ...) { int *ok; va_list alist; va_start(alist, arg); switch (event_no) { case SMTP_EV_CONNECT: case SMTP_EV_MAILSTATUS: case SMTP_EV_RCPTSTATUS: case SMTP_EV_MESSAGEDATA: case SMTP_EV_MESSAGESENT: case SMTP_EV_DISCONNECT: break; case SMTP_EV_WEAK_CIPHER:{ int bits = va_arg(alist, long); ok = va_arg(alist, int *); crm_debug("SMTP_EV_WEAK_CIPHER, bits=%d - accepted.", bits); *ok = 1; break; } case SMTP_EV_STARTTLS_OK: crm_debug("SMTP_EV_STARTTLS_OK - TLS started here."); break; case SMTP_EV_INVALID_PEER_CERTIFICATE:{ long vfy_result = va_arg(alist, long); ok = va_arg(alist, int *); /* There is a table in handle_invalid_peer_certificate() of mail-file.c */ crm_err("SMTP_EV_INVALID_PEER_CERTIFICATE: %ld", vfy_result); *ok = 1; break; } case SMTP_EV_NO_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_WRONG_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_WRONG_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_NO_CLIENT_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_CLIENT_CERTIFICATE - accepted."); *ok = 1; break; default: crm_debug("Got event: %d - ignored.", event_no); } va_end(alist); } #endif #define BODY_MAX 2048 #if ENABLE_ESMTP static void crm_smtp_debug(const char *buf, int buflen, int writing, void *arg) { char type = 0; int lpc = 0, last = 0, level = *(int *)arg; if (writing == SMTP_CB_HEADERS) { type = 'H'; } else if (writing) { type = 'C'; } else { type = 'S'; } for (; lpc < buflen; lpc++) { switch (buf[lpc]) { case 0: case '\n': if (last > 0) { do_crm_log(level, " %.*s", lpc - last, buf + last); } else { do_crm_log(level, "%c: %.*s", type, lpc - last, buf + last); } last = lpc + 1; break; } } } #endif static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = crm_itoa(rc); char *status_s = crm_itoa(status); char *target_rc_s = crm_itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", external_recipient, external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (external_recipient) { setenv("CRM_notify_recipient", external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(external_agent, external_agent, NULL); } crm_trace("Finished running custom notification program '%s'.", external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static int send_smtp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { #if ENABLE_ESMTP smtp_session_t session; smtp_message_t message; auth_context_t authctx; struct sigaction sa; int len = 25; /* Note: Check extra padding on the Subject line below */ int noauth = 1; int smtp_debug = LOG_DEBUG; char crm_mail_body[BODY_MAX]; char *crm_mail_subject = NULL; memset(&sa, 0, sizeof(struct sigaction)); if (node == NULL) { node = "-"; } if (rsc == NULL) { rsc = "-"; } if (desc == NULL) { desc = "-"; } if (crm_mail_to == NULL) { return 1; } if (crm_mail_host == NULL) { crm_mail_host = "localhost:25"; } if (crm_mail_prefix == NULL) { crm_mail_prefix = "Cluster notification"; } crm_debug("Sending '%s' mail to %s via %s", crm_mail_prefix, crm_mail_to, crm_mail_host); len += strlen(crm_mail_prefix); len += strlen(task); len += strlen(rsc); len += strlen(node); len += strlen(desc); len++; crm_mail_subject = calloc(1, len); /* If you edit this line, ensure you allocate enough memory for it by altering 'len' above */ snprintf(crm_mail_subject, len, "%s - %s event for %s on %s: %s\r\n", crm_mail_prefix, task, rsc, node, desc); len = 0; len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\n%s\r\n", crm_mail_prefix); len += snprintf(crm_mail_body + len, BODY_MAX - len, "====\r\n\r\n"); if (rc == target_rc) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Completed operation %s for resource %s on %s\r\n", task, rsc, node); } else { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Operation %s for resource %s on %s failed: %s\r\n", task, rsc, node, desc); } len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\nDetails:\r\n"); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\toperation status: (%d) %s\r\n", status, services_lrm_status_str(status)); if (status == PCMK_LRM_OP_DONE) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "\tscript returned: (%d) %s\r\n", rc, services_ocf_exitcode_str(rc)); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\texpected return value: (%d) %s\r\n", target_rc, services_ocf_exitcode_str(target_rc)); } auth_client_init(); session = smtp_create_session(); message = smtp_add_message(session); smtp_starttls_enable(session, Starttls_ENABLED); sa.sa_handler = SIG_IGN; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(SIGPIPE, &sa, NULL); smtp_set_server(session, crm_mail_host); authctx = auth_create_context(); auth_set_mechanism_flags(authctx, AUTH_PLUGIN_PLAIN, 0); smtp_set_eventcb(session, event_cb, NULL); /* Now tell libESMTP it can use the SMTP AUTH extension. */ if (!noauth) { crm_debug("Adding authentication context"); smtp_auth_set_context(session, authctx); } if (crm_mail_from == NULL) { struct utsname us; char auto_from[BODY_MAX]; CRM_ASSERT(uname(&us) == 0); snprintf(auto_from, BODY_MAX, "crm_mon@%s", us.nodename); smtp_set_reverse_path(message, auto_from); } else { /* NULL is ok */ smtp_set_reverse_path(message, crm_mail_from); } smtp_set_header(message, "To", NULL /*phrase */ , NULL /*addr */ ); /* "Phrase" */ smtp_add_recipient(message, crm_mail_to); /* Set the Subject: header and override any subject line in the message headers. */ smtp_set_header(message, "Subject", crm_mail_subject); smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1); smtp_set_message_str(message, crm_mail_body); smtp_set_monitorcb(session, crm_smtp_debug, &smtp_debug, 1); if (smtp_start_session(session)) { char buf[128]; int rc = smtp_errno(); crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } else { char buf[128]; int rc = smtp_errno(); const smtp_status_t *smtp_status = smtp_message_transfer_status(message); if (rc != 0) { crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } crm_info("Send status: %d %s", smtp_status->code, crm_str(smtp_status->text)); smtp_enumerate_recipients(message, print_recipient_status, NULL); } smtp_destroy_session(session); auth_destroy_context(authctx); auth_client_exit(); #endif return 0; } static void handle_rsc_op(xmlNode * xml, const char *node_id) { int rc = -1; int status = -1; int action = -1; int interval = 0; int target_rc = -1; int transition_num = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; char *update_te_uuid = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) { xmlNode *cIter; for(cIter = xml->children; cIter; cIter = cIter->next) { handle_rsc_op(cIter, node_id); } return; } id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); if (id == NULL) { /* Compatibility with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (FALSE == decode_transition_magic(magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, &interval) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, XML_ATTR_UNAME); } if (node == NULL && n) { node = ID(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_strerror(pcmk_ok); if (status == PCMK_LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_LRM_OP_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = services_lrm_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && snmp_target) { send_snmp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && crm_mail_to) { send_smtp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(update_te_uuid); free(rsc); free(task); } static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger(refresh_trigger); return FALSE; } #define NODE_PATT "/lrm[@id=" static char *get_node_from_xpath(const char *xpath) { char *nodeid = NULL; char *tmp = strstr(xpath, NODE_PATT); if(tmp) { tmp += strlen(NODE_PATT); tmp += 1; nodeid = strdup(tmp); tmp = strstr(nodeid, "\'"); CRM_ASSERT(tmp); tmp[0] = 0; } return nodeid; } static void crm_diff_update_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "delete") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if(strcmp(name, XML_TAG_CIB) == 0) { xmlNode *state = NULL; xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS); for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { xmlNode *state = NULL; for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { node = crm_element_value(match, XML_ATTR_UNAME); if (node == NULL) { node = ID(match); } handle_rsc_op(match, node); } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { node = ID(match); handle_rsc_op(match, node); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } } static void crm_diff_update_v1(const char *event, xmlNode * msg) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op, NULL); } freeXpathObject(xpathObj); } void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; long now = time(NULL); static bool stale = FALSE; static int updates = 0; static mainloop_timer_t *refresh_timer = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); print_dot(); if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; break; case pcmk_ok: updates++; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); } if (crm_mail_to || snmp_target || external_agent) { int format = 0; crm_element_value_int(diff, "format", &format); switch(format) { case 1: crm_diff_update_v1(event, msg); break; case 2: crm_diff_update_v2(event, msg); break; default: crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { print_as("--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; /* Refresh * - immediately if the last update was more than 5s ago * - every 10 updates * - at most 2s after the last update */ if ((now - last_refresh) > (reconnect_msec / 1000)) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else if(updates > 10) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } gboolean mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); pe_working_set_t data_set; last_refresh = time(NULL); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { if (cib) { cib->cmds->signoff(cib); } print_as("Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation)); if (output_format == mon_output_console) { sleep(2); } clean_up(EX_USAGE); return FALSE; } set_working_set_defaults(&data_set); data_set.input = cib_copy; cluster_status(&data_set); /* Unpack constraints if any section will need them * (tickets may be referenced in constraints but not granted yet, * and bans need negative location constraints) */ if (show & (mon_show_bans | mon_show_tickets)) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set.input); unpack_constraints(cib_constraints, &data_set); } switch (output_format) { case mon_output_html: case mon_output_cgi: if (print_html_status(&data_set, output_filename) != 0) { fprintf(stderr, "Critical: Unable to output html file\n"); clean_up(EX_USAGE); } break; case mon_output_xml: print_xml_status(&data_set); break; case mon_output_monitor: print_simple_status(&data_set); if (has_warnings) { clean_up(MON_STATUS_WARN); } break; case mon_output_plain: case mon_output_console: print_status(&data_set); break; case mon_output_none: break; } cleanup_alloc_calculations(&data_set); return TRUE; } void mon_st_callback(stonith_t * st, stonith_event_t * e) { char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)", e->operation, e->origin, e->target, pcmk_strerror(e->result), e->id); if (snmp_target) { send_snmp_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } if (crm_mail_to) { send_smtp_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } if (external_agent) { send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } free(desc); } /* * De-init ncurses, signoff from the CIB and deallocate memory. */ void clean_up(int rc) { #if ENABLE_SNMP netsnmp_session *session = crm_snmp_init(NULL, NULL); if (session) { snmp_close(session); snmp_shutdown("snmpapp"); } #endif #if CURSES_ENABLED if (output_format == mon_output_console) { output_format = mon_output_plain; echo(); nocbreak(); endwin(); } #endif if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } free(output_filename); free(xml_file); free(pid_file); if (rc >= 0) { crm_exit(rc); } return; } diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 6cb71bf07b..c4ee78e621 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1,1063 +1,1065 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include bool BE_QUIET = FALSE; bool scope_master = FALSE; int cib_options = cib_sync_call; GMainLoop *mainloop = NULL; #define message_timeout_ms 60*1000 static gboolean resource_ipc_timeout(gpointer data) { fprintf(stderr, "No messages received in %d seconds.. aborting\n", (int)message_timeout_ms / 1000); crm_err("No messages received in %d seconds", (int)message_timeout_ms / 1000); return crm_exit(-1); } static void resource_ipc_connection_destroy(gpointer user_data) { crm_info("Connection to CRMd was terminated"); crm_exit(1); } static void start_mainloop(void) { if (crmd_replies_needed == 0) { return; } mainloop = g_main_new(FALSE); fprintf(stderr, "Waiting for %d replies from the CRMd", crmd_replies_needed); crm_debug("Waiting for %d replies from the CRMd", crmd_replies_needed); g_timeout_add(message_timeout_ms, resource_ipc_timeout, NULL); g_main_run(mainloop); } static int resource_ipc_callback(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); fprintf(stderr, "."); crm_log_xml_trace(msg, "[inbound]"); crmd_replies_needed--; if (crmd_replies_needed == 0) { fprintf(stderr, " OK\n"); crm_debug("Got all the replies we expected"); return crm_exit(pcmk_ok); } free_xml(msg); return 0; } struct ipc_client_callbacks crm_callbacks = { .dispatch = resource_ipc_callback, .destroy = resource_ipc_connection_destroy, }; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\t\tThis text"}, {"version", 0, 0, '$', "\t\tVersion information" }, {"verbose", 0, 0, 'V', "\t\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\t\tPrint only the value on stdout\n"}, {"resource", 1, 0, 'r', "\tResource ID" }, {"-spacer-",1, 0, '-', "\nQueries:"}, {"list", 0, 0, 'L', "\t\tList all cluster resources"}, {"list-raw", 0, 0, 'l', "\tList the IDs of all instantiated resources (no groups/clones/...)"}, {"list-cts", 0, 0, 'c', NULL, pcmk_option_hidden}, {"list-operations", 0, 0, 'O', "\tList active resource operations. Optionally filtered by resource (-r) and/or node (-N)"}, {"list-all-operations", 0, 0, 'o', "List all resource operations. Optionally filtered by resource (-r) and/or node (-N)\n"}, {"pending", 0, 0, 'j', "\t\tDisplay pending state if 'record-pending' is enabled\n", pcmk_option_hidden}, {"list-standards", 0, 0, 0, "\tList supported standards"}, {"list-ocf-providers", 0, 0, 0, "List all available OCF providers"}, {"list-agents", 1, 0, 0, "List all agents available for the named standard and/or provider."}, {"list-ocf-alternatives", 1, 0, 0, "List all available providers for the named OCF agent\n"}, {"show-metadata", 1, 0, 0, "Show the metadata for the named class:provider:agent"}, {"query-xml", 0, 0, 'q', "\tQuery the definition of a resource (template expanded)"}, {"query-xml-raw", 0, 0, 'w', "\tQuery the definition of a resource (raw xml)"}, {"locate", 0, 0, 'W', "\t\tDisplay the current location(s) of a resource"}, {"stack", 0, 0, 'A', "\t\tDisplay the prerequisites and dependents of a resource"}, {"constraints",0, 0, 'a', "\tDisplay the (co)location constraints that apply to a resource"}, {"-spacer-", 1, 0, '-', "\nCommands:"}, {"cleanup", 0, 0, 'C', "\t\tDelete the resource history and re-check the current state. Optional: --resource"}, {"set-parameter", 1, 0, 'p', "Set the named parameter for a resource. See also -m, --meta"}, {"get-parameter", 1, 0, 'g', "Display the named parameter for a resource. See also -m, --meta"}, {"delete-parameter",1, 0, 'd', "Delete the named parameter for a resource. See also -m, --meta"}, {"get-property", 1, 0, 'G', "Display the 'class', 'type' or 'provider' of a resource", pcmk_option_hidden}, {"set-property", 1, 0, 'S', "(Advanced) Set resource property (e.g. 'class', 'type', or 'provider'). Required: -r, -t, -v", pcmk_option_hidden}, {"-spacer-", 1, 0, '-', "\nResource location:"}, { "move", 0, 0, 'M', "\t\tMove a resource from its current location to the named destination.\n " "\t\t\t\tRequires: --host. Optional: --lifetime, --master\n\n" "\t\t\t\tNOTE: This may prevent the resource from running on the previous location node until the implicit constraints expire or are removed with --unban\n" }, { "ban", 0, 0, 'B', "\t\tPrevent the named resource from running on the named --host. \n" "\t\t\t\tRequires: --resource. Optional: --host, --lifetime, --master\n\n" "\t\t\t\tIf --host is not specified, it defaults to:\n" "\t\t\t\t * the current location for primitives and groups, or\n\n" "\t\t\t\t * the current location of the master for m/s resources with master-max=1\n\n" "\t\t\t\tAll other situations result in an error as there is no sane default.\n\n" "\t\t\t\tNOTE: This will prevent the resource from running on this node until the constraint expires or is removed with --clear\n" }, { "clear", 0, 0, 'U', "\t\tRemove all constraints created by the --ban and/or --move commands. \n" "\t\t\t\tRequires: --resource. Optional: --host, --master\n\n" "\t\t\t\tIf --host is not specified, all constraints created by --ban and --move will be removed for the named resource.\n" }, {"lifetime", 1, 0, 'u', "\tLifespan of constraints created by the --ban and --move commands"}, { "master", 0, 0, 0, "\t\tLimit the scope of the --ban, --move and --clear commands to the Master role.\n" "\t\t\t\tFor --ban and --move, the previous master can still remain active in the Slave role." }, {"-spacer-", 1, 0, '-', "\nAdvanced Commands:"}, {"delete", 0, 0, 'D', "\t\t(Advanced) Delete a resource from the CIB"}, {"fail", 0, 0, 'F', "\t\t(Advanced) Tell the cluster this resource has failed"}, {"restart", 0, 0, 0, "\t\t(Advanced) Tell the cluster to restart this resource and anything that depends on it"}, {"wait", 0, 0, 0, "\t\t(Advanced) Wait until the cluster settles into a stable state"}, {"force-demote",0,0, 0, "\t(Advanced) Bypass the cluster and demote a resource on the local node. Additional detail with -V"}, {"force-stop", 0, 0, 0, "\t(Advanced) Bypass the cluster and stop a resource on the local node. Additional detail with -V"}, {"force-start",0, 0, 0, "\t(Advanced) Bypass the cluster and start a resource on the local node. Additional detail with -V"}, {"force-promote",0,0, 0, "\t(Advanced) Bypass the cluster and promote a resource on the local node. Additional detail with -V"}, {"force-check",0, 0, 0, "\t(Advanced) Bypass the cluster and check the state of a resource on the local node. Additional detail with -V\n"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"node", 1, 0, 'N', "\tHost uname"}, {"recursive", 0, 0, 0, "\tFollow colocation chains when using --set-parameter"}, {"resource-type", 1, 0, 't', "Resource type (primitive, clone, group, ...)"}, {"parameter-value", 1, 0, 'v', "Value to use with -p"}, {"meta", 0, 0, 'm', "\t\tModify a resource's configuration option rather than one which is passed to the resource agent script. For use with -p, -g, -d"}, {"utilization", 0, 0, 'z', "\tModify a resource's utilization attribute. For use with -p, -g, -d"}, {"set-name", 1, 0, 's', "\t(Advanced) ID of the instance_attributes object to change"}, {"nvpair", 1, 0, 'i', "\t(Advanced) ID of the nvpair object to change/delete"}, {"timeout", 1, 0, 'T', "\t(Advanced) Abort if command does not finish in this time (with --restart or --wait)"}, {"force", 0, 0, 'f', "\n" /* Is this actually true anymore? "\t\tForce the resource to move by creating a rule for the current location and a score of -INFINITY" "\n\t\tThis should be used if the resource's stickiness and constraint scores total more than INFINITY (Currently 100,000)" "\n\t\tNOTE: This will prevent the resource from running on this node until the constraint is removed with -U or the --lifetime duration expires\n"*/ }, {"xml-file", 1, 0, 'x', NULL, pcmk_option_hidden},\ /* legacy options */ {"host-uname", 1, 0, 'H', NULL, pcmk_option_hidden}, {"migrate", 0, 0, 'M', NULL, pcmk_option_hidden}, {"un-migrate", 0, 0, 'U', NULL, pcmk_option_hidden}, {"un-move", 0, 0, 'U', NULL, pcmk_option_hidden}, {"refresh", 0, 0, 'R', NULL, pcmk_option_hidden}, {"reprobe", 0, 0, 'P', NULL, pcmk_option_hidden}, {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "List the configured resources:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --list", pcmk_option_example}, {"-spacer-", 1, 0, '-', "List the available OCF agents:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --list-agents ocf", pcmk_option_example}, {"-spacer-", 1, 0, '-', "List the available OCF agents from the linux-ha project:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --list-agents ocf:heartbeat", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display the current location of 'myResource':", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --locate", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Move 'myResource' to another machine:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --move", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Move 'myResource' to a specific machine:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --move --node altNode", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Allow (but not force) 'myResource' to move back to its original location:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --un-move", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Tell the cluster that 'myResource' failed:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --fail", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Stop 'myResource' (and anything that depends on it):", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --set-parameter target-role --meta --parameter-value Stopped", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Tell the cluster not to manage 'myResource':", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "The cluster will not attempt to start or stop the resource under any circumstances."}, {"-spacer-", 1, 0, '-', "Useful when performing maintenance tasks on a resource.", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --set-parameter is-managed --meta --parameter-value false", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Erase the operation history of 'myResource' on 'aNode':", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "The cluster will 'forget' the existing resource state (including any errors) and attempt to recover the resource."}, {"-spacer-", 1, 0, '-', "Useful when a resource had failed permanently and has been repaired by an administrator.", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --cleanup --node aNode", pcmk_option_example}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { char rsc_cmd = 'L'; const char *rsc_id = NULL; const char *host_uname = NULL; const char *prop_name = NULL; const char *prop_value = NULL; const char *rsc_type = NULL; const char *prop_id = NULL; const char *prop_set = NULL; const char *rsc_long_cmd = NULL; const char *longname = NULL; GHashTable *override_params = NULL; char *xml_file = NULL; crm_ipc_t *crmd_channel = NULL; pe_working_set_t data_set; cib_t *cib_conn = NULL; bool recursive = FALSE; char *our_pid = NULL; /* Not all commands set these appropriately, but the defaults here are * sufficient to get the logic right. */ bool require_resource = TRUE; /* whether command requires that resource be specified */ bool require_dataset = TRUE; /* whether command requires populated dataset instance */ bool require_crmd = FALSE; /* whether command requires connection to CRMd */ int rc = pcmk_ok; int option_index = 0; int timeout_ms = 0; int argerr = 0; int flag; crm_log_cli_init("crm_resource"); crm_set_options(NULL, "(query|command) [options]", long_options, "Perform tasks related to cluster resources.\nAllows resources to be queried (definition and location), modified, and moved around the cluster.\n"); if (argc < 2) { crm_help('?', EX_USAGE); } while (1) { flag = crm_get_option_long(argc, argv, &option_index, &longname); if (flag == -1) break; switch (flag) { case 0: /* long options with no short equivalent */ if (safe_str_eq("master", longname)) { scope_master = TRUE; } else if(safe_str_eq(longname, "recursive")) { recursive = TRUE; } else if (safe_str_eq("wait", longname)) { rsc_cmd = flag; rsc_long_cmd = longname; require_resource = FALSE; require_dataset = FALSE; } else if ( safe_str_eq("restart", longname) || safe_str_eq("force-demote", longname) || safe_str_eq("force-stop", longname) || safe_str_eq("force-start", longname) || safe_str_eq("force-promote", longname) || safe_str_eq("force-check", longname)) { rsc_cmd = flag; rsc_long_cmd = longname; crm_log_args(argc, argv); } else if (safe_str_eq("list-ocf-providers", longname) || safe_str_eq("list-ocf-alternatives", longname) || safe_str_eq("list-standards", longname)) { const char *text = NULL; lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); if (safe_str_eq("list-ocf-providers", longname) || safe_str_eq("list-ocf-alternatives", longname)) { rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, optarg, &list); text = "OCF providers"; } else if (safe_str_eq("list-standards", longname)) { rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); text = "standards"; } if (rc > 0) { rc = 0; for (iter = list; iter != NULL; iter = iter->next) { rc++; printf("%s\n", iter->val); } lrmd_list_freeall(list); } else if (optarg) { fprintf(stderr, "No %s found for %s\n", text, optarg); } else { fprintf(stderr, "No %s found\n", text); } lrmd_api_delete(lrmd_conn); return crm_exit(rc); } else if (safe_str_eq("show-metadata", longname)) { char standard[512]; char provider[512]; char type[512]; char *metadata = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); rc = sscanf(optarg, "%[^:]:%[^:]:%s", standard, provider, type); if (rc == 3) { rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard, provider, type, &metadata, 0); } else if (rc == 2) { rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard, NULL, provider, &metadata, 0); } else if (rc < 2) { fprintf(stderr, "Please specify standard:type or standard:provider:type, not %s\n", optarg); rc = -EINVAL; } if (metadata) { printf("%s\n", metadata); } else { fprintf(stderr, "Metadata query for %s failed: %d\n", optarg, rc); } lrmd_api_delete(lrmd_conn); return crm_exit(rc); } else if (safe_str_eq("list-agents", longname)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; char *provider = strchr (optarg, ':'); lrmd_t *lrmd_conn = lrmd_api_new(); if (provider) { *provider++ = 0; } rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, optarg, provider); if (rc > 0) { rc = 0; for (iter = list; iter != NULL; iter = iter->next) { printf("%s\n", iter->val); rc++; } lrmd_list_freeall(list); rc = 0; } else { fprintf(stderr, "No agents found for standard=%s, provider=%s\n", optarg, (provider? provider : "*")); rc = -1; } lrmd_api_delete(lrmd_conn); return crm_exit(rc); } else { crm_err("Unhandled long option: %s", longname); } break; case 'V': do_trace = TRUE; crm_bump_log_level(argc, argv); break; case '$': case '?': crm_help(flag, EX_OK); break; case 'x': xml_file = strdup(optarg); break; case 'Q': BE_QUIET = TRUE; break; case 'm': attr_set_type = XML_TAG_META_SETS; break; case 'z': attr_set_type = XML_TAG_UTILIZATION; break; case 'u': move_lifetime = strdup(optarg); break; case 'f': do_force = TRUE; crm_log_args(argc, argv); break; case 'i': prop_id = optarg; break; case 's': prop_set = optarg; break; case 'r': rsc_id = optarg; break; case 'v': prop_value = optarg; break; case 't': rsc_type = optarg; break; case 'T': timeout_ms = crm_get_msec(optarg); break; case 'C': case 'R': case 'P': crm_log_args(argc, argv); require_resource = FALSE; require_crmd = TRUE; rsc_cmd = 'C'; break; case 'F': crm_log_args(argc, argv); require_crmd = TRUE; rsc_cmd = flag; break; case 'U': case 'B': case 'M': case 'D': crm_log_args(argc, argv); rsc_cmd = flag; break; case 'L': case 'c': case 'l': case 'q': case 'w': case 'W': case 'O': case 'o': case 'A': case 'a': rsc_cmd = flag; break; case 'j': print_pending = TRUE; break; case 'p': case 'd': case 'S': crm_log_args(argc, argv); prop_name = optarg; rsc_cmd = flag; break; case 'G': case 'g': prop_name = optarg; rsc_cmd = flag; break; case 'h': case 'H': case 'N': crm_trace("Option %c => %s", flag, optarg); host_uname = optarg; break; default: CMD_ERR("Argument code 0%o (%c) is not (?yet?) supported", flag, flag); ++argerr; break; } } if (optind < argc && argv[optind] != NULL && rsc_cmd == 0 && rsc_long_cmd) { override_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); while (optind < argc && argv[optind] != NULL) { char *name = calloc(1, strlen(argv[optind])); char *value = calloc(1, strlen(argv[optind])); int rc = sscanf(argv[optind], "%[^=]=%s", name, value); if(rc == 2) { g_hash_table_replace(override_params, name, value); } else { CMD_ERR("Error parsing '%s' as a name=value pair for --%s", argv[optind], rsc_long_cmd); free(value); free(name); argerr++; } optind++; } } else if (optind < argc && argv[optind] != NULL && rsc_cmd == 0) { CMD_ERR("non-option ARGV-elements: "); while (optind < argc && argv[optind] != NULL) { CMD_ERR("[%d of %d] %s ", optind, argc, argv[optind]); optind++; argerr++; } } if (optind > argc) { ++argerr; } if (argerr) { CMD_ERR("Invalid option(s) supplied, use --help for valid usage"); return crm_exit(EX_USAGE); } our_pid = calloc(1, 11); if (our_pid != NULL) { snprintf(our_pid, 10, "%d", getpid()); our_pid[10] = '\0'; } if (do_force) { crm_debug("Forcing..."); cib_options |= cib_quorum_override; } data_set.input = NULL; /* make clean-up easier */ /* If user specified resource, look for it, even if it's optional for command */ if (rsc_id) { require_resource = TRUE; } /* We need a dataset to find a resource, even if command doesn't need it */ if (require_resource) { require_dataset = TRUE; } /* Establish a connection to the CIB */ cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); if (rc != pcmk_ok) { CMD_ERR("Error signing on to the CIB service: %s", pcmk_strerror(rc)); return crm_exit(rc); } /* Populate working set from XML file if specified or CIB query otherwise */ if (require_dataset) { xmlNode *cib_xml_copy = NULL; if (xml_file != NULL) { cib_xml_copy = filename2xml(xml_file); } else { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); } if(rc != pcmk_ok) { goto bail; } /* Populate the working set instance */ set_working_set_defaults(&data_set); rc = update_working_set_xml(&data_set, &cib_xml_copy); if (rc != pcmk_ok) { goto bail; } cluster_status(&data_set); /* Set rc to -ENXIO if no resource matching rsc_id is found. * This does not bail, but is handled later for certain commands. * That handling could be done here instead if all flags above set * require_resource appropriately. */ if (require_resource && rsc_id && (find_rsc_or_clone(rsc_id, &data_set) == NULL)) { rc = -ENXIO; } } /* Establish a connection to the CRMd if needed */ if (require_crmd) { xmlNode *xml = NULL; mainloop_io_t *source = mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks); crmd_channel = mainloop_get_ipc_client(source); if (crmd_channel == NULL) { CMD_ERR("Error signing on to the CRMd service"); rc = -ENOTCONN; goto bail; } xml = create_hello_message(our_pid, crm_system_name, "0", "1"); crm_ipc_send(crmd_channel, xml, 0, 0, NULL); free_xml(xml); } /* Handle rsc_cmd appropriately */ if (rsc_cmd == 'L') { rc = pcmk_ok; cli_resource_print_list(&data_set, FALSE); } else if (rsc_cmd == 'l') { int found = 0; GListPtr lpc = NULL; rc = pcmk_ok; for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; found++; cli_resource_print_raw(rsc); } if (found == 0) { printf("NO resources configured\n"); rc = -ENXIO; } } else if (rsc_cmd == 0 && rsc_long_cmd && safe_str_eq(rsc_long_cmd, "restart")) { resource_t *rsc = NULL; rc = -ENXIO; if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); goto bail; } rsc = pe_find_resource(data_set.resources, rsc_id); rc = -EINVAL; if (rsc == NULL) { CMD_ERR("Resource '%s' not restarted: unknown", rsc_id); goto bail; } rc = cli_resource_restart(rsc, host_uname, timeout_ms, cib_conn); } else if (rsc_cmd == 0 && rsc_long_cmd && safe_str_eq(rsc_long_cmd, "wait")) { rc = wait_till_stable(timeout_ms, cib_conn); } else if (rsc_cmd == 0 && rsc_long_cmd) { /* force-(stop|start|check) */ rc = cli_resource_execute(rsc_id, rsc_long_cmd, override_params, cib_conn, &data_set); } else if (rsc_cmd == 'A' || rsc_cmd == 'a') { GListPtr lpc = NULL; resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set.input); if (rsc == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } unpack_constraints(cib_constraints, &data_set); for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } cli_resource_print_colocation(rsc, TRUE, rsc_cmd == 'A', 1); fprintf(stdout, "* %s\n", rsc->id); cli_resource_print_location(rsc, NULL); for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } cli_resource_print_colocation(rsc, FALSE, rsc_cmd == 'A', 1); } else if (rsc_cmd == 'c') { int found = 0; GListPtr lpc = NULL; rc = pcmk_ok; for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; cli_resource_print_cts(rsc); found++; } cli_resource_print_cts_constraints(&data_set); } else if (rsc_cmd == 'F') { rc = cli_resource_fail(crmd_channel, host_uname, rsc_id, &data_set); if (rc == pcmk_ok) { start_mainloop(); } } else if (rsc_cmd == 'O') { rc = cli_resource_print_operations(rsc_id, host_uname, TRUE, &data_set); } else if (rsc_cmd == 'o') { rc = cli_resource_print_operations(rsc_id, host_uname, FALSE, &data_set); /* All remaining commands require that resource exist */ } else if (rc == -ENXIO) { CMD_ERR("Resource '%s' not found: %s", crm_str(rsc_id), pcmk_strerror(rc)); } else if (rsc_cmd == 'W') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } rc = cli_resource_search(rsc_id, &data_set); if (rc >= 0) { rc = pcmk_ok; } } else if (rsc_cmd == 'q') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } rc = cli_resource_print(rsc_id, &data_set, TRUE); } else if (rsc_cmd == 'w') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } rc = cli_resource_print(rsc_id, &data_set, FALSE); } else if (rsc_cmd == 'U') { node_t *dest = NULL; if (rsc_id == NULL) { CMD_ERR("No value specified for --resource"); rc = -ENXIO; goto bail; } if (host_uname) { dest = pe_find_node(data_set.nodes, host_uname); if (dest == NULL) { CMD_ERR("Unknown node: %s", host_uname); rc = -ENXIO; goto bail; } rc = cli_resource_clear(rsc_id, dest->details->uname, NULL, cib_conn); } else { rc = cli_resource_clear(rsc_id, NULL, data_set.nodes, cib_conn); } } else if (rsc_cmd == 'M' && host_uname) { rc = cli_resource_move(rsc_id, host_uname, cib_conn, &data_set); } else if (rsc_cmd == 'B' && host_uname) { resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); node_t *dest = pe_find_node(data_set.nodes, host_uname); rc = -ENXIO; if (rsc_id == NULL) { CMD_ERR("No value specified for --resource"); goto bail; } else if(rsc == NULL) { CMD_ERR("Resource '%s' not moved: unknown", rsc_id); } else if (dest == NULL) { CMD_ERR("Error performing operation: node '%s' is unknown", host_uname); goto bail; } rc = cli_resource_ban(rsc_id, dest->details->uname, NULL, cib_conn); } else if (rsc_cmd == 'B' || rsc_cmd == 'M') { resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); rc = -ENXIO; if (rsc_id == NULL) { CMD_ERR("No value specified for --resource"); goto bail; } rc = -EINVAL; if(rsc == NULL) { CMD_ERR("Resource '%s' not moved: unknown", rsc_id); } else if(g_list_length(rsc->running_on) == 1) { node_t *current = rsc->running_on->data; rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); } else if(rsc->variant == pe_master) { int count = 0; GListPtr iter = NULL; node_t *current = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { count++; current = child->running_on->data; } } if(count == 1 && current) { rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); } else { CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).", rsc_id, g_list_length(rsc->running_on), count); CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --host ", rsc_id); CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" " --ban --master --host ", rsc_id); } } else { CMD_ERR("Resource '%s' not moved: active in %d locations.", rsc_id, g_list_length(rsc->running_on)); CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --host ", rsc_id); } } else if (rsc_cmd == 'G') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } rc = cli_resource_print_property(rsc_id, prop_name, &data_set); } else if (rsc_cmd == 'S') { xmlNode *msg_data = NULL; if ((rsc_id == NULL) || !strlen(rsc_id)) { CMD_ERR("Must specify -r with resource id"); rc = -ENXIO; goto bail; } else if ((rsc_type == NULL) || !strlen(rsc_type)) { CMD_ERR("Must specify -t with resource type"); rc = -ENXIO; goto bail; } else if ((prop_value == NULL) || !strlen(prop_value)) { CMD_ERR("Must supply -v with new value"); rc = -EINVAL; goto bail; } else if (cib_conn == NULL) { rc = -ENOTCONN; goto bail; } CRM_LOG_ASSERT(prop_name != NULL); msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); crm_xml_add(msg_data, prop_name, prop_value); rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options); free_xml(msg_data); } else if (rsc_cmd == 'g') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } rc = cli_resource_print_attribute(rsc_id, prop_name, &data_set); } else if (rsc_cmd == 'p') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } if (prop_value == NULL || strlen(prop_value) == 0) { CMD_ERR("You need to supply a value with the -v option"); rc = -EINVAL; goto bail; } /* coverity[var_deref_model] False positive */ rc = cli_resource_update_attribute(rsc_id, prop_set, prop_id, prop_name, prop_value, recursive, cib_conn, &data_set); } else if (rsc_cmd == 'd') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } /* coverity[var_deref_model] False positive */ rc = cli_resource_delete_attribute(rsc_id, prop_set, prop_id, prop_name, cib_conn, &data_set); } else if (rsc_cmd == 'C' && rsc_id) { resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); if(do_force == FALSE) { rsc = uber_parent(rsc); } crm_debug("Re-checking the state of %s for %s on %s", rsc->id, rsc_id, host_uname); if(rsc) { crmd_replies_needed = 0; rc = cli_resource_delete(cib_conn, crmd_channel, host_uname, rsc, &data_set); } else { rc = -ENODEV; } if(rc == pcmk_ok && BE_QUIET == FALSE) { /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ cli_resource_check(cib_conn, rsc); } if (rc == pcmk_ok) { start_mainloop(); } } else if (rsc_cmd == 'C') { #if HAVE_ATOMIC_ATTRD const char *router_node = host_uname; xmlNode *msg_data = NULL; xmlNode *cmd = NULL; if (host_uname) { node_t *node = pe_find_node(data_set.nodes, host_uname); if (node && is_remote_node(node)) { if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { CMD_ERR("No lrmd connection detected to remote node %s", host_uname); return -ENXIO; } node = node->details->remote_rsc->running_on->data; router_node = node->details->uname; } } msg_data = create_xml_node(NULL, "crm-resource-reprobe-op"); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); if (safe_str_neq(router_node, host_uname)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } cmd = create_request(CRM_OP_REPROBE, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); free_xml(msg_data); crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes"); - rc = attrd_update_delegate( - NULL, 'u', host_uname, "fail-count-*", NULL, XML_CIB_TAG_STATUS, NULL, NULL, NULL, attrd_opt_none); + rc = attrd_update_delegate(NULL, 'u', host_uname, + "^" CRM_FAIL_COUNT_PREFIX "-", NULL, + XML_CIB_TAG_STATUS, NULL, NULL, NULL, + attrd_opt_none); if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { start_mainloop(); } free_xml(cmd); #else GListPtr rIter = NULL; crmd_replies_needed = 0; for (rIter = data_set.resources; rIter; rIter = rIter->next) { resource_t *rsc = rIter->data; cli_resource_delete(cib_conn, crmd_channel, host_uname, rsc, &data_set); } start_mainloop(); #endif } else if (rsc_cmd == 'D') { xmlNode *msg_data = NULL; if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r"); rc = -ENXIO; goto bail; } if (rsc_type == NULL) { CMD_ERR("You need to specify a resource type with -t"); rc = -ENXIO; goto bail; } else if (cib_conn == NULL) { rc = -ENOTCONN; goto bail; } msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options); free_xml(msg_data); } else { CMD_ERR("Unknown command: %c", rsc_cmd); } bail: if (data_set.input != NULL) { cleanup_alloc_calculations(&data_set); } if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } if (rc == -pcmk_err_no_quorum) { CMD_ERR("Error performing operation: %s", pcmk_strerror(rc)); CMD_ERR("Try using -f"); } else if (rc != pcmk_ok) { CMD_ERR("Error performing operation: %s", pcmk_strerror(rc)); } return crm_exit(rc); } diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 49815f734e..4f3a2fc660 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,1690 +1,1690 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include bool do_trace = FALSE; bool do_force = FALSE; int crmd_replies_needed = 1; /* The welcome message */ const char *attr_set_type = XML_TAG_ATTR_SETS; static int do_find_resource(const char *rsc, resource_t * the_rsc, pe_working_set_t * data_set) { int found = 0; GListPtr lpc = NULL; for (lpc = the_rsc->running_on; lpc != NULL; lpc = lpc->next) { node_t *node = (node_t *) lpc->data; crm_trace("resource %s is running on: %s", rsc, node->details->uname); if (BE_QUIET) { fprintf(stdout, "%s\n", node->details->uname); } else { const char *state = ""; if (the_rsc->variant < pe_clone && the_rsc->fns->state(the_rsc, TRUE) == RSC_ROLE_MASTER) { state = "Master"; } fprintf(stdout, "resource %s is running on: %s %s\n", rsc, node->details->uname, state); } found++; } if (BE_QUIET == FALSE && found == 0) { fprintf(stderr, "resource %s is NOT running\n", rsc); } return found; } int cli_resource_search(const char *rsc, pe_working_set_t * data_set) { int found = 0; resource_t *the_rsc = NULL; resource_t *parent = NULL; if (the_rsc == NULL) { the_rsc = pe_find_resource(data_set->resources, rsc); } if (the_rsc == NULL) { return -ENXIO; } if (the_rsc->variant >= pe_clone) { GListPtr gIter = the_rsc->children; for (; gIter != NULL; gIter = gIter->next) { found += do_find_resource(rsc, gIter->data, data_set); } /* The anonymous clone children's common ID is supplied */ } else if ((parent = uber_parent(the_rsc)) != NULL && parent->variant >= pe_clone && is_not_set(the_rsc->flags, pe_rsc_unique) && the_rsc->clone_name && safe_str_eq(rsc, the_rsc->clone_name) && safe_str_neq(rsc, the_rsc->id)) { GListPtr gIter = parent->children; for (; gIter != NULL; gIter = gIter->next) { found += do_find_resource(rsc, gIter->data, data_set); } } else { found += do_find_resource(rsc, the_rsc, data_set); } return found; } resource_t * find_rsc_or_clone(const char *rsc, pe_working_set_t * data_set) { resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if (the_rsc == NULL) { char *as_clone = crm_concat(rsc, "0", ':'); the_rsc = pe_find_resource(data_set->resources, as_clone); free(as_clone); } return the_rsc; } static int find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int offset = 0; static int xpath_max = 1024; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; if(value) { *value = NULL; } if(the_cib == NULL) { return -ENOTCONN; } xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "%s", get_object_path("resources")); offset += snprintf(xpath_string + offset, xpath_max - offset, "//*[@id=\"%s\"]", rsc); if (set_type) { offset += snprintf(xpath_string + offset, xpath_max - offset, "/%s", set_type); if (set_name) { offset += snprintf(xpath_string + offset, xpath_max - offset, "[@id=\"%s\"]", set_name); } } offset += snprintf(xpath_string + offset, xpath_max - offset, "//nvpair["); if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, "@id=\"%s\"", attr_id); } if (attr_name) { if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, " and "); } offset += snprintf(xpath_string + offset, xpath_max - offset, "@name=\"%s\"", attr_name); } offset += snprintf(xpath_string + offset, xpath_max - offset, "]"); CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = -EINVAL; printf("Multiple attributes match name=%s\n", attr_name); for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { printf(" Value: %s \t(id=%s)\n", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } } else if(value) { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } bail: free(xpath_string); free_xml(xml_search); return rc; } static resource_t * find_matching_attr_resource(resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd) { int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; if(do_force == TRUE) { return rsc; } else if(rsc->parent) { switch(rsc->parent->variant) { case pe_group: if (BE_QUIET == FALSE) { printf("Performing %s of '%s' for '%s' will not apply to its peers in '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } break; case pe_master: case pe_clone: rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); free(local_attr_id); if(rc != pcmk_ok) { rsc = rsc->parent; if (BE_QUIET == FALSE) { printf("Performing %s of '%s' on '%s', the parent of '%s'\n", cmd, attr_name, rsc->id, rsc_id); } } break; default: break; } } else if (rsc->parent && BE_QUIET == FALSE) { printf("Forcing %s of '%s' for '%s' instead of '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } else if(rsc->parent == NULL && rsc->children) { resource_t *child = rsc->children->data; if(child->variant == pe_native) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if(rc == pcmk_ok) { rsc = child; if (BE_QUIET == FALSE) { printf("A value for '%s' already exists in child '%s', performing %s on that instead of '%s'\n", attr_name, lookup_id, cmd, rsc_id); } } free(local_attr_id); free(lookup_id); } } return rsc; } int cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, bool recursive, cib_t * cib, pe_working_set_t * data_set) { int rc = pcmk_ok; static bool need_init = TRUE; char *lookup_id = NULL; char *local_attr_id = NULL; char *local_attr_set = NULL; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; bool use_attributes_tag = FALSE; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } if(attr_id == NULL && do_force == FALSE && pcmk_ok != find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL)) { printf("\n"); } if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { if (do_force == FALSE) { rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", uber_parent(rsc)->id, attr_name, local_attr_id); printf(" Delete '%s' first or use --force to override\n", local_attr_id); } free(local_attr_id); if (rc == pcmk_ok) { return -ENOTUNIQ; } } } else { rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "update"); } lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok) { crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id); attr_id = local_attr_id; } else if (rc != -ENXIO) { free(lookup_id); free(local_attr_id); return rc; } else { const char *value = NULL; xmlNode *cib_top = NULL; const char *tag = crm_element_name(rsc->xml); cib->cmds->query(cib, "/cib", &cib_top, cib_sync_call | cib_scope_local | cib_xpath | cib_no_children); value = crm_element_value(cib_top, "ignore_dtd"); if (value != NULL) { use_attributes_tag = TRUE; } else { value = crm_element_value(cib_top, XML_ATTR_VALIDATION); if (crm_ends_with(value, "-0.6")) { use_attributes_tag = TRUE; } } free_xml(cib_top); if (attr_set == NULL) { local_attr_set = crm_concat(lookup_id, attr_set_type, '-'); attr_set = local_attr_set; } if (attr_id == NULL) { local_attr_id = crm_concat(attr_set, attr_name, '-'); attr_id = local_attr_id; } if (use_attributes_tag && safe_str_eq(tag, XML_CIB_TAG_MASTER)) { tag = "master_slave"; /* use the old name */ } xml_top = create_xml_node(NULL, tag); crm_xml_add(xml_top, XML_ATTR_ID, lookup_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); if (use_attributes_tag) { xml_obj = create_xml_node(xml_obj, XML_TAG_ATTRS); } } xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); if (xml_top == NULL) { xml_top = xml_obj; } crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Set '%s' option: id=%s%s%s%s%s=%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value); } free_xml(xml_top); free(lookup_id); free(local_attr_id); free(local_attr_set); if(recursive && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { GListPtr lpc = NULL; if(need_init) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); need_init = FALSE; unpack_constraints(cib_constraints, data_set); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } } crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs); set_bit(rsc->flags, pe_rsc_allocating); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; resource_t *peer = cons->rsc_lh; crm_debug("Checking %s %d", cons->id, cons->score); if (cons->score > 0 && is_not_set(peer->flags, pe_rsc_allocating)) { /* Don't get into colocation loops */ crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, peer->id); cli_resource_update_attribute(peer->id, NULL, NULL, attr_name, attr_value, recursive, cib, data_set); } } } return rc; } int cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, cib_t * cib, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } if(attr_id == NULL && do_force == FALSE && find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) != pcmk_ok) { printf("\n"); } if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "delete"); } lookup_id = clone_strip(rsc->id); rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == -ENXIO) { free(lookup_id); return pcmk_ok; } else if (rc != pcmk_ok) { free(lookup_id); return rc; } if (attr_id == NULL) { attr_id = local_attr_id; } xml_obj = create_xml_node(NULL, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->delete(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Deleted '%s' option: id=%s%s%s%s%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(lookup_id); free_xml(xml_obj); free(local_attr_id); return rc; } static int send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, const char *host_uname, const char *rsc_id, bool only_failed, pe_working_set_t * data_set) { char *our_pid = NULL; char *key = NULL; int rc = -ECOMM; xmlNode *cmd = NULL; xmlNode *xml_rsc = NULL; const char *value = NULL; const char *router_node = host_uname; xmlNode *params = NULL; xmlNode *msg_data = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found", rsc_id); return -ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s", rsc_id); return -EINVAL; } else if (host_uname == NULL) { CMD_ERR("Please supply a hostname with -H"); return -EINVAL; } else { node_t *node = pe_find_node(data_set->nodes, host_uname); if (node && is_remote_node(node)) { if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { CMD_ERR("No lrmd connection detected to remote node %s", host_uname); return -ENXIO; } node = node->details->remote_rsc->running_on->data; router_node = node->details->uname; } } key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); free(key); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); if (safe_str_neq(router_node, host_uname)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); if (rsc->clone_name) { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id); } else { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_ATTR_TYPE); if (value == NULL) { CMD_ERR("%s has no type! Aborting...", rsc_id); return -ENXIO; } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_CLASS); if (value == NULL) { CMD_ERR("%s has no class! Aborting...", rsc_id); return -ENXIO; } crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); params = create_xml_node(msg_data, XML_TAG_ATTRS); crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); key = crm_meta_name(XML_LRM_ATTR_INTERVAL); crm_xml_add(params, key, "60000"); /* 1 minute */ free(key); our_pid = calloc(1, 11); if (our_pid != NULL) { snprintf(our_pid, 10, "%d", getpid()); our_pid[10] = '\0'; } cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); /* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */ free_xml(msg_data); if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { rc = 0; } else { CMD_ERR("Could not send %s op to the crmd", op); rc = -ENOTCONN; } free_xml(cmd); return rc; } static int cli_delete_attr(cib_t * cib_conn, const char * host_uname, const char * attr_name, pe_working_set_t * data_set) { node_t *node = pe_find_node(data_set->nodes, host_uname); int attr_options = attrd_opt_none; if (node && is_remote_node(node)) { #if HAVE_ATOMIC_ATTRD set_bit(attr_options, attrd_opt_remote); #else /* Talk directly to cib for remote nodes if it's legacy attrd */ return delete_attr_delegate(cib_conn, cib_sync_call, XML_CIB_TAG_STATUS, node->details->id, NULL, NULL, NULL, attr_name, NULL, FALSE, NULL); #endif } return attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, NULL, NULL, attr_options); } int cli_resource_delete(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_uname, resource_t * rsc, pe_working_set_t * data_set) { int rc = pcmk_ok; node_t *node = NULL; if (rsc == NULL) { return -ENXIO; } else if (rsc->children) { GListPtr lpc = NULL; for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; rc = cli_resource_delete(cib_conn, crmd_channel, host_uname, child, data_set); if(rc != pcmk_ok || (rsc->variant >= pe_clone && is_not_set(rsc->flags, pe_rsc_unique))) { return rc; } } return pcmk_ok; } else if (host_uname == NULL) { GListPtr lpc = NULL; for (lpc = data_set->nodes; lpc != NULL; lpc = lpc->next) { node = (node_t *) lpc->data; if (node->details->online) { cli_resource_delete(cib_conn, crmd_channel, node->details->uname, rsc, data_set); } } return pcmk_ok; } node = pe_find_node(data_set->nodes, host_uname); if (node && node->details->rsc_discovery_enabled) { printf("Cleaning up %s on %s", rsc->id, host_uname); rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, TRUE, data_set); } else { printf("Resource discovery disabled on %s. Unable to delete lrm state.\n", host_uname); rc = -EOPNOTSUPP; } if (rc == pcmk_ok) { char *attr_name = NULL; if(node && node->details->remote_rsc == NULL && node->details->rsc_discovery_enabled) { crmd_replies_needed++; } if(is_not_set(rsc->flags, pe_rsc_unique)) { char *id = clone_strip(rsc->id); - attr_name = crm_strdup_printf("fail-count-%s", id); + attr_name = crm_failcount_name(id); free(id); } else if (rsc->clone_name) { - attr_name = crm_strdup_printf("fail-count-%s", rsc->clone_name); + attr_name = crm_failcount_name(rsc->clone_name); } else { - attr_name = crm_strdup_printf("fail-count-%s", rsc->id); + attr_name = crm_failcount_name(rsc->id); } printf(", removing %s\n", attr_name); rc = cli_delete_attr(cib_conn, host_uname, attr_name, data_set); free(attr_name); } else if(rc != -EOPNOTSUPP) { printf(" - FAILED\n"); } return rc; } void cli_resource_check(cib_t * cib_conn, resource_t *rsc) { int need_nl = 0; char *role_s = NULL; char *managed = NULL; resource_t *parent = uber_parent(rsc); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); if(role_s) { enum rsc_role_e role = text2role(role_s); if(role == RSC_ROLE_UNKNOWN) { // Treated as if unset } else if(role == RSC_ROLE_STOPPED) { printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id); need_nl++; } else if(parent->variant > pe_clone && role == RSC_ROLE_SLAVE) { printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id); need_nl++; } } if(managed && crm_is_true(managed) == FALSE) { printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id); need_nl++; } if(need_nl) { printf("\n"); } } int cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set) { crm_warn("Failing: %s", rsc_id); return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); } static GHashTable * generate_resource_params(resource_t * rsc, pe_working_set_t * data_set) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; if (!rsc) { crm_err("Resource does not exist in config"); return NULL; } params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); meta = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); combined = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(params, rsc, NULL /* TODO: Pass in local node */ , data_set); get_meta_attributes(meta, rsc, NULL /* TODO: Pass in local node */ , data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } static bool resource_is_running_on(resource_t *rsc, const char *host) { bool found = TRUE; GListPtr hIter = NULL; GListPtr hosts = NULL; if(rsc == NULL) { return FALSE; } rsc->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pe_node_t *node = (pe_node_t *) hIter->data; if(strcmp(host, node->details->uname) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } else if(strcmp(host, node->details->id) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if(host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = FALSE; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = FALSE; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { resource_t *rsc = (resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (rsc->variant == pe_group) { active = g_list_concat(active, get_active_resources(host, rsc->children)); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } static GList *subtract_lists(GList *from, GList *items) { GList *item = NULL; GList *result = g_list_copy(from); for (item = items; item != NULL; item = item->next) { GList *candidate = NULL; for (candidate = from; candidate != NULL; candidate = candidate->next) { crm_info("Comparing %s with %s", candidate->data, item->data); if(strcmp(candidate->data, item->data) == 0) { result = g_list_remove(result, candidate->data); break; } } } return result; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { fprintf(stdout, "%s%s\n", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as working set input * * This also updates the working set timestamp to the current time. * * \param[in] data_set Working set instance to update * \param[in] xml XML to use as input * * \return pcmk_ok on success, -ENOKEY if unable to upgrade XML * \note On success, caller is responsible for freeing memory allocated for * data_set->now. * \todo This follows the example of other callers of cli_config_update() * and returns -ENOKEY ("Required key not available") if that fails, * but perhaps -pcmk_err_schema_validation would be better in that case. */ int update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml) { if (cli_config_update(xml, NULL, FALSE) == FALSE) { return -ENOKEY; } data_set->input = *xml; data_set->now = crm_time_new(NULL); return pcmk_ok; } /*! * \internal * \brief Update a working set's XML input based on a CIB query * * \param[in] data_set Data set instance to initialize * \param[in] cib Connection to the CIB * * \return pcmk_ok on success, -errno on failure * \note On success, caller is responsible for freeing memory allocated for * data_set->input and data_set->now. */ static int update_working_set_from_cib(pe_working_set_t * data_set, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { fprintf(stderr, "Could not obtain the current CIB: %s (%d)\n", pcmk_strerror(rc), rc); return rc; } rc = update_working_set_xml(data_set, &cib_xml_copy); if (rc != pcmk_ok) { fprintf(stderr, "Could not upgrade the current CIB XML\n"); free_xml(cib_xml_copy); return rc; } return pcmk_ok; } static int update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc; cleanup_alloc_calculations(data_set); rc = update_working_set_from_cib(data_set, cib); if (rc != pcmk_ok) { return rc; } if(simulate) { pid = crm_itoa(getpid()); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { fprintf(stderr, "Could not create shadow cib: '%s'\n", pid); rc = -ENXIO; goto cleanup; } rc = write_xml_file(data_set->input, shadow_file, FALSE); if (rc < 0) { fprintf(stderr, "Could not populate shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); if(rc != pcmk_ok) { fprintf(stderr, "Could not connect to shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } do_calculations(data_set, data_set->input, NULL); run_simulation(data_set, shadow_cib, NULL, TRUE); rc = update_dataset(shadow_cib, data_set, FALSE); } else { cluster_status(data_set); } cleanup: /* Do not free data_set->input here, we need rsc->xml to be valid later on */ cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } static int max_delay_for_resource(pe_working_set_t * data_set, resource_t *rsc) { int delay = 0; int max_delay = 0; if(rsc && rsc->children) { GList *iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; delay = max_delay_for_resource(data_set, child); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id); max_delay = delay; } } } else if(rsc) { char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP); action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set); const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT); max_delay = crm_int_helper(value, NULL); pe_free_action(stop); } return max_delay; } static int max_delay_in(pe_working_set_t * data_set, GList *resources) { int max_delay = 0; GList *item = NULL; for (item = resources; item != NULL; item = item->next) { int delay = 0; resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data); delay = max_delay_for_resource(data_set, rsc); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id); max_delay = delay; } } return 5 + (max_delay / 1000); } #define waiting_for_starts(d, r, h) ((g_list_length(d) > 0) || \ (resource_is_running_on((r), (h)) == FALSE)) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in] rsc The resource to restart * \param[in] host The host to restart the resource on (or NULL for all) * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but a two-second * granularity is actually used; if 0, a timeout will be * calculated based on the resource timeout) * \param[in] cib Connection to the CIB for modifying/checking resource * * \return pcmk_ok on success, -errno on failure (exits on certain failures) */ int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib) { int rc = 0; int lpc = 0; int before = 0; int step_timeout_s = 0; int sleep_interval = 2; int timeout = timeout_ms / 1000; bool is_clone = FALSE; char *rsc_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pe_working_set_t data_set; if(resource_is_running_on(rsc, host) == FALSE) { const char *id = rsc->clone_name?rsc->clone_name:rsc->id; if(host) { printf("%s is not running on %s and so cannot be restarted\n", id, host); } else { printf("%s is not running anywhere and so cannot be restarted\n", id); } return -ENXIO; } /* We might set the target-role meta-attribute */ attr_set_type = XML_TAG_META_SETS; rsc_id = strdup(rsc->id); if(rsc->variant > pe_group) { is_clone = TRUE; } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ set_working_set_defaults(&data_set); rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stdout, "Could not get new resource list: %s (%d)\n", pcmk_strerror(rc), rc); free(rsc_id); return rc; } restart_target_active = get_active_resources(host, data_set.resources); current_active = get_active_resources(host, data_set.resources); dump_list(current_active, "Origin"); if(is_clone && host) { /* Stop the clone instance by banning it from the host */ BE_QUIET = TRUE; rc = cli_resource_ban(rsc_id, host, NULL, cib); } else { /* Stop the resource by setting target-role to Stopped. * Remember any existing target-role so we can restore it later * (though it only makes any difference if it's Slave). */ char *lookup_id = clone_strip(rsc->id); find_resource_attr(cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role); free(lookup_id); rc = cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, RSC_STOPPED, FALSE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not set target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); if (current_active) { g_list_free_full(current_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } free(rsc_id); return crm_exit(rc); } rc = update_dataset(cib, &data_set, TRUE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources would be stopped\n"); goto failure; } target_active = get_active_resources(host, data_set.resources); dump_list(target_active, "Target"); list_delta = subtract_lists(current_active, target_active); fprintf(stdout, "Waiting for %d resources to stop:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while(g_list_length(list_delta) > 0) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; lpc < step_timeout_s && g_list_length(list_delta) > 0; lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were stopped\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(current_active, target_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ fprintf(stderr, "Could not complete shutdown of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } if(is_clone && host) { rc = cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not unset target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); free(rsc_id); return crm_exit(rc); } if (target_active) { g_list_free_full(target_active, free); } target_active = restart_target_active; if (list_delta) { g_list_free(list_delta); } list_delta = subtract_lists(target_active, current_active); fprintf(stdout, "Waiting for %d resources to start again:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were started\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ current_active = get_active_resources(NULL, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(target_active, current_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ fprintf(stdout, "Could not complete restart of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } rc = pcmk_ok; goto done; failure: if(is_clone && host) { cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); } else { cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } done: if (list_delta) { g_list_free(list_delta); } if (current_active) { g_list_free_full(current_active, free); } if (target_active && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } cleanup_alloc_calculations(&data_set); free(rsc_id); return rc; } #define action_is_pending(action) \ ((is_set((action)->flags, pe_action_optional) == FALSE) \ && (is_set((action)->flags, pe_action_runnable) == TRUE) \ && (is_set((action)->flags, pe_action_pseudo) == FALSE)) /*! * \internal * \brief Return TRUE if any actions in a list are pending * * \param[in] actions List of actions to check * * \return TRUE if any actions in the list are pending, FALSE otherwise */ static bool actions_are_pending(GListPtr actions) { GListPtr action; for (action = actions; action != NULL; action = action->next) { if (action_is_pending((action_t *) action->data)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Print pending actions to stderr * * \param[in] actions List of actions to check * * \return void */ static void print_pending_actions(GListPtr actions) { GListPtr action; fprintf(stderr, "Pending actions:\n"); for (action = actions; action != NULL; action = action->next) { action_t *a = (action_t *) action->data; if (action_is_pending(a)) { fprintf(stderr, "\tAction %d: %s", a->id, a->uuid); if (a->node) { fprintf(stderr, "\ton %s", a->node->details->uname); } fprintf(stderr, "\n"); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but one-second granularity * is actually used; if 0, a default will be used) * \param[in] cib Connection to the CIB * * \return pcmk_ok on success, -errno on failure */ int wait_till_stable(int timeout_ms, cib_t * cib) { pe_working_set_t data_set; int rc = -1; int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; set_working_set_defaults(&data_set); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff > 0) { crm_info("Waiting up to %d seconds for cluster actions to complete", time_diff); } else { print_pending_actions(data_set.actions); cleanup_alloc_calculations(&data_set); return -ETIME; } if (rc == pcmk_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ cleanup_alloc_calculations(&data_set); rc = update_working_set_from_cib(&data_set, cib); if (rc != pcmk_ok) { cleanup_alloc_calculations(&data_set); return rc; } do_calculations(&data_set, data_set.input, NULL); } while (actions_are_pending(data_set.actions)); return pcmk_ok; } int cli_resource_execute(const char *rsc_id, const char *rsc_action, GHashTable *override_hash, cib_t * cib, pe_working_set_t *data_set) { int rc = pcmk_ok; svc_action_t *op = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; const char *action = NULL; GHashTable *params = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Must supply a resource id with -r"); return -ENXIO; } if (safe_str_eq(rsc_action, "force-check")) { action = "monitor"; } else if (safe_str_eq(rsc_action, "force-stop")) { action = rsc_action+6; } else if (safe_str_eq(rsc_action, "force-start") || safe_str_eq(rsc_action, "force-demote") || safe_str_eq(rsc_action, "force-promote")) { action = rsc_action+6; if(rsc->variant >= pe_clone) { rc = cli_resource_search(rsc_id, data_set); if(rc > 0 && do_force == FALSE) { CMD_ERR("It is not safe to %s %s here: the cluster claims it is already active", action, rsc_id); CMD_ERR("Try setting target-role=stopped first or specifying --force"); crm_exit(EPERM); } } } if(rsc->variant == pe_clone || rsc->variant == pe_master) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->children->data; } if(rsc->variant == pe_group) { CMD_ERR("Sorry, --%s doesn't support group resources", rsc_action); crm_exit(EOPNOTSUPP); } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); if(safe_str_eq(rclass, "stonith")){ CMD_ERR("Sorry, --%s doesn't support %s resources yet", rsc_action, rclass); crm_exit(EOPNOTSUPP); } params = generate_resource_params(rsc, data_set); op = resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params, 0); if(do_trace) { setenv("OCF_TRACE_RA", "1", 1); } if(op && override_hash) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, override_hash); while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) { printf("Overriding the cluster configuration for '%s' with '%s' = '%s'\n", rsc->id, name, value); g_hash_table_replace(op->params, strdup(name), strdup(value)); } } if(op == NULL) { /* Re-run but with stderr enabled so we can display a sane error message */ crm_enable_stderr(TRUE); resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params, 0); return crm_exit(EINVAL); } else if (services_action_sync(op)) { int more, lpc, last; char *local_copy = NULL; if (op->status == PCMK_LRM_OP_DONE) { printf("Operation %s for %s (%s:%s:%s) returned %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->rc); } else { printf("Operation %s for %s (%s:%s:%s) failed: %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->status); } if (op->stdout_data) { local_copy = strdup(op->stdout_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stdout: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } if (op->stderr_data) { local_copy = strdup(op->stderr_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stderr: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } } rc = op->rc; services_action_free(op); return rc; } int cli_resource_move(const char *rsc_id, const char *host_name, cib_t * cib, pe_working_set_t *data_set) { int rc = -EINVAL; int count = 0; node_t *current = NULL; node_t *dest = pe_find_node(data_set->nodes, host_name); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); bool cur_is_dest = FALSE; if (rsc == NULL) { CMD_ERR("Resource '%s' not moved: not found", rsc_id); return -ENXIO; } else if (scope_master && rsc->variant < pe_master) { resource_t *p = uber_parent(rsc); if(p->variant == pe_master) { CMD_ERR("Using parent '%s' for --move command instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { CMD_ERR("Ignoring '--master' option: not valid for %s resources.", get_resource_typename(rsc->variant)); scope_master = FALSE; } } if(rsc->variant == pe_master) { GListPtr iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { rsc = child; count++; } } if(scope_master == FALSE && count == 0) { count = g_list_length(rsc->running_on); } } else if (rsc->variant > pe_group) { count = g_list_length(rsc->running_on); } else if (g_list_length(rsc->running_on) > 1) { CMD_ERR("Resource '%s' not moved: active on multiple nodes", rsc_id); return rc; } if(dest == NULL) { CMD_ERR("Error performing operation: node '%s' is unknown", host_name); return -ENXIO; } if(g_list_length(rsc->running_on) == 1) { current = rsc->running_on->data; } if(current == NULL) { /* Nothing to check */ } else if(scope_master && rsc->fns->state(rsc, TRUE) != RSC_ROLE_MASTER) { crm_trace("%s is already active on %s but not in correct state", rsc_id, dest->details->uname); } else if (safe_str_eq(current->details->uname, dest->details->uname)) { cur_is_dest = TRUE; if (do_force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, scope_master?"promoted":"active", dest->details->uname); } else { CMD_ERR("Error performing operation: %s is already %s on %s", rsc_id, scope_master?"promoted":"active", dest->details->uname); return rc; } } /* Clear any previous constraints for 'dest' */ cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(rsc_id, dest->details->uname, cib); crm_trace("%s%s now prefers node %s%s", rsc->id, scope_master?" (master)":"", dest->details->uname, do_force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if(do_force && (cur_is_dest == FALSE)) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib); } else if(count > 1) { CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move one to %s", rsc_id, scope_master?"promoted":"active", count, dest->details->uname); CMD_ERR("You can prevent '%s' from being %s at a specific location with:" " --ban %s--host ", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":""); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; } diff --git a/tools/fake_transition.c b/tools/fake_transition.c index d4e61892c1..c45349f545 100644 --- a/tools/fake_transition.c +++ b/tools/fake_transition.c @@ -1,852 +1,852 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fake_transition.h" static bool fake_quiet = FALSE; static cib_t *fake_cib = NULL; static GListPtr fake_resource_list = NULL; static GListPtr fake_op_fail_list = NULL; gboolean bringing_nodes_online = FALSE; #define STATUS_PATH_MAX 512 #define quiet_log(fmt, args...) do { \ if(fake_quiet) { \ crm_trace(fmt, ##args); \ } else { \ printf(fmt , ##args); \ } \ } while(0) #define new_node_template "//"XML_CIB_TAG_NODE"[@uname='%s']" #define node_template "//"XML_CIB_TAG_STATE"[@uname='%s']" #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" /* #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" */ static void inject_transient_attr(xmlNode * cib_node, const char *name, const char *value) { xmlNode *attrs = NULL; xmlNode *container = NULL; xmlNode *nvp = NULL; xmlChar *node_path; const char *node_uuid = ID(cib_node); char *nvp_id = crm_concat(name, node_uuid, '-'); node_path = xmlGetNodePath(cib_node); quiet_log("Injecting attribute %s=%s into %s '%s'", name, value, node_path, ID(cib_node)); free(node_path); attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); if (attrs == NULL) { attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(attrs, XML_ATTR_ID, node_uuid); } container = first_named_child(attrs, XML_TAG_ATTR_SETS); if (container == NULL) { container = create_xml_node(attrs, XML_TAG_ATTR_SETS); crm_xml_add(container, XML_ATTR_ID, node_uuid); } nvp = create_xml_node(container, XML_CIB_TAG_NVPAIR); crm_xml_add(nvp, XML_ATTR_ID, nvp_id); crm_xml_add(nvp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(nvp, XML_NVPAIR_ATTR_VALUE, value); free(nvp_id); } static void update_failcounts(xmlNode * cib_node, const char *resource, int interval, int rc) { if (rc == 0) { return; } else if (rc == 7 && interval == 0) { return; } else { char *name = NULL; char *now = crm_itoa(time(NULL)); - name = crm_concat("fail-count", resource, '-'); + name = crm_failcount_name(resource); inject_transient_attr(cib_node, name, "value++"); + free(name); - name = crm_concat("last-failure", resource, '-'); + name = crm_lastfailure_name(resource); inject_transient_attr(cib_node, name, now); - free(name); free(now); } } static void create_node_entry(cib_t * cib_conn, const char *node) { int rc = pcmk_ok; int max = strlen(new_node_template) + strlen(node) + 1; char *xpath = NULL; xpath = calloc(1, max); snprintf(xpath, max, new_node_template, node); rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); if (rc == -ENXIO) { xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE); /* Using node uname as uuid ala corosync/openais */ crm_xml_add(cib_object, XML_ATTR_ID, node); crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object, cib_sync_call | cib_scope_local); /* Not bothering with subsequent query to see if it exists, we'll bomb out later in the call to query_node_uuid()... */ free_xml(cib_object); } free(xpath); } static lrmd_event_data_t * create_op(xmlNode * cib_resource, const char *task, int interval, int outcome) { lrmd_event_data_t *op = NULL; xmlNode *xop = NULL; op = calloc(1, sizeof(lrmd_event_data_t)); op->rsc_id = strdup(ID(cib_resource)); op->interval = interval; op->op_type = strdup(task); op->rc = outcome; op->op_status = 0; op->params = NULL; /* TODO: Fill me in */ op->t_run = time(NULL); op->t_rcchange = op->t_run; op->call_id = 0; for (xop = __xml_first_child(cib_resource); xop != NULL; xop = __xml_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; } static xmlNode * inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc) { return create_operation_update(cib_resource, op, CRM_FEATURE_SET, target_rc, NULL, crm_system_name, LOG_DEBUG_2); } static xmlNode * inject_node_state(cib_t * cib_conn, const char *node, const char *uuid) { int rc = pcmk_ok; int max = strlen(rsc_template) + strlen(node) + 1; char *xpath = NULL; xmlNode *cib_object = NULL; xpath = calloc(1, max); if (bringing_nodes_online) { create_node_entry(cib_conn, node); } snprintf(xpath, max, node_template, node); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); if (cib_object && ID(cib_object) == NULL) { crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath); crm_log_xml_warn(cib_object, "Duplicates"); crm_exit(ENOTUNIQ); } if (rc == -ENXIO) { char *found_uuid = NULL; if (uuid == NULL) { query_node_uuid(cib_conn, node, &found_uuid, NULL); } else { found_uuid = strdup(uuid); } cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(cib_object, XML_ATTR_UUID, found_uuid); crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object, cib_sync_call | cib_scope_local); free_xml(cib_object); free(found_uuid); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); crm_trace("injecting node state for %s. rc is %d", node, rc); } free(xpath); CRM_ASSERT(rc == pcmk_ok); return cib_object; } static xmlNode * modify_node(cib_t * cib_conn, char *node, gboolean up) { xmlNode *cib_node = inject_node_state(cib_conn, node, NULL); if (up) { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES); crm_xml_add(cib_node, XML_NODE_IS_PEER, ONLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); } else { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); crm_xml_add(cib_node, XML_NODE_IS_PEER, OFFLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN); } crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name); return cib_node; } static xmlNode * find_resource_xml(xmlNode * cib_node, const char *resource) { char *xpath = NULL; xmlNode *match = NULL; const char *node = crm_element_value(cib_node, XML_ATTR_UNAME); int max = strlen(rsc_template) + strlen(resource) + strlen(node) + 1; xpath = calloc(1, max); snprintf(xpath, max, rsc_template, node, resource); match = get_xpath_object(xpath, cib_node, LOG_DEBUG_2); free(xpath); return match; } static xmlNode * inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, const char *rtype, const char *rprovider) { xmlNode *lrm = NULL; xmlNode *container = NULL; xmlNode *cib_resource = NULL; char *xpath = NULL; cib_resource = find_resource_xml(cib_node, resource); if (cib_resource != NULL) { return cib_resource; } /* One day, add query for class, provider, type */ if (rclass == NULL || rtype == NULL) { fprintf(stderr, "Resource %s not found in the status section of %s." " Please supply the class and type to continue\n", resource, ID(cib_node)); return NULL; } else if (safe_str_neq(rclass, "ocf") && safe_str_neq(rclass, "stonith") && safe_str_neq(rclass, "heartbeat") && safe_str_neq(rclass, "service") && safe_str_neq(rclass, "upstart") && safe_str_neq(rclass, "systemd") && safe_str_neq(rclass, "lsb")) { fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass); return NULL; } else if (safe_str_eq(rclass, "ocf") && rprovider == NULL) { fprintf(stderr, "Please specify the provider for resource %s\n", resource); return NULL; } xpath = (char *)xmlGetNodePath(cib_node); crm_info("Injecting new resource %s into %s '%s'", resource, xpath, ID(cib_node)); free(xpath); lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); if (lrm == NULL) { const char *node_uuid = ID(cib_node); lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); crm_xml_add(lrm, XML_ATTR_ID, node_uuid); } container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); if (container == NULL) { container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); } cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); crm_xml_add(cib_resource, XML_ATTR_ID, resource); crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype); return cib_resource; } static int find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml) { int offset = 0; static int xpath_max = 1024; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(ticket_state_xml != NULL); *ticket_state_xml = NULL; xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "%s", "/cib/status/tickets"); if (ticket_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, "/%s[@id=\"%s\"]", XML_CIB_TAG_TICKET_STATE, ticket_id); } CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { if (ticket_id) { fprintf(stdout, "Multiple ticket_states match ticket_id=%s\n", ticket_id); } *ticket_state_xml = xml_search; } else { *ticket_state_xml = xml_search; } bail: free(xpath_string); return rc; } static int set_ticket_state_attr(const char *ticket_id, const char *attr_name, const char *attr_value, cib_t * cib, int cib_options) { int rc = pcmk_ok; xmlNode *xml_top = NULL; xmlNode *ticket_state_xml = NULL; rc = find_ticket_state(cib, ticket_id, &ticket_state_xml); if (rc == pcmk_ok) { crm_debug("Found a match state for ticket: id=%s", ticket_id); xml_top = ticket_state_xml; } else if (rc != -ENXIO) { return rc; } else { xmlNode *xml_obj = NULL; xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id); } crm_xml_add(ticket_state_xml, attr_name, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options); free_xml(xml_top); return rc; } void modify_configuration(pe_working_set_t * data_set, cib_t *cib, const char *quorum, const char *watchdog, GListPtr node_up, GListPtr node_down, GListPtr node_fail, GListPtr op_inject, GListPtr ticket_grant, GListPtr ticket_revoke, GListPtr ticket_standby, GListPtr ticket_activate) { int rc = pcmk_ok; GListPtr gIter = NULL; xmlNode *cib_op = NULL; xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; lrmd_event_data_t *op = NULL; if (quorum) { xmlNode *top = create_xml_node(NULL, XML_TAG_CIB); quiet_log(" + Setting quorum: %s\n", quorum); /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */ crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum); rc = cib->cmds->modify(cib, NULL, top, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } if (watchdog) { quiet_log(" + Setting watchdog: %s\n", watchdog); rc = update_attr_delegate(cib, cib_sync_call | cib_scope_local, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_HAVE_WATCHDOG, watchdog, FALSE, NULL, NULL); CRM_ASSERT(rc == pcmk_ok); } for (gIter = node_up; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; quiet_log(" + Bringing node %s online\n", node); cib_node = modify_node(cib, node, TRUE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = node_down; gIter != NULL; gIter = gIter->next) { char xpath[STATUS_PATH_MAX]; char *node = (char *)gIter->data; quiet_log(" + Taking node %s offline\n", node); cib_node = modify_node(cib, node, FALSE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_CIB_TAG_LRM); cib->cmds->delete(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_TAG_TRANSIENT_NODEATTRS); cib->cmds->delete(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); } for (gIter = node_fail; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; quiet_log(" + Failing node %s\n", node); cib_node = modify_node(cib, node, TRUE); crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Granting ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_revoke; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Revoking ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_standby; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Making ticket %s standby\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_activate; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Activating ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = op_inject; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; int rc = 0; int outcome = 0; int interval = 0; char *key = NULL; char *node = NULL; char *task = NULL; char *resource = NULL; const char *rtype = NULL; const char *rclass = NULL; const char *rprovider = NULL; resource_t *rsc = NULL; quiet_log(" + Injecting %s into the configuration\n", spec); key = calloc(1, strlen(spec) + 1); node = calloc(1, strlen(spec) + 1); rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); if (rc != 3) { fprintf(stderr, "Invalid operation spec: %s. Only found %d fields\n", spec, rc); free(key); free(node); continue; } parse_op_key(key, &resource, &task, &interval); rsc = pe_find_resource(data_set->resources, resource); if (rsc == NULL) { fprintf(stderr, " - Invalid resource name: %s\n", resource); } else { rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); cib_node = inject_node_state(cib, node, NULL); CRM_ASSERT(cib_node != NULL); update_failcounts(cib_node, resource, interval, outcome); cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = create_op(cib_resource, task, interval, outcome); CRM_ASSERT(op != NULL); cib_op = inject_op(cib_resource, op, 0); CRM_ASSERT(cib_op != NULL); lrmd_free_event(op); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } free(task); free(node); free(key); } } static gboolean exec_pseudo_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); action->confirmed = TRUE; quiet_log(" * Pseudo action: %s%s%s\n", task, node ? " on " : "", node ? node : ""); update_graph(graph, action); return TRUE; } static gboolean exec_rsc_action(crm_graph_t * graph, crm_action_t * action) { int rc = 0; GListPtr gIter = NULL; lrmd_event_data_t *op = NULL; int target_outcome = 0; gboolean uname_is_uuid = FALSE; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *operation = crm_element_value(action->xml, "operation"); const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (safe_str_eq(operation, CRM_OP_PROBED) || safe_str_eq(operation, CRM_OP_REPROBE)) { crm_info("Skipping %s op for %s", operation, node); goto done; } if (action_rsc == NULL) { crm_log_xml_err(action->xml, "Bad"); free(node); free(uuid); return FALSE; } /* Look for the preferred name * If not found, try the expected 'local' name * If not found use the preferred name anyway */ resource = crm_element_value(action_rsc, XML_ATTR_ID); if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG); if (pe_find_resource(fake_resource_list, longname)) { resource = longname; } } if (safe_str_eq(operation, "delete")) { quiet_log(" * Resource action: %-15s delete on %s\n", resource, node); goto done; } rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(action_rsc, XML_ATTR_TYPE); rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER); if (target_rc_s != NULL) { target_outcome = crm_parse_int(target_rc_s, "0"); } CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call | cib_scope_local) == pcmk_ok); if (router_node) { uname_is_uuid = TRUE; } cib_node = inject_node_state(fake_cib, node, uname_is_uuid ? node : uuid); CRM_ASSERT(cib_node != NULL); cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = convert_graph_action(cib_resource, action, 0, target_outcome); if (op->interval) { quiet_log(" * Resource action: %-15s %s=%d on %s\n", resource, op->op_type, op->interval, node); } else { quiet_log(" * Resource action: %-15s %s on %s\n", resource, op->op_type, node); } for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; char *key = NULL; key = calloc(1, 1 + strlen(spec)); snprintf(key, strlen(spec), "%s_%s_%d@%s=", resource, op->op_type, op->interval, node); if (strncasecmp(key, spec, strlen(key)) == 0) { sscanf(spec, "%*[^=]=%d", (int *)&op->rc); action->failed = TRUE; graph->abort_priority = INFINITY; printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc); update_failcounts(cib_node, resource, op->interval, op->rc); free(key); break; } free(key); } inject_op(cib_resource, op, target_outcome); lrmd_free_event(op); rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); done: free(node); free(uuid); free_xml(cib_node); action->confirmed = TRUE; update_graph(graph, action); return TRUE; } static gboolean exec_crmd_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); action->confirmed = TRUE; if(rsc) { quiet_log(" * Cluster action: %s for %s on %s\n", task, ID(rsc), node); } else { quiet_log(" * Cluster action: %s on %s\n", task, node); } update_graph(graph, action); return TRUE; } static gboolean exec_stonith_action(crm_graph_t * graph, crm_action_t * action) { const char *op = crm_meta_value(action->params, "stonith_action"); char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); quiet_log(" * Fencing %s (%s)\n", target, op); if(safe_str_neq(op, "on")) { int rc = 0; char xpath[STATUS_PATH_MAX]; xmlNode *cib_node = modify_node(fake_cib, target, FALSE); crm_xml_add(cib_node, XML_ATTR_ORIGIN, __FUNCTION__); CRM_ASSERT(cib_node != NULL); rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM); fake_cib->cmds->delete(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_TAG_TRANSIENT_NODEATTRS); fake_cib->cmds->delete(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); free_xml(cib_node); } action->confirmed = TRUE; update_graph(graph, action); free(target); return TRUE; } int run_simulation(pe_working_set_t * data_set, cib_t *cib, GListPtr op_fail_list, bool quiet) { crm_graph_t *transition = NULL; enum transition_status graph_rc = -1; crm_graph_functions_t exec_fns = { exec_pseudo_action, exec_rsc_action, exec_crmd_action, exec_stonith_action, }; fake_cib = cib; fake_quiet = quiet; fake_op_fail_list = op_fail_list; quiet_log("\nExecuting cluster transition:\n"); set_graph_functions(&exec_fns); transition = unpack_graph(data_set->graph, crm_system_name); print_graph(LOG_DEBUG, transition); fake_resource_list = data_set->resources; do { graph_rc = run_graph(transition); } while (graph_rc == transition_active); fake_resource_list = NULL; if (graph_rc != transition_complete) { fprintf(stdout, "Transition failed: %s\n", transition_status(graph_rc)); print_graph(LOG_ERR, transition); } destroy_graph(transition); if (graph_rc != transition_complete) { fprintf(stdout, "An invalid transition was produced\n"); } if (quiet == FALSE) { xmlNode *cib_object = NULL; int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); cleanup_alloc_calculations(data_set); data_set->input = cib_object; } if (graph_rc != transition_complete) { return graph_rc; } return 0; } diff --git a/tools/regression.sh b/tools/regression.sh index 47bff438c7..8a3227901b 100755 --- a/tools/regression.sh +++ b/tools/regression.sh @@ -1,797 +1,810 @@ #!/bin/bash : ${shadow=tools-regression} test_home=`dirname $0` num_errors=0 num_passed=0 GREP_OPTIONS= verbose=0 tests="dates tools acls validity" function test_assert() { target=$1; shift cib=$1; shift app=`echo "$cmd" | sed 's/\ .*//'` printf "* Running: $app - $desc\n" 1>&2 printf "=#=#=#= Begin test: $desc =#=#=#=\n" eval $VALGRIND_CMD $cmd 2>&1 rc=$? if [ x$cib != x0 ]; then printf "=#=#=#= Current cib after: $desc =#=#=#=\n" CIB_user=root cibadmin -Q fi printf "=#=#=#= End test: $desc - `crm_error $rc` ($rc) =#=#=#=\n" if [ $rc -ne $target ]; then num_errors=`expr $num_errors + 1` printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc" printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc (`which $app`)" 1>&2 return exit 1 else printf "* Passed: %-14s - %s\n" $app "$desc" num_passed=`expr $num_passed + 1` fi } function usage() { echo "Usage: ./regression.sh [-s(ave)] [-x] [-v(erbose)]" exit $1 } done=0 do_save=0 VALGRIND_CMD= while test "$done" = "0"; do case "$1" in -t) tests=$2; shift; shift;; -V|--verbose) verbose=1; shift;; -v|--valgrind) export G_SLICE=always-malloc VALGRIND_CMD="valgrind -q --gen-suppressions=all --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions" shift;; -x) set -x; shift;; -s) do_save=1; shift;; -p) PATH="$2:$PATH"; export PATH; shift 1;; -?) usage 0;; -*) echo "unknown option: $1"; usage 1;; *) done=1;; esac done if [ "x$VALGRIND_CMD" = "x" -a -x $test_home/crm_simulate ]; then xml_home=`dirname ${test_home}` echo "Using local binaries from: $test_home, schemas from $xml_home" export PATH="$test_home:$PATH" export PCMK_schema_directory=${xml_home}/xml fi function test_tools() { export CIB_shadow_dir=$test_home $VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow 2>&1 export CIB_shadow=$shadow desc="Validate CIB" cmd="cibadmin -Q" test_assert 0 desc="Configure something before erasing" cmd="crm_attribute -n cluster-delay -v 60s" test_assert 0 desc="Require --force for CIB erasure" cmd="cibadmin -E" test_assert 22 desc="Allow CIB erasure with --force" cmd="cibadmin -E --force" test_assert 0 desc="Query CIB" cmd="cibadmin -Q > /tmp/$$.existing.xml" test_assert 0 desc="Set cluster option" cmd="crm_attribute -n cluster-delay -v 60s" test_assert 0 desc="Query new cluster option" cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" test_assert 0 desc="Query cluster options" cmd="cibadmin -Q -o crm_config > /tmp/$$.opt.xml" test_assert 0 desc="Set no-quorum policy" cmd="crm_attribute -n no-quorum-policy -v ignore" test_assert 0 desc="Delete nvpair" cmd="cibadmin -D -o crm_config --xml-text ''" test_assert 0 desc="Create operaton should fail" cmd="cibadmin -C -o crm_config --xml-file /tmp/$$.opt.xml" test_assert 76 desc="Modify cluster options section" cmd="cibadmin -M -o crm_config --xml-file /tmp/$$.opt.xml" test_assert 0 desc="Query updated cluster option" cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" test_assert 0 desc="Set duplicate cluster option" cmd="crm_attribute -n cluster-delay -v 40s -s duplicate" test_assert 0 desc="Setting multiply defined cluster option should fail" cmd="crm_attribute -n cluster-delay -v 30s" test_assert 76 desc="Set cluster option with -s" cmd="crm_attribute -n cluster-delay -v 30s -s duplicate" test_assert 0 desc="Delete cluster option with -i" cmd="crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay" test_assert 0 desc="Create node1 and bring it online" cmd="crm_simulate --live-check --in-place --node-up=node1" test_assert 0 desc="Create node attribute" cmd="crm_attribute -n ram -v 1024M -U node1 -t nodes" test_assert 0 desc="Query new node attribute" cmd="cibadmin -Q -o nodes | grep node1-ram" test_assert 0 + desc="Set a transient (fail-count) node attribute" + cmd="crm_attribute -n fail-count-foo -v 3 -N node1 -t status" + test_assert 0 + + desc="Query a fail count" + cmd="crm_failcount --query -r foo -N node1" + test_assert 0 + + desc="Delete a transient (fail-count) node attribute" + cmd="crm_attribute -n fail-count-foo -D -N node1 -t status" + test_assert 0 + desc="Digest calculation" cmd="cibadmin -Q | cibadmin -5 -p 2>&1 > /dev/null" test_assert 0 # This update will fail because it has version numbers desc="Replace operation should fail" cmd="cibadmin -R --xml-file /tmp/$$.existing.xml" test_assert 205 desc="Default standby value" cmd="crm_standby -N node1 -G" test_assert 0 desc="Set standby status" cmd="crm_standby -N node1 -v true" test_assert 0 desc="Query standby value" cmd="crm_standby -N node1 -G" test_assert 0 desc="Delete standby value" cmd="crm_standby -N node1 -D" test_assert 0 desc="Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert 0 desc="Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p is-managed -v false" test_assert 0 desc="Query a resource meta attribute" cmd="crm_resource -r dummy --meta -g is-managed" test_assert 0 desc="Remove a resource meta attribute" cmd="crm_resource -r dummy --meta -d is-managed" test_assert 0 desc="Create a resource attribute" cmd="crm_resource -r dummy -p delay -v 10s" test_assert 0 desc="List the configured resources" cmd="crm_resource -L" test_assert 0 - desc="Set a resource's fail-count" - cmd="crm_failcount -r dummy -v 10 -N node1" - test_assert 0 - desc="Require a destination when migrating a resource that is stopped" cmd="crm_resource -r dummy -M" test_assert 22 desc="Don't support migration to non-existent locations" cmd="crm_resource -r dummy -M -N i.dont.exist" test_assert 6 desc="Create a fencing resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert 0 desc="Bring resources online" cmd="crm_simulate --live-check --in-place -S" test_assert 0 desc="Try to move a resource to its existing location" cmd="crm_resource -r dummy --move --host node1" test_assert 22 desc="Move a resource from its existing location" cmd="crm_resource -r dummy --move" test_assert 0 desc="Clear out constraints generated by --move" cmd="crm_resource -r dummy --clear" test_assert 0 desc="Default ticket granted state" cmd="crm_ticket -t ticketA -G granted -d false" test_assert 0 desc="Set ticket granted state" cmd="crm_ticket -t ticketA -r --force" test_assert 0 desc="Query ticket granted state" cmd="crm_ticket -t ticketA -G granted" test_assert 0 desc="Delete ticket granted state" cmd="crm_ticket -t ticketA -D granted --force" test_assert 0 desc="Make a ticket standby" cmd="crm_ticket -t ticketA -s" test_assert 0 desc="Query ticket standby state" cmd="crm_ticket -t ticketA -G standby" test_assert 0 desc="Activate a ticket" cmd="crm_ticket -t ticketA -a" test_assert 0 desc="Delete ticket standby state" cmd="crm_ticket -t ticketA -D standby" test_assert 0 desc="Ban a resource on unknown node" cmd="crm_resource -r dummy -B -N host1" test_assert 6 desc="Create two more nodes and bring them online" cmd="crm_simulate --live-check --in-place --node-up=node2 --node-up=node3" test_assert 0 desc="Ban dummy from node1" cmd="crm_resource -r dummy -B -N node1" test_assert 0 desc="Ban dummy from node2" cmd="crm_resource -r dummy -B -N node2" test_assert 0 desc="Relocate resources due to ban" cmd="crm_simulate --live-check --in-place -S" test_assert 0 desc="Move dummy to node1" cmd="crm_resource -r dummy -M -N node1" test_assert 0 desc="Clear implicit constraints for dummy on node2" cmd="crm_resource -r dummy -U -N node2" test_assert 0 desc="Drop the status section" cmd="cibadmin -R -o status --xml-text ''" test_assert 0 0 desc="Create a clone" cmd="cibadmin -C -o resources --xml-text ''" test_assert 0 0 desc="Create a resource meta attribute" cmd="crm_resource -r test-primitive --meta -p is-managed -v false" test_assert 0 desc="Create a resource meta attribute in the primitive" cmd="crm_resource -r test-primitive --meta -p is-managed -v false --force" test_assert 0 desc="Update resource meta attribute with duplicates" cmd="crm_resource -r test-clone --meta -p is-managed -v true" test_assert 0 desc="Update resource meta attribute with duplicates (force clone)" cmd="crm_resource -r test-clone --meta -p is-managed -v true --force" test_assert 0 desc="Update child resource meta attribute with duplicates" cmd="crm_resource -r test-primitive --meta -p is-managed -v false" test_assert 0 desc="Delete resource meta attribute with duplicates" cmd="crm_resource -r test-clone --meta -d is-managed" test_assert 0 desc="Delete resource meta attribute in parent" cmd="crm_resource -r test-primitive --meta -d is-managed" test_assert 0 desc="Create a resource meta attribute in the primitive" cmd="crm_resource -r test-primitive --meta -p is-managed -v false --force" test_assert 0 desc="Update existing resource meta attribute" cmd="crm_resource -r test-clone --meta -p is-managed -v true" test_assert 0 desc="Create a resource meta attribute in the parent" cmd="crm_resource -r test-clone --meta -p is-managed -v true --force" test_assert 0 desc="Copy resources" cmd="cibadmin -Q -o resources > /tmp/$$.resources.xml" test_assert 0 0 desc="Delete resource paremt meta attribute (force)" cmd="crm_resource -r test-clone --meta -d is-managed --force" test_assert 0 desc="Restore duplicates" cmd="cibadmin -R -o resources --xml-file /tmp/$$.resources.xml" test_assert 0 desc="Delete resource child meta attribute" cmd="crm_resource -r test-primitive --meta -d is-managed" test_assert 0 rm -f /tmp/$$.existing.xml /tmp/$$.resources.xml } function test_dates() { desc="2014-01-01 00:30:00 - 1 Hour" cmd="iso8601 -d '2014-01-01 00:30:00Z' -D P-1H -E '2013-12-31 23:30:00Z'" test_assert 0 0 for y in 06 07 08 09 10 11 12 13 14 15 16 17 18; do desc="20$y-W01-7" cmd="iso8601 -d '20$y-W01-7 00Z'" test_assert 0 0 desc="20$y-W01-7 - round-trip" cmd="iso8601 -d '20$y-W01-7 00Z' -W -E '20$y-W01-7 00:00:00Z'" test_assert 0 0 desc="20$y-W01-1" cmd="iso8601 -d '20$y-W01-1 00Z'" test_assert 0 0 desc="20$y-W01-1 - round-trip" cmd="iso8601 -d '20$y-W01-1 00Z' -W -E '20$y-W01-1 00:00:00Z'" test_assert 0 0 done desc="2009-W53-07" cmd="iso8601 -d '2009-W53-7 00:00:00Z' -W -E '2009-W53-7 00:00:00Z'" test_assert 0 0 desc="2009-01-31 + 1 Month" cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P1M -E '2009-02-28 00:00:00Z'" test_assert 0 0 desc="2009-01-31 + 2 Months" cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P2M -E '2009-03-31 00:00:00Z'" test_assert 0 0 desc="2009-01-31 + 3 Months" cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P3M -E '2009-04-30 00:00:00Z'" test_assert 0 0 desc="2009-03-31 - 1 Month" cmd="iso8601 -d '2009-03-31 00:00:00Z' -D P-1M -E '2009-02-28 00:00:00Z'" test_assert 0 0 } function get_epoch() { CIB_user=root CIB_file=$1 CIB_shadow="" cibadmin -Q | head -n 1 | sed -e 's/.* epoch=\"\([0-9]*\).*/\1/' } function restore_epoch() { infile=$1; shift old=$1; shift new=$(get_epoch $infile) sed -i 's/epoch=.$old/epoch=\"$new/g' $infile } function test_acl_loop() { # Make sure we're rejecting things for the right reasons export PCMK_trace_functions=__xml_acl_check,__xml_acl_post_process export PCMK_stderr=1 CIB_user=root cibadmin --replace --xml-text '' export CIB_user=unknownguy desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert 13 0 desc="$CIB_user: Set enable-acl" cmd="crm_attribute -n enable-acl -v false" test_assert 13 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v false" test_assert 13 0 desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert 13 0 export CIB_user=l33t-haxor desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert 13 0 desc="$CIB_user: Set enable-acl" cmd="crm_attribute -n enable-acl -v false" test_assert 13 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v false" test_assert 13 0 desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert 13 0 export CIB_user=niceguy desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert 0 0 desc="$CIB_user: Set enable-acl" cmd="crm_attribute -n enable-acl -v false" test_assert 13 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v false" test_assert 0 desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert 13 0 export CIB_user=root desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert 0 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v true" test_assert 0 desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert 0 export CIB_user=l33t-haxor desc="$CIB_user: Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p target-role -v Stopped" test_assert 13 0 desc="$CIB_user: Query a resource meta attribute" cmd="crm_resource -r dummy --meta -g target-role" test_assert 13 0 desc="$CIB_user: Remove a resource meta attribute" cmd="crm_resource -r dummy --meta -d target-role" test_assert 13 0 export CIB_user=niceguy desc="$CIB_user: Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p target-role -v Stopped" test_assert 0 desc="$CIB_user: Query a resource meta attribute" cmd="crm_resource -r dummy --meta -g target-role" test_assert 0 desc="$CIB_user: Remove a resource meta attribute" cmd="crm_resource -r dummy --meta -d target-role" test_assert 0 desc="$CIB_user: Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p target-role -v Started" test_assert 0 export CIB_user=badidea desc="$CIB_user: Query configuration - implied deny" cmd="cibadmin -Q" test_assert 0 0 export CIB_user=betteridea desc="$CIB_user: Query configuration - explicit deny" cmd="cibadmin -Q" test_assert 0 0 CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin --delete --xml-text '' CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql export CIB_user=niceguy desc="$CIB_user: Replace - remove acls" cmd="cibadmin --replace --xml-file /tmp/$$.haxor.xml" test_assert 13 0 CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -C -o resources --xml-text '' CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create resource" cmd="cibadmin --replace --xml-file /tmp/$$.haxor.xml" test_assert 13 0 CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" crm_attribute -n enable-acl -v false CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - modify attribute (deny)" cmd="cibadmin --replace --xml-file /tmp/$$.haxor.xml" test_assert 13 0 CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin --replace --xml-text '' CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - delete attribute (deny)" cmd="cibadmin --replace --xml-file /tmp/$$.haxor.xml" test_assert 13 0 CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create attribute (deny)" cmd="cibadmin --replace --xml-file /tmp/$$.haxor.xml" test_assert 13 0 rm -rf /tmp/$$.haxor.xml CIB_user=bob CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create attribute (allow)" cmd="cibadmin --replace -o resources --xml-file /tmp/$$.haxor.xml" test_assert 0 0 CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - modify attribute (allow)" cmd="cibadmin --replace -o resources --xml-file /tmp/$$.haxor.xml" test_assert 0 0 CIB_user=root cibadmin -Q > /tmp/$$.haxor.xml CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin --replace -o resources --xml-text '' CIB_user=root CIB_file=/tmp/$$.haxor.xml CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - delete attribute (allow)" cmd="cibadmin --replace -o resources --xml-file /tmp/$$.haxor.xml" test_assert 0 0 } function test_acls() { export CIB_shadow_dir=$test_home $VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow --validate-with pacemaker-1.3 2>&1 export CIB_shadow=$shadow - cat</tmp/$$.acls.xml + cat </tmp/$$.acls.xml EOF desc="Configure some ACLs" cmd="cibadmin -M -o acls --xml-file /tmp/$$.acls.xml" test_assert 0 desc="Enable ACLs" cmd="crm_attribute -n enable-acl -v true" test_assert 0 desc="Set cluster option" cmd="crm_attribute -n no-quorum-policy -v ignore" test_assert 0 desc="New ACL" cmd="cibadmin --create -o acls --xml-text ''" test_assert 0 desc="Another ACL" cmd="cibadmin --create -o acls --xml-text ''" test_assert 0 desc="Updated ACL" cmd="cibadmin --replace -o acls --xml-text ''" test_assert 0 test_acl_loop printf "\n\n !#!#!#!#! Upgrading to pacemaker-2.0 and retesting !#!#!#!#!\n" printf "\nUpgrading to pacemaker-2.0 and re-testing\n" 1>&2 export CIB_user=root desc="$CIB_user: Upgrade to pacemaker-2.0" cmd="cibadmin --upgrade --force -V" test_assert 0 sed -i 's/epoch=.2/epoch=\"6/g' $CIB_shadow_dir/shadow.$CIB_shadow sed -i 's/admin_epoch=.1/admin_epoch=\"0/g' $CIB_shadow_dir/shadow.$CIB_shadow test_acl_loop } function test_validity() { export CIB_shadow_dir=$test_home $VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow --validate-with pacemaker-1.2 2>&1 export CIB_shadow=$shadow export PCMK_trace_functions=update_validation,cli_config_update export PCMK_stderr=1 cibadmin -C -o resources --xml-text '' cibadmin -C -o resources --xml-text '' cibadmin -C -o constraints --xml-text '' cibadmin -Q > /tmp/$$.good-1.2.xml desc="Try to make resulting CIB invalid (enum violation)" cmd="cibadmin -M -o constraints --xml-text ''" test_assert 203 sed 's|"start"|"break"|' /tmp/$$.good-1.2.xml > /tmp/$$.bad-1.2.xml desc="Run crm_simulate with invalid CIB (enum violation)" cmd="crm_simulate -x /tmp/$$.bad-1.2.xml -S" test_assert 126 0 desc="Try to make resulting CIB invalid (unrecognized validate-with)" cmd="cibadmin -M --xml-text ''" test_assert 203 sed 's|"pacemaker-1.2"|"pacemaker-9999.0"|' /tmp/$$.good-1.2.xml > /tmp/$$.bad-1.2.xml desc="Run crm_simulate with invalid CIB (unrecognized validate-with)" cmd="crm_simulate -x /tmp/$$.bad-1.2.xml -S" test_assert 126 0 desc="Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1)" cmd="cibadmin -C -o configuration --xml-text ''" test_assert 203 sed 's||\0|' /tmp/$$.good-1.2.xml > /tmp/$$.bad-1.2.xml desc="Run crm_simulate with invalid, but possibly recoverable CIB (valid with X.Y+1)" cmd="crm_simulate -x /tmp/$$.bad-1.2.xml -S" test_assert 0 0 sed 's|\s\s*validate-with="[^"]*"||' /tmp/$$.good-1.2.xml > /tmp/$$.bad-1.2.xml desc="Make resulting CIB valid, although without validate-with attribute" cmd="cibadmin -R --xml-file /tmp/$$.bad-1.2.xml" test_assert 0 desc="Run crm_simulate with valid CIB, but without validate-with attribute" cmd="crm_simulate -x /tmp/$$.bad-1.2.xml -S" test_assert 0 0 # this will just disable validation and accept the config, outputting # validation errors sed -e 's|\s\s*validate-with="[^"]*"||' \ -e 's|\(\s\s*epoch="[^"]*\)"|\10"|' -e 's|"start"|"break"|' \ /tmp/$$.good-1.2.xml > /tmp/$$.bad-1.2.xml desc="Make resulting CIB invalid, and without validate-with attribute" cmd="cibadmin -R --xml-file /tmp/$$.bad-1.2.xml" test_assert 0 desc="Run crm_simulate with invalid CIB, also without validate-with attribute" cmd="crm_simulate -x /tmp/$$.bad-1.2.xml -S" test_assert 0 0 rm -f /tmp/$$.good-1.2.xml /tmp/$$.bad-1.2.xml } for t in $tests; do echo "Testing $t" test_$t > $test_home/regression.$t.out sed -i -e 's/cib-last-written.*>/>/'\ -e 's/ last-run=\"[0-9]*\"//'\ -e 's/crm_feature_set="[^"]*" //'\ -e 's/validate-with="[^"]*" //'\ -e 's/Created new pacemaker-.* configuration/Created new pacemaker configuration/'\ -e 's/.*__xml_acl_check/__xml_acl_check/g'\ -e 's/.*__xml_acl_post_process/__xml_acl_post_process/g'\ -e 's/.*error: unpack_resources:/error: unpack_resources:/g'\ -e 's/ last-rc-change=\"[0-9]*\"//'\ -e 's|^/tmp/[0-9][0-9]*\.||'\ -e 's/^Entity: line [0-9][0-9]*: //'\ + -e 's/schemas\.c:\([0-9][0-9]*\)/schemas.c:NNN/' \ -e 's/\(validation ([0-9][0-9]* of \)[0-9][0-9]*\().*\)/\1X\2/' $test_home/regression.$t.out if [ $do_save = 1 ]; then cp $test_home/regression.$t.out $test_home/regression.$t.exp fi done failed=0 echo -e "\n\nResults" for t in $tests; do if [ $do_save = 1 ]; then cp $test_home/regression.$t.out $test_home/regression.$t.exp fi if [ $verbose = 1 ]; then diff -wu $test_home/regression.$t.exp $test_home/regression.$t.out else diff -wu $test_home/regression.$t.exp $test_home/regression.$t.out fi if [ $? != 0 ]; then failed=1 fi done echo -e "\n\nSummary" for t in $tests; do grep -e "^*" $test_home/regression.$t.out done if [ $num_errors != 0 ]; then echo $num_errors tests failed exit 1 elif [ $failed = 1 ]; then echo $num_passed tests passed but diff failed exit 2 else echo $num_passed tests passed + for t in $tests; do + rm -f "$test_home/regression.$t.out" + done + crm_shadow --force --delete $shadow >/dev/null 2>&1 exit 0 fi diff --git a/tools/regression.tools.exp b/tools/regression.tools.exp index 42b71298a7..94b33ec890 100644 --- a/tools/regression.tools.exp +++ b/tools/regression.tools.exp @@ -1,2916 +1,2982 @@ Created new pacemaker configuration Setting up shadow instance A new shadow instance was created. To begin using it paste the following into your shell: CIB_shadow=tools-regression ; export CIB_shadow =#=#=#= Begin test: Validate CIB =#=#=#= =#=#=#= Current cib after: Validate CIB =#=#=#= =#=#=#= End test: Validate CIB - OK (0) =#=#=#= * Passed: cibadmin - Validate CIB =#=#=#= Begin test: Configure something before erasing =#=#=#= =#=#=#= Current cib after: Configure something before erasing =#=#=#= =#=#=#= End test: Configure something before erasing - OK (0) =#=#=#= * Passed: crm_attribute - Configure something before erasing =#=#=#= Begin test: Require --force for CIB erasure =#=#=#= The supplied command is considered dangerous. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. =#=#=#= Current cib after: Require --force for CIB erasure =#=#=#= =#=#=#= End test: Require --force for CIB erasure - Invalid argument (22) =#=#=#= * Passed: cibadmin - Require --force for CIB erasure =#=#=#= Begin test: Allow CIB erasure with --force =#=#=#= =#=#=#= Current cib after: Allow CIB erasure with --force =#=#=#= =#=#=#= End test: Allow CIB erasure with --force - OK (0) =#=#=#= * Passed: cibadmin - Allow CIB erasure with --force =#=#=#= Begin test: Query CIB =#=#=#= =#=#=#= Current cib after: Query CIB =#=#=#= =#=#=#= End test: Query CIB - OK (0) =#=#=#= * Passed: cibadmin - Query CIB =#=#=#= Begin test: Set cluster option =#=#=#= =#=#=#= Current cib after: Set cluster option =#=#=#= =#=#=#= End test: Set cluster option - OK (0) =#=#=#= * Passed: crm_attribute - Set cluster option =#=#=#= Begin test: Query new cluster option =#=#=#= =#=#=#= Current cib after: Query new cluster option =#=#=#= =#=#=#= End test: Query new cluster option - OK (0) =#=#=#= * Passed: cibadmin - Query new cluster option =#=#=#= Begin test: Query cluster options =#=#=#= =#=#=#= Current cib after: Query cluster options =#=#=#= =#=#=#= End test: Query cluster options - OK (0) =#=#=#= * Passed: cibadmin - Query cluster options =#=#=#= Begin test: Set no-quorum policy =#=#=#= =#=#=#= Current cib after: Set no-quorum policy =#=#=#= =#=#=#= End test: Set no-quorum policy - OK (0) =#=#=#= * Passed: crm_attribute - Set no-quorum policy =#=#=#= Begin test: Delete nvpair =#=#=#= =#=#=#= Current cib after: Delete nvpair =#=#=#= =#=#=#= End test: Delete nvpair - OK (0) =#=#=#= * Passed: cibadmin - Delete nvpair =#=#=#= Begin test: Create operaton should fail =#=#=#= Call failed: Name not unique on network =#=#=#= Current cib after: Create operaton should fail =#=#=#= =#=#=#= End test: Create operaton should fail - Name not unique on network (76) =#=#=#= * Passed: cibadmin - Create operaton should fail =#=#=#= Begin test: Modify cluster options section =#=#=#= =#=#=#= Current cib after: Modify cluster options section =#=#=#= =#=#=#= End test: Modify cluster options section - OK (0) =#=#=#= * Passed: cibadmin - Modify cluster options section =#=#=#= Begin test: Query updated cluster option =#=#=#= =#=#=#= Current cib after: Query updated cluster option =#=#=#= =#=#=#= End test: Query updated cluster option - OK (0) =#=#=#= * Passed: cibadmin - Query updated cluster option =#=#=#= Begin test: Set duplicate cluster option =#=#=#= =#=#=#= Current cib after: Set duplicate cluster option =#=#=#= =#=#=#= End test: Set duplicate cluster option - OK (0) =#=#=#= * Passed: crm_attribute - Set duplicate cluster option =#=#=#= Begin test: Setting multiply defined cluster option should fail =#=#=#= Error performing operation: Name not unique on network Multiple attributes match name=cluster-delay Value: 60s (id=cib-bootstrap-options-cluster-delay) Value: 40s (id=duplicate-cluster-delay) =#=#=#= Current cib after: Setting multiply defined cluster option should fail =#=#=#= =#=#=#= End test: Setting multiply defined cluster option should fail - Name not unique on network (76) =#=#=#= * Passed: crm_attribute - Setting multiply defined cluster option should fail =#=#=#= Begin test: Set cluster option with -s =#=#=#= =#=#=#= Current cib after: Set cluster option with -s =#=#=#= =#=#=#= End test: Set cluster option with -s - OK (0) =#=#=#= * Passed: crm_attribute - Set cluster option with -s =#=#=#= Begin test: Delete cluster option with -i =#=#=#= Deleted crm_config option: id=(null) name=cluster-delay =#=#=#= Current cib after: Delete cluster option with -i =#=#=#= =#=#=#= End test: Delete cluster option with -i - OK (0) =#=#=#= * Passed: crm_attribute - Delete cluster option with -i =#=#=#= Begin test: Create node1 and bring it online =#=#=#= Current cluster status: Performing requested modifications + Bringing node node1 online Transition Summary: Executing cluster transition: Revised cluster status: Online: [ node1 ] =#=#=#= Current cib after: Create node1 and bring it online =#=#=#= =#=#=#= End test: Create node1 and bring it online - OK (0) =#=#=#= * Passed: crm_simulate - Create node1 and bring it online =#=#=#= Begin test: Create node attribute =#=#=#= =#=#=#= Current cib after: Create node attribute =#=#=#= =#=#=#= End test: Create node attribute - OK (0) =#=#=#= * Passed: crm_attribute - Create node attribute =#=#=#= Begin test: Query new node attribute =#=#=#= =#=#=#= Current cib after: Query new node attribute =#=#=#= =#=#=#= End test: Query new node attribute - OK (0) =#=#=#= * Passed: cibadmin - Query new node attribute +=#=#=#= Begin test: Set a transient (fail-count) node attribute =#=#=#= +=#=#=#= Current cib after: Set a transient (fail-count) node attribute =#=#=#= + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +=#=#=#= End test: Set a transient (fail-count) node attribute - OK (0) =#=#=#= +* Passed: crm_attribute - Set a transient (fail-count) node attribute +=#=#=#= Begin test: Query a fail count =#=#=#= +scope=status name=fail-count-foo value=3 +=#=#=#= Current cib after: Query a fail count =#=#=#= + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +=#=#=#= End test: Query a fail count - OK (0) =#=#=#= +* Passed: crm_failcount - Query a fail count +=#=#=#= Begin test: Delete a transient (fail-count) node attribute =#=#=#= +Deleted status attribute: id=status-node1-fail-count-foo name=fail-count-foo + +=#=#=#= Current cib after: Delete a transient (fail-count) node attribute =#=#=#= + + + + + + + + + + + + + + + + + + + + + + + + + + + + +=#=#=#= End test: Delete a transient (fail-count) node attribute - OK (0) =#=#=#= +* Passed: crm_attribute - Delete a transient (fail-count) node attribute =#=#=#= Begin test: Digest calculation =#=#=#= Digest: =#=#=#= Current cib after: Digest calculation =#=#=#= - + - + + + + + =#=#=#= End test: Digest calculation - OK (0) =#=#=#= * Passed: cibadmin - Digest calculation =#=#=#= Begin test: Replace operation should fail =#=#=#= Call failed: Update was older than existing configuration =#=#=#= Current cib after: Replace operation should fail =#=#=#= - + - + + + + + =#=#=#= End test: Replace operation should fail - Update was older than existing configuration (205) =#=#=#= * Passed: cibadmin - Replace operation should fail =#=#=#= Begin test: Default standby value =#=#=#= Error performing operation: No such device or address scope=status name=standby value=off =#=#=#= Current cib after: Default standby value =#=#=#= - + - + + + + + =#=#=#= End test: Default standby value - OK (0) =#=#=#= * Passed: crm_standby - Default standby value =#=#=#= Begin test: Set standby status =#=#=#= =#=#=#= Current cib after: Set standby status =#=#=#= - + + + + + =#=#=#= End test: Set standby status - OK (0) =#=#=#= * Passed: crm_standby - Set standby status =#=#=#= Begin test: Query standby value =#=#=#= scope=nodes name=standby value=true =#=#=#= Current cib after: Query standby value =#=#=#= - + + + + + =#=#=#= End test: Query standby value - OK (0) =#=#=#= * Passed: crm_standby - Query standby value =#=#=#= Begin test: Delete standby value =#=#=#= Deleted nodes attribute: id=nodes-node1-standby name=standby =#=#=#= Current cib after: Delete standby value =#=#=#= - + + + + + =#=#=#= End test: Delete standby value - OK (0) =#=#=#= * Passed: crm_standby - Delete standby value =#=#=#= Begin test: Create a resource =#=#=#= =#=#=#= Current cib after: Create a resource =#=#=#= - + + + + + =#=#=#= End test: Create a resource - OK (0) =#=#=#= * Passed: cibadmin - Create a resource =#=#=#= Begin test: Create a resource meta attribute =#=#=#= Set 'dummy' option: id=dummy-meta_attributes-is-managed set=dummy-meta_attributes name=is-managed=false =#=#=#= Current cib after: Create a resource meta attribute =#=#=#= - + + + + + =#=#=#= End test: Create a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute =#=#=#= Begin test: Query a resource meta attribute =#=#=#= false =#=#=#= Current cib after: Query a resource meta attribute =#=#=#= - + + + + + =#=#=#= End test: Query a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Query a resource meta attribute =#=#=#= Begin test: Remove a resource meta attribute =#=#=#= Deleted 'dummy' option: id=dummy-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Remove a resource meta attribute =#=#=#= - + + + + + =#=#=#= End test: Remove a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Remove a resource meta attribute =#=#=#= Begin test: Create a resource attribute =#=#=#= Set 'dummy' option: id=dummy-instance_attributes-delay set=dummy-instance_attributes name=delay=10s =#=#=#= Current cib after: Create a resource attribute =#=#=#= - + + + + + =#=#=#= End test: Create a resource attribute - OK (0) =#=#=#= * Passed: crm_resource - Create a resource attribute =#=#=#= Begin test: List the configured resources =#=#=#= dummy (ocf::pacemaker:Dummy): Stopped =#=#=#= Current cib after: List the configured resources =#=#=#= - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -=#=#=#= End test: List the configured resources - OK (0) =#=#=#= -* Passed: crm_resource - List the configured resources -=#=#=#= Begin test: Set a resource's fail-count =#=#=#= -=#=#=#= Current cib after: Set a resource's fail-count =#=#=#= - - - - + -=#=#=#= End test: Set a resource's fail-count - OK (0) =#=#=#= -* Passed: crm_failcount - Set a resource's fail-count +=#=#=#= End test: List the configured resources - OK (0) =#=#=#= +* Passed: crm_resource - List the configured resources =#=#=#= Begin test: Require a destination when migrating a resource that is stopped =#=#=#= Resource 'dummy' not moved: active in 0 locations. You can prevent 'dummy' from running on a specific location with: --ban --host Error performing operation: Invalid argument =#=#=#= Current cib after: Require a destination when migrating a resource that is stopped =#=#=#= - + - - - + =#=#=#= End test: Require a destination when migrating a resource that is stopped - Invalid argument (22) =#=#=#= * Passed: crm_resource - Require a destination when migrating a resource that is stopped =#=#=#= Begin test: Don't support migration to non-existent locations =#=#=#= Error performing operation: node 'i.dont.exist' is unknown Error performing operation: No such device or address =#=#=#= Current cib after: Don't support migration to non-existent locations =#=#=#= - + - - - + =#=#=#= End test: Don't support migration to non-existent locations - No such device or address (6) =#=#=#= * Passed: crm_resource - Don't support migration to non-existent locations =#=#=#= Begin test: Create a fencing resource =#=#=#= =#=#=#= Current cib after: Create a fencing resource =#=#=#= - - - + =#=#=#= End test: Create a fencing resource - OK (0) =#=#=#= * Passed: cibadmin - Create a fencing resource =#=#=#= Begin test: Bring resources online =#=#=#= Current cluster status: Online: [ node1 ] dummy (ocf::pacemaker:Dummy): Stopped Fence (stonith:fence_true): Stopped Transition Summary: * Start dummy (node1) * Start Fence (node1) Executing cluster transition: * Resource action: dummy monitor on node1 * Resource action: Fence monitor on node1 * Resource action: dummy start on node1 * Resource action: Fence start on node1 Revised cluster status: Online: [ node1 ] dummy (ocf::pacemaker:Dummy): Started node1 Fence (stonith:fence_true): Started node1 =#=#=#= Current cib after: Bring resources online =#=#=#= - - - + =#=#=#= End test: Bring resources online - OK (0) =#=#=#= * Passed: crm_simulate - Bring resources online =#=#=#= Begin test: Try to move a resource to its existing location =#=#=#= Error performing operation: dummy is already active on node1 Error performing operation: Invalid argument =#=#=#= Current cib after: Try to move a resource to its existing location =#=#=#= - - - + =#=#=#= End test: Try to move a resource to its existing location - Invalid argument (22) =#=#=#= * Passed: crm_resource - Try to move a resource to its existing location =#=#=#= Begin test: Move a resource from its existing location =#=#=#= WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. This will prevent dummy from running on node1 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin This will be the case even if node1 is the last node in the cluster This message can be disabled with --quiet =#=#=#= Current cib after: Move a resource from its existing location =#=#=#= - - - + =#=#=#= End test: Move a resource from its existing location - OK (0) =#=#=#= * Passed: crm_resource - Move a resource from its existing location =#=#=#= Begin test: Clear out constraints generated by --move =#=#=#= =#=#=#= Current cib after: Clear out constraints generated by --move =#=#=#= - - - + =#=#=#= End test: Clear out constraints generated by --move - OK (0) =#=#=#= * Passed: crm_resource - Clear out constraints generated by --move =#=#=#= Begin test: Default ticket granted state =#=#=#= false =#=#=#= Current cib after: Default ticket granted state =#=#=#= - - - + =#=#=#= End test: Default ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Default ticket granted state =#=#=#= Begin test: Set ticket granted state =#=#=#= =#=#=#= Current cib after: Set ticket granted state =#=#=#= - - - + =#=#=#= End test: Set ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Set ticket granted state =#=#=#= Begin test: Query ticket granted state =#=#=#= false =#=#=#= Current cib after: Query ticket granted state =#=#=#= - - - + =#=#=#= End test: Query ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket granted state =#=#=#= Begin test: Delete ticket granted state =#=#=#= =#=#=#= Current cib after: Delete ticket granted state =#=#=#= - - - + =#=#=#= End test: Delete ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Delete ticket granted state =#=#=#= Begin test: Make a ticket standby =#=#=#= =#=#=#= Current cib after: Make a ticket standby =#=#=#= - - - + =#=#=#= End test: Make a ticket standby - OK (0) =#=#=#= * Passed: crm_ticket - Make a ticket standby =#=#=#= Begin test: Query ticket standby state =#=#=#= true =#=#=#= Current cib after: Query ticket standby state =#=#=#= - - - + =#=#=#= End test: Query ticket standby state - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket standby state =#=#=#= Begin test: Activate a ticket =#=#=#= =#=#=#= Current cib after: Activate a ticket =#=#=#= - - - + =#=#=#= End test: Activate a ticket - OK (0) =#=#=#= * Passed: crm_ticket - Activate a ticket =#=#=#= Begin test: Delete ticket standby state =#=#=#= =#=#=#= Current cib after: Delete ticket standby state =#=#=#= - - - + =#=#=#= End test: Delete ticket standby state - OK (0) =#=#=#= * Passed: crm_ticket - Delete ticket standby state =#=#=#= Begin test: Ban a resource on unknown node =#=#=#= Error performing operation: node 'host1' is unknown Error performing operation: No such device or address =#=#=#= Current cib after: Ban a resource on unknown node =#=#=#= - - - + =#=#=#= End test: Ban a resource on unknown node - No such device or address (6) =#=#=#= * Passed: crm_resource - Ban a resource on unknown node =#=#=#= Begin test: Create two more nodes and bring them online =#=#=#= Current cluster status: Online: [ node1 ] dummy (ocf::pacemaker:Dummy): Started node1 Fence (stonith:fence_true): Started node1 Performing requested modifications + Bringing node node2 online + Bringing node node3 online Transition Summary: * Move Fence (Started node1 -> node2) Executing cluster transition: * Resource action: dummy monitor on node3 * Resource action: dummy monitor on node2 * Resource action: Fence monitor on node3 * Resource action: Fence monitor on node2 * Resource action: Fence stop on node1 * Pseudo action: all_stopped * Resource action: Fence start on node2 Revised cluster status: Online: [ node1 node2 node3 ] dummy (ocf::pacemaker:Dummy): Started node1 Fence (stonith:fence_true): Started node2 =#=#=#= Current cib after: Create two more nodes and bring them online =#=#=#= - - - + =#=#=#= End test: Create two more nodes and bring them online - OK (0) =#=#=#= * Passed: crm_simulate - Create two more nodes and bring them online =#=#=#= Begin test: Ban dummy from node1 =#=#=#= WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. This will prevent dummy from running on node1 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin This will be the case even if node1 is the last node in the cluster This message can be disabled with --quiet =#=#=#= Current cib after: Ban dummy from node1 =#=#=#= - - - + =#=#=#= End test: Ban dummy from node1 - OK (0) =#=#=#= * Passed: crm_resource - Ban dummy from node1 =#=#=#= Begin test: Ban dummy from node2 =#=#=#= WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node2' with a score of -INFINITY for resource dummy on node2. This will prevent dummy from running on node2 until the constraint is removed using the 'crm_resource --clear' command or manually with cibadmin This will be the case even if node2 is the last node in the cluster This message can be disabled with --quiet =#=#=#= Current cib after: Ban dummy from node2 =#=#=#= - - - + =#=#=#= End test: Ban dummy from node2 - OK (0) =#=#=#= * Passed: crm_resource - Ban dummy from node2 =#=#=#= Begin test: Relocate resources due to ban =#=#=#= Current cluster status: Online: [ node1 node2 node3 ] dummy (ocf::pacemaker:Dummy): Started node1 Fence (stonith:fence_true): Started node2 Transition Summary: * Move dummy (Started node1 -> node3) Executing cluster transition: * Resource action: dummy stop on node1 * Pseudo action: all_stopped * Resource action: dummy start on node3 Revised cluster status: Online: [ node1 node2 node3 ] dummy (ocf::pacemaker:Dummy): Started node3 Fence (stonith:fence_true): Started node2 =#=#=#= Current cib after: Relocate resources due to ban =#=#=#= - - - + =#=#=#= End test: Relocate resources due to ban - OK (0) =#=#=#= * Passed: crm_simulate - Relocate resources due to ban =#=#=#= Begin test: Move dummy to node1 =#=#=#= =#=#=#= Current cib after: Move dummy to node1 =#=#=#= - - - + =#=#=#= End test: Move dummy to node1 - OK (0) =#=#=#= * Passed: crm_resource - Move dummy to node1 =#=#=#= Begin test: Clear implicit constraints for dummy on node2 =#=#=#= =#=#=#= Current cib after: Clear implicit constraints for dummy on node2 =#=#=#= - - - + =#=#=#= End test: Clear implicit constraints for dummy on node2 - OK (0) =#=#=#= * Passed: crm_resource - Clear implicit constraints for dummy on node2 =#=#=#= Begin test: Drop the status section =#=#=#= =#=#=#= End test: Drop the status section - OK (0) =#=#=#= * Passed: cibadmin - Drop the status section =#=#=#= Begin test: Create a clone =#=#=#= =#=#=#= End test: Create a clone - OK (0) =#=#=#= * Passed: cibadmin - Create a clone =#=#=#= Begin test: Create a resource meta attribute =#=#=#= Performing update of 'is-managed' on 'test-clone', the parent of 'test-primitive' Set 'test-clone' option: id=test-clone-meta_attributes-is-managed set=test-clone-meta_attributes name=is-managed=false =#=#=#= Current cib after: Create a resource meta attribute =#=#=#= =#=#=#= End test: Create a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute =#=#=#= Begin test: Create a resource meta attribute in the primitive =#=#=#= Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed set=test-primitive-meta_attributes name=is-managed=false =#=#=#= Current cib after: Create a resource meta attribute in the primitive =#=#=#= =#=#=#= End test: Create a resource meta attribute in the primitive - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in the primitive =#=#=#= Begin test: Update resource meta attribute with duplicates =#=#=#= Multiple attributes match name=is-managed Value: false (id=test-primitive-meta_attributes-is-managed) Value: false (id=test-clone-meta_attributes-is-managed) A value for 'is-managed' already exists in child 'test-primitive', performing update on that instead of 'test-clone' Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed=true =#=#=#= Current cib after: Update resource meta attribute with duplicates =#=#=#= =#=#=#= End test: Update resource meta attribute with duplicates - OK (0) =#=#=#= * Passed: crm_resource - Update resource meta attribute with duplicates =#=#=#= Begin test: Update resource meta attribute with duplicates (force clone) =#=#=#= Set 'test-clone' option: id=test-clone-meta_attributes-is-managed name=is-managed=true =#=#=#= Current cib after: Update resource meta attribute with duplicates (force clone) =#=#=#= =#=#=#= End test: Update resource meta attribute with duplicates (force clone) - OK (0) =#=#=#= * Passed: crm_resource - Update resource meta attribute with duplicates (force clone) =#=#=#= Begin test: Update child resource meta attribute with duplicates =#=#=#= Multiple attributes match name=is-managed Value: true (id=test-primitive-meta_attributes-is-managed) Value: true (id=test-clone-meta_attributes-is-managed) Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed=false =#=#=#= Current cib after: Update child resource meta attribute with duplicates =#=#=#= =#=#=#= End test: Update child resource meta attribute with duplicates - OK (0) =#=#=#= * Passed: crm_resource - Update child resource meta attribute with duplicates =#=#=#= Begin test: Delete resource meta attribute with duplicates =#=#=#= Multiple attributes match name=is-managed Value: false (id=test-primitive-meta_attributes-is-managed) Value: true (id=test-clone-meta_attributes-is-managed) A value for 'is-managed' already exists in child 'test-primitive', performing delete on that instead of 'test-clone' Deleted 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource meta attribute with duplicates =#=#=#= =#=#=#= End test: Delete resource meta attribute with duplicates - OK (0) =#=#=#= * Passed: crm_resource - Delete resource meta attribute with duplicates =#=#=#= Begin test: Delete resource meta attribute in parent =#=#=#= Performing delete of 'is-managed' on 'test-clone', the parent of 'test-primitive' Deleted 'test-clone' option: id=test-clone-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource meta attribute in parent =#=#=#= =#=#=#= End test: Delete resource meta attribute in parent - OK (0) =#=#=#= * Passed: crm_resource - Delete resource meta attribute in parent =#=#=#= Begin test: Create a resource meta attribute in the primitive =#=#=#= Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed set=test-primitive-meta_attributes name=is-managed=false =#=#=#= Current cib after: Create a resource meta attribute in the primitive =#=#=#= =#=#=#= End test: Create a resource meta attribute in the primitive - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in the primitive =#=#=#= Begin test: Update existing resource meta attribute =#=#=#= A value for 'is-managed' already exists in child 'test-primitive', performing update on that instead of 'test-clone' Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed=true =#=#=#= Current cib after: Update existing resource meta attribute =#=#=#= =#=#=#= End test: Update existing resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Update existing resource meta attribute =#=#=#= Begin test: Create a resource meta attribute in the parent =#=#=#= Set 'test-clone' option: id=test-clone-meta_attributes-is-managed set=test-clone-meta_attributes name=is-managed=true =#=#=#= Current cib after: Create a resource meta attribute in the parent =#=#=#= =#=#=#= End test: Create a resource meta attribute in the parent - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in the parent =#=#=#= Begin test: Copy resources =#=#=#= =#=#=#= End test: Copy resources - OK (0) =#=#=#= * Passed: cibadmin - Copy resources =#=#=#= Begin test: Delete resource paremt meta attribute (force) =#=#=#= Deleted 'test-clone' option: id=test-clone-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource paremt meta attribute (force) =#=#=#= =#=#=#= End test: Delete resource paremt meta attribute (force) - OK (0) =#=#=#= * Passed: crm_resource - Delete resource paremt meta attribute (force) =#=#=#= Begin test: Restore duplicates =#=#=#= =#=#=#= Current cib after: Restore duplicates =#=#=#= =#=#=#= End test: Restore duplicates - OK (0) =#=#=#= * Passed: cibadmin - Restore duplicates =#=#=#= Begin test: Delete resource child meta attribute =#=#=#= Multiple attributes match name=is-managed Value: true (id=test-primitive-meta_attributes-is-managed) Value: true (id=test-clone-meta_attributes-is-managed) Deleted 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource child meta attribute =#=#=#= =#=#=#= End test: Delete resource child meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Delete resource child meta attribute diff --git a/tools/regression.validity.exp b/tools/regression.validity.exp index 827b66025d..1278a09fc1 100644 --- a/tools/regression.validity.exp +++ b/tools/regression.validity.exp @@ -1,397 +1,397 @@ Created new pacemaker configuration Setting up shadow instance A new shadow instance was created. To begin using it paste the following into your shell: CIB_shadow=tools-regression ; export CIB_shadow =#=#=#= Begin test: Try to make resulting CIB invalid (enum violation) =#=#=#= Call failed: Update does not conform to the configured schema =#=#=#= Current cib after: Try to make resulting CIB invalid (enum violation) =#=#=#= =#=#=#= End test: Try to make resulting CIB invalid (enum violation) - Update does not conform to the configured schema (203) =#=#=#= * Passed: cibadmin - Try to make resulting CIB invalid (enum violation) =#=#=#= Begin test: Run crm_simulate with invalid CIB (enum violation) =#=#=#= -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-1.2' validation (4 of X) +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-1.2' validation (4 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-1.2 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-1.3' validation (5 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-1.2 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-1.3' validation (5 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-1.3 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.0' validation (6 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-1.3 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.0' validation (6 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.0 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.1' validation (7 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.0 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.1' validation (7 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.1 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.2' validation (8 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.1 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.2' validation (8 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.2 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.3' validation (9 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.2 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.3' validation (9 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.3 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.4' validation (10 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.3 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.4' validation (10 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.4 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.5' validation (11 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.4 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.5' validation (11 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.5 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.6' validation (12 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.5 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.6' validation (12 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.6 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.7' validation (13 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.6 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.7' validation (13 of X) element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-2.7 validation failed +( schemas.c:NNN ) trace: update_validation: pacemaker-2.7 validation failed Your current configuration pacemaker-1.2 could not validate with any schema in range [pacemaker-1.2, pacemaker-2.7], cannot upgrade to pacemaker-2.0. =#=#=#= End test: Run crm_simulate with invalid CIB (enum violation) - Required key not available (126) =#=#=#= * Passed: crm_simulate - Run crm_simulate with invalid CIB (enum violation) =#=#=#= Begin test: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#= Call failed: Update does not conform to the configured schema =#=#=#= Current cib after: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#= =#=#=#= End test: Try to make resulting CIB invalid (unrecognized validate-with) - Update does not conform to the configured schema (203) =#=#=#= * Passed: cibadmin - Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#= Begin test: Run crm_simulate with invalid CIB (unrecognized validate-with) =#=#=#= -( schemas.c:736 ) debug: update_validation: Unknown validation type -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-0.6' validation (0 of X) +( schemas.c:NNN ) debug: update_validation: Unknown validation type +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-0.6' validation (0 of X) element rsc_order: validity error : Element rsc_order does not carry attribute to element rsc_order: validity error : Element rsc_order does not carry attribute from element rsc_order: validity error : No declaration for attribute first of element rsc_order element rsc_order: validity error : No declaration for attribute first-action of element rsc_order element rsc_order: validity error : No declaration for attribute then of element rsc_order -( schemas.c:760 ) trace: update_validation: pacemaker-0.6 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'transitional-0.6' validation (1 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-0.6 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'transitional-0.6' validation (1 of X) element rsc_order: validity error : Element rsc_order does not carry attribute to element rsc_order: validity error : Element rsc_order does not carry attribute from element rsc_order: validity error : No declaration for attribute first of element rsc_order element rsc_order: validity error : No declaration for attribute first-action of element rsc_order element rsc_order: validity error : No declaration for attribute then of element rsc_order -( schemas.c:760 ) trace: update_validation: transitional-0.6 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-0.7' validation (2 of X) +( schemas.c:NNN ) trace: update_validation: transitional-0.6 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-0.7' validation (2 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-0.7 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-1.0' validation (3 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-0.7 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-1.0' validation (3 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-1.0 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-1.2' validation (4 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-1.0 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-1.2' validation (4 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-1.2 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-1.3' validation (5 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-1.2 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-1.3' validation (5 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-1.3 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.0' validation (6 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-1.3 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.0' validation (6 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.0 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.1' validation (7 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.0 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.1' validation (7 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.1 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.2' validation (8 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.1 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.2' validation (8 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.2 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.3' validation (9 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.2 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.3' validation (9 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.3 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.4' validation (10 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.3 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.4' validation (10 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.4 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.5' validation (11 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.4 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.5' validation (11 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.5 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.6' validation (12 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.5 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.6' validation (12 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.6 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.7' validation (13 of X) +( schemas.c:NNN ) trace: update_validation: pacemaker-2.6 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.7' validation (13 of X) element cib: Relax-NG validity error : Invalid attribute validate-with for element cib -( schemas.c:760 ) trace: update_validation: pacemaker-2.7 validation failed +( schemas.c:NNN ) trace: update_validation: pacemaker-2.7 validation failed Your current configuration pacemaker-9999.0 could not validate with any schema in range [unknown, pacemaker-2.7], cannot upgrade to pacemaker-2.0. =#=#=#= End test: Run crm_simulate with invalid CIB (unrecognized validate-with) - Required key not available (126) =#=#=#= * Passed: crm_simulate - Run crm_simulate with invalid CIB (unrecognized validate-with) =#=#=#= Begin test: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#= Call failed: Update does not conform to the configured schema =#=#=#= Current cib after: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#= =#=#=#= End test: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) - Update does not conform to the configured schema (203) =#=#=#= * Passed: cibadmin - Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#= Begin test: Run crm_simulate with invalid, but possibly recoverable CIB (valid with X.Y+1) =#=#=#= -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-1.2' validation (4 of X) +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-1.2' validation (4 of X) element tags: Relax-NG validity error : Element configuration has extra content: tags -( schemas.c:760 ) trace: update_validation: pacemaker-1.2 validation failed -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-1.3' validation (5 of X) -( schemas.c:804 ) debug: update_validation: Upgrading pacemaker-1.3-style configuration to pacemaker-2.0 with upgrade-1.3.xsl -( schemas.c:816 ) info: update_validation: Transformation upgrade-1.3.xsl successful -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.0' validation (6 of X) -( schemas.c:797 ) debug: update_validation: pacemaker-2.0-style configuration is also valid for pacemaker-2.1 -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.1' validation (7 of X) -( schemas.c:771 ) debug: update_validation: Configuration valid for schema: pacemaker-2.1 -( schemas.c:797 ) debug: update_validation: pacemaker-2.1-style configuration is also valid for pacemaker-2.2 -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.2' validation (8 of X) -( schemas.c:771 ) debug: update_validation: Configuration valid for schema: pacemaker-2.2 -( schemas.c:797 ) debug: update_validation: pacemaker-2.2-style configuration is also valid for pacemaker-2.3 -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.3' validation (9 of X) -( schemas.c:771 ) debug: update_validation: Configuration valid for schema: pacemaker-2.3 -( schemas.c:797 ) debug: update_validation: pacemaker-2.3-style configuration is also valid for pacemaker-2.4 -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.4' validation (10 of X) -( schemas.c:771 ) debug: update_validation: Configuration valid for schema: pacemaker-2.4 -( schemas.c:797 ) debug: update_validation: pacemaker-2.4-style configuration is also valid for pacemaker-2.5 -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.5' validation (11 of X) -( schemas.c:771 ) debug: update_validation: Configuration valid for schema: pacemaker-2.5 -( schemas.c:797 ) debug: update_validation: pacemaker-2.5-style configuration is also valid for pacemaker-2.6 -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.6' validation (12 of X) -( schemas.c:771 ) debug: update_validation: Configuration valid for schema: pacemaker-2.6 -( schemas.c:797 ) debug: update_validation: pacemaker-2.6-style configuration is also valid for pacemaker-2.7 -( schemas.c:751 ) debug: update_validation: Testing 'pacemaker-2.7' validation (13 of X) -( schemas.c:771 ) debug: update_validation: Configuration valid for schema: pacemaker-2.7 -( schemas.c:787 ) trace: update_validation: Stopping at pacemaker-2.7 -( schemas.c:843 ) info: update_validation: Transformed the configuration from pacemaker-1.2 to pacemaker-2.7 +( schemas.c:NNN ) trace: update_validation: pacemaker-1.2 validation failed +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-1.3' validation (5 of X) +( schemas.c:NNN ) debug: update_validation: Upgrading pacemaker-1.3-style configuration to pacemaker-2.0 with upgrade-1.3.xsl +( schemas.c:NNN ) info: update_validation: Transformation upgrade-1.3.xsl successful +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.0' validation (6 of X) +( schemas.c:NNN ) debug: update_validation: pacemaker-2.0-style configuration is also valid for pacemaker-2.1 +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.1' validation (7 of X) +( schemas.c:NNN ) debug: update_validation: Configuration valid for schema: pacemaker-2.1 +( schemas.c:NNN ) debug: update_validation: pacemaker-2.1-style configuration is also valid for pacemaker-2.2 +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.2' validation (8 of X) +( schemas.c:NNN ) debug: update_validation: Configuration valid for schema: pacemaker-2.2 +( schemas.c:NNN ) debug: update_validation: pacemaker-2.2-style configuration is also valid for pacemaker-2.3 +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.3' validation (9 of X) +( schemas.c:NNN ) debug: update_validation: Configuration valid for schema: pacemaker-2.3 +( schemas.c:NNN ) debug: update_validation: pacemaker-2.3-style configuration is also valid for pacemaker-2.4 +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.4' validation (10 of X) +( schemas.c:NNN ) debug: update_validation: Configuration valid for schema: pacemaker-2.4 +( schemas.c:NNN ) debug: update_validation: pacemaker-2.4-style configuration is also valid for pacemaker-2.5 +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.5' validation (11 of X) +( schemas.c:NNN ) debug: update_validation: Configuration valid for schema: pacemaker-2.5 +( schemas.c:NNN ) debug: update_validation: pacemaker-2.5-style configuration is also valid for pacemaker-2.6 +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.6' validation (12 of X) +( schemas.c:NNN ) debug: update_validation: Configuration valid for schema: pacemaker-2.6 +( schemas.c:NNN ) debug: update_validation: pacemaker-2.6-style configuration is also valid for pacemaker-2.7 +( schemas.c:NNN ) debug: update_validation: Testing 'pacemaker-2.7' validation (13 of X) +( schemas.c:NNN ) debug: update_validation: Configuration valid for schema: pacemaker-2.7 +( schemas.c:NNN ) trace: update_validation: Stopping at pacemaker-2.7 +( schemas.c:NNN ) info: update_validation: Transformed the configuration from pacemaker-1.2 to pacemaker-2.7 error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity Current cluster status: dummy1 (ocf::pacemaker:Dummy): Stopped dummy2 (ocf::pacemaker:Dummy): Stopped Transition Summary: error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity Executing cluster transition: Revised cluster status: dummy1 (ocf::pacemaker:Dummy): Stopped dummy2 (ocf::pacemaker:Dummy): Stopped =#=#=#= End test: Run crm_simulate with invalid, but possibly recoverable CIB (valid with X.Y+1) - OK (0) =#=#=#= * Passed: crm_simulate - Run crm_simulate with invalid, but possibly recoverable CIB (valid with X.Y+1) =#=#=#= Begin test: Make resulting CIB valid, although without validate-with attribute =#=#=#= element rsc_order: validity error : Element rsc_order does not carry attribute to element rsc_order: validity error : Element rsc_order does not carry attribute from element rsc_order: validity error : No declaration for attribute first of element rsc_order element rsc_order: validity error : No declaration for attribute first-action of element rsc_order element rsc_order: validity error : No declaration for attribute then of element rsc_order element rsc_order: validity error : Element rsc_order does not carry attribute to element rsc_order: validity error : Element rsc_order does not carry attribute from element rsc_order: validity error : No declaration for attribute first of element rsc_order element rsc_order: validity error : No declaration for attribute first-action of element rsc_order element rsc_order: validity error : No declaration for attribute then of element rsc_order =#=#=#= Current cib after: Make resulting CIB valid, although without validate-with attribute =#=#=#= =#=#=#= End test: Make resulting CIB valid, although without validate-with attribute - OK (0) =#=#=#= * Passed: cibadmin - Make resulting CIB valid, although without validate-with attribute =#=#=#= Begin test: Run crm_simulate with valid CIB, but without validate-with attribute =#=#=#= Configuration validation is currently disabled. It is highly encouraged and prevents many common cluster issues. bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute to bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute from bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first-action of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute then of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute to bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute from bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first-action of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute then of element rsc_order error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity Current cluster status: dummy1 (ocf::pacemaker:Dummy): Stopped dummy2 (ocf::pacemaker:Dummy): Stopped Transition Summary: error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity Executing cluster transition: Revised cluster status: dummy1 (ocf::pacemaker:Dummy): Stopped dummy2 (ocf::pacemaker:Dummy): Stopped =#=#=#= End test: Run crm_simulate with valid CIB, but without validate-with attribute - OK (0) =#=#=#= * Passed: crm_simulate - Run crm_simulate with valid CIB, but without validate-with attribute =#=#=#= Begin test: Make resulting CIB invalid, and without validate-with attribute =#=#=#= element rsc_order: validity error : Element rsc_order does not carry attribute to element rsc_order: validity error : Element rsc_order does not carry attribute from element rsc_order: validity error : No declaration for attribute first of element rsc_order element rsc_order: validity error : No declaration for attribute first-action of element rsc_order element rsc_order: validity error : No declaration for attribute then of element rsc_order element rsc_order: validity error : Element rsc_order does not carry attribute to element rsc_order: validity error : Element rsc_order does not carry attribute from element rsc_order: validity error : No declaration for attribute first of element rsc_order element rsc_order: validity error : No declaration for attribute first-action of element rsc_order element rsc_order: validity error : No declaration for attribute then of element rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order =#=#=#= Current cib after: Make resulting CIB invalid, and without validate-with attribute =#=#=#= =#=#=#= End test: Make resulting CIB invalid, and without validate-with attribute - OK (0) =#=#=#= * Passed: cibadmin - Make resulting CIB invalid, and without validate-with attribute =#=#=#= Begin test: Run crm_simulate with invalid CIB, also without validate-with attribute =#=#=#= Configuration validation is currently disabled. It is highly encouraged and prevents many common cluster issues. bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute to bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute from bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first-action of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute then of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute to bad-1.2.xml:10: element rsc_order: validity error : Element rsc_order does not carry attribute from bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute first-action of element rsc_order bad-1.2.xml:10: element rsc_order: validity error : No declaration for attribute then of element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order bad-1.2.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity (constraints.:430 ) error: unpack_simple_rsc_order: Cannot invert rsc_order constraint ord_1-2. Please specify the inverse manually. Current cluster status: dummy1 (ocf::pacemaker:Dummy): Stopped dummy2 (ocf::pacemaker:Dummy): Stopped Transition Summary: error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity Executing cluster transition: Revised cluster status: dummy1 (ocf::pacemaker:Dummy): Stopped dummy2 (ocf::pacemaker:Dummy): Stopped =#=#=#= End test: Run crm_simulate with invalid CIB, also without validate-with attribute - OK (0) =#=#=#= * Passed: crm_simulate - Run crm_simulate with invalid CIB, also without validate-with attribute