diff --git a/crmd/control.c b/crmd/control.c index 5a9c90ca5d..42743851a6 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,1165 +1,1170 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; extern gboolean crm_connect_corosync(crm_cluster_t * cluster); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; bool no_quorum_suicide_escalation = FALSE; static gboolean election_timeout_popped(gpointer data) { /* Not everyone voted */ crm_info("Election failed: Declaring ourselves the winner"); register_fsa_input(C_TIMER_POPPED, I_ELECTION_DC, NULL); return FALSE; } /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = calloc(1, sizeof(crm_cluster_t)); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); set_bit(fsa_input_register, R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); if (is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(cluster); #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT cluster->destroy = crmd_ha_connection_destroy; cluster->hb_dispatch = crmd_ha_msg_callback; registered = crm_cluster_connect(cluster); fsa_cluster_conn = cluster->hb_conn; crm_trace("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_trace("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if (registered) { crm_trace("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } #endif } fsa_election = election_init(NULL, cluster->uname, 60000/*60s*/, election_timeout_popped); fsa_our_uname = cluster->uname; fsa_our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } if (registered == FALSE) { set_bit(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __FUNCTION__); clear_bit(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static bool need_spawn_pengine_from_crmd(void) { static int result = -1; if (result != -1) return result; if (!is_heartbeat_cluster()) { result = 0; return result; } /* NULL, or "strange" value: rather spawn from here. */ result = TRUE; crm_str_to_boolean(daemon_option("crmd_spawns_pengine"), &result); return result; } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ set_bit(fsa_input_register, R_SHUTDOWN); if (need_spawn_pengine_from_crmd()) { if (is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if (stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", pe_subsystem->name); clear_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(FALSE); } } crm_info("All subsystems stopped, continuing"); } if (stonith_api) { /* Prevent it from coming up again */ clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; set_bit(fsa_input_register, R_SHUTDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", (fsa_our_dc? fsa_our_dc : "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); /* set_bit(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern crm_ipc_t *attrd_ipc; extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern char *te_client_id; +extern regex_t *version_format_regex; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *client = value; crm_err("%s is still connected at exit", crm_client_name(client)); } int crmd_fast_exit(int rc) { if (is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", rc, DAEMON_RESPAWN_STOP); rc = DAEMON_RESPAWN_STOP; } if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); rc = pcmk_err_generic; } return crm_exit(rc); } int crmd_exit(int rc) { GListPtr gIter = NULL; GMainLoop *mloop = crmd_mainloop; static bool in_progress = FALSE; if(in_progress && rc == 0) { crm_debug("Exit is already in progress"); return rc; } else if(in_progress) { crm_notice("Error during shutdown process, terminating now with status %d: %s", rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(rc); } in_progress = TRUE; crm_trace("Preparing to exit: %d", rc); /* Suppress secondary errors resulting from us disconnecting everything */ set_bit(fsa_input_register, R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } if (attrd_ipc) { crm_trace("Closing attrd connection"); crm_ipc_close(attrd_ipc); crm_ipc_destroy(attrd_ipc); attrd_ipc = NULL; } if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) { crm_trace("Disconnecting Policy Engine"); qb_ipcs_disconnect(pe_subsystem->client->ipcs); } if(stonith_api) { crm_trace("Disconnecting fencing API"); clear_bit(fsa_input_register, R_ST_REQUIRED); stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (rc == pcmk_ok && crmd_mainloop == NULL) { crm_debug("No mainloop detected"); rc = EPROTO; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if(rc != pcmk_ok) { crm_notice("Forcing immediate exit with status %d: %s", rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); return crmd_fast_exit(rc); } /* Clean up as much memory as possible for valgrind */ for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } clear_bit(fsa_input_register, R_MEMBERSHIP); g_list_free(fsa_message_queue); fsa_message_queue = NULL; free(pe_subsystem); pe_subsystem = NULL; free(te_subsystem); te_subsystem = NULL; free(cib_subsystem); cib_subsystem = NULL; + if (version_format_regex) { + regfree(version_format_regex); + free(version_format_regex); + } election_fini(fsa_election); fsa_election = NULL; /* Tear down the CIB connection, but don't free it yet -- it could be used * when we drain the mainloop later. */ cib_free_callbacks(fsa_cib_conn); fsa_cib_conn->cmds->signoff(fsa_cib_conn); verify_stopped(fsa_state, LOG_WARNING); clear_bit(fsa_input_register, R_LRM_CONNECTED); lrm_state_destroy_all(); /* This basically will not work, since mainloop has a reference to it */ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; mainloop_destroy_trigger(config_read); config_read = NULL; mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; crm_client_cleanup(); crm_peer_destroy(); crm_timer_stop(transition_timer); crm_timer_stop(integration_timer); crm_timer_stop(finalization_timer); crm_timer_stop(election_trigger); election_timeout_stop(fsa_election); crm_timer_stop(shutdown_escalation_timer); crm_timer_stop(wait_timer); crm_timer_stop(recheck_timer); free(transition_timer); transition_timer = NULL; free(integration_timer); integration_timer = NULL; free(finalization_timer); finalization_timer = NULL; free(election_trigger); election_trigger = NULL; free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; free(wait_timer); wait_timer = NULL; free(recheck_timer); recheck_timer = NULL; free(fsa_our_dc_version); fsa_our_dc_version = NULL; free(fsa_our_uname); fsa_our_uname = NULL; free(fsa_our_uuid); fsa_our_uuid = NULL; free(fsa_our_dc); fsa_our_dc = NULL; free(fsa_cluster_name); fsa_cluster_name = NULL; free(te_uuid); te_uuid = NULL; free(te_client_id); te_client_id = NULL; free(fsa_pe_ref); fsa_pe_ref = NULL; free(failed_stop_offset); failed_stop_offset = NULL; free(failed_start_offset); failed_start_offset = NULL; free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); /* Don't re-enter this block */ crmd_mainloop = NULL; crmd_drain_alerts(ctx); /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); #if SUPPORT_HEARTBEAT /* Do this only after g_main_loop_quit(). * * This interface was broken (incomplete) since it was introduced. * ->delete() does cleanup and free most of it, but it does not * actually remove and destroy the corresponding GSource, so the next * prepare/check iteratioin would find a corrupt (because partially * freed) GSource, and segfault. * * Apparently one was supposed to store the GSource as returned by * G_main_add_ll_cluster(), and g_source_destroy() that "by hand". * * But no-one ever did this, not even in the old hb code when this was * introduced. * * Note that fsa_cluster_conn was set as an "alias" to cluster->hb_conn * in do_ha_control() right after crm_cluster_connect(), and only * happens to still point at that object, because do_ha_control() does * not reset it to NULL after crm_cluster_disconnect() above does * reset cluster->hb_conn to NULL. * Not sure if that's something to cleanup, too. * * I'll try to fix this up in heartbeat proper, so ->delete * will actually remove, and destroy, and unref, and free this thing. * Doing so after g_main_loop_quit() is valid with both old, * and eventually fixed heartbeat. * * If we introduce the "by hand" destroy/remove/unref, * this may break again once heartbeat is fixed :-( * * -- Lars Ellenberg */ if (fsa_cluster_conn) { crm_trace("Deleting heartbeat api object"); fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); crm_trace("Done %d", rc); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; throttle_fini(); /* Graceful */ return rc; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int exit_code = pcmk_ok; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { /* exit_code = pcmk_err_generic; */ log_level = LOG_ERR; exit_type = "forcefully"; exit_code = pcmk_err_generic; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); lrm_state_init_local(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); te_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); if (cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if (te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if (pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if (was_error == FALSE && need_spawn_pengine_from_crmd()) { if (start_subsystem(pe_subsystem) == FALSE) { was_error = TRUE; } } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static int32_t crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void crmd_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_trace("Invoked: %s", crm_client_name(client)); crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif crm_trace("Processing msg from %s", crm_client_name(client)); crm_log_xml_trace(msg, "CRMd[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (crmd_authorize_message(msg, client, NULL)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); struct crm_subsystem_s *the_subsystem = NULL; if (client == NULL) { return 0; } crm_trace("Connection %p", c); if (client->userdata == NULL) { crm_trace("Client hadn't registered with us yet"); } else if (strcasecmp(CRM_SYSTEM_PENGINE, client->userdata) == 0) { the_subsystem = pe_subsystem; } else if (strcasecmp(CRM_SYSTEM_TENGINE, client->userdata) == 0) { the_subsystem = te_subsystem; } else if (strcasecmp(CRM_SYSTEM_CIB, client->userdata) == 0) { the_subsystem = cib_subsystem; } if (the_subsystem != NULL) { the_subsystem->source = NULL; the_subsystem->client = NULL; crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid); } else { /* else that was a transient client */ crm_trace("Received HUP from transient client"); } crm_trace("Disconnecting client %s (%p)", crm_client_name(client), client); free(client->userdata); crm_client_destroy(client); trigger_fsa(fsa_source); return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); crmd_ipc_closed(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { HA_Message *msg = NULL; crm_trace("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if (msg != NULL) { crm_trace("There was a HA message"); ha_msg_del(msg); } } #endif crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = crmd_ipc_server_init(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_notice("The local CRM is operational"); clear_bit(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { set_bit(fsa_input_register, R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ pe_cluster_option crmd_opts[] = { /* name, old-name, validate, values, default, short description, long description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, #ifdef RHEL7_COMPAT /* These options were superseded by the alerts feature and now are just an * alternate interface to it. It was never released upstream, but was * released in RHEL 7, so we allow it to be enabled at compile-time by * defining RHEL7_COMPAT. */ { "notification-agent", NULL, "string", NULL, "/dev/null", &check_script, "Deprecated", "Use alert path in alerts section instead" }, { "notification-recipient", NULL, "string", NULL, "", NULL, "Deprecated", "Use recipient value in alerts section instead" }, #endif { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization, "The maximum amount of system resources that should be used by nodes in the cluster", "The cluster will slow down its recovery process when the amount of system resources used" " (currently CPU) approaches this limit", }, { "node-action-limit", NULL, "integer", NULL, "0", &check_number, "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"}, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\n" "Enabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n" "Useful if your configuration is sensitive to the order in which ping updates arrive." }, { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout, "How long to wait before we can assume nodes are safely down", NULL }, { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number, "How many times stonith can fail before it will no longer be attempted on a target" }, { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, #if SUPPORT_PLUGIN { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, #endif }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { #ifdef RHEL7_COMPAT const char *script = NULL; #endif const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit(fsa_input_register, R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig) && (crm_element_name(crmconfig)) && (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); #ifdef RHEL7_COMPAT script = crmd_pref(config_hash, "notification-agent"); value = crmd_pref(config_hash, "notification-recipient"); crmd_enable_alerts(script, value); #endif value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */ throttle_update_job_max(value); value = crmd_pref(config_hash, "load-threshold"); if(value) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = crmd_pref(config_hash, "no-quorum-policy"); if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) { no_quorum_suicide_escalation = TRUE; } value = crmd_pref(config_hash,"stonith-max-attempts"); update_stonith_max_attempts(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); /* How long to declare an election over - even if not everyone voted */ crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout_set_period(fsa_election, crm_get_msec(value)); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_debug("Sending expected-votes=%s to corosync", value); send_cluster_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif free(fsa_cluster_name); fsa_cluster_name = NULL; value = g_hash_table_lookup(config_hash, "cluster-name"); if (value) { fsa_cluster_name = strdup(value); } alerts = first_named_child(output, XML_CIB_TAG_ALERTS); parse_alerts(alerts); set_bit(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* can't rely on this... */ crm_notice("Shutting down cluster resource manager " CRM_XS " limit=%dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); crmd_exit(pcmk_ok); } } diff --git a/crmd/lrm.c b/crmd/lrm.c index 95992f50f8..8dda3738d5 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,2584 +1,2625 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 #define s_if_plural(i) (((i) == 1)? "" : "s") struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; op_id = calloc(1, strlen(rsc) + 34); if (op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval == existing->interval) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.class); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.class = strdup(rsc->class); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval > 0) { crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: %s_%s_%d", g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); CRM_ASSERT(op != NULL); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* if this is the local lrmd ipc connection, set the right bits in the * crmd when the connection goes down */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local lrmd connections. Remote connections are handled as * resources within the pengine. Connecting and disconnecting from remote lrmd instances * handled differently than the local. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the LRM"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the LRM"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the LRM"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the LRM %d time%s (%d max)", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to connect to the LRM the max allowed %d time%s", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, s_if_plural(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending LRM operation%s at %s", counter, s_if_plural(counter), when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static char * build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result, const char *criteria, bool target, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; xmlNode *param = NULL; xmlNode *params = NULL; const char *secure_terms[] = { "password", "passwd", "user", }; if(safe_str_eq("private", criteria)) { /* It will take time for the agents to be updated * Check for some common terms */ max = DIMOF(secure_terms); } params = find_xml_node(metadata, "parameters", TRUE); for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) { if (crm_str_eq((const char *)param->name, "parameter", TRUE)) { bool accept = FALSE; const char *name = crm_element_value(param, "name"); const char *value = crm_element_value(param, criteria); if(max && value) { /* Turn off the compatibility logic once an agent has been updated to know about 'private' */ max = 0; } if (name == NULL) { crm_err("Invalid parameter in %s metadata", op->rsc_id); } else if(target == crm_is_true(value)) { accept = TRUE; } else if(max) { int lpc = 0; bool found = FALSE; for(lpc = 0; found == FALSE && lpc < max; lpc++) { if(safe_str_eq(secure_terms[lpc], name)) { found = TRUE; } } if(found == target) { accept = TRUE; } } if(accept) { int start = len; crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria); len += strlen(name) + 2; list = realloc_safe(list, len + 1); sprintf(list + start, " %s ", name); } else { crm_trace("Rejecting %s for %s", name, criteria); } if(invert_for_xml) { crm_trace("Inverting %s match for %s xml", name, criteria); accept = !accept; } if(result && accept) { value = g_hash_table_lookup(op->params, name); - if(value != NULL) { -#ifdef ENABLE_VERSIONED_ATTRS - char *summary = crm_versioned_param_summary(op->versioned_params, name); - - if (summary) { - crm_trace("Adding attr %s=%s to the xml result", name, summary); - crm_xml_add(result, name, summary); - free(summary); - } else { -#endif - crm_trace("Adding attr %s=%s to the xml result", name, value); - crm_xml_add(result, name, value); -#ifdef ENABLE_VERSIONED_ATTRS - } -#endif + crm_trace("Adding attr %s=%s to the xml result", name, value); + crm_xml_add(result, name, value); } } } } return list; } static bool resource_supports_action(xmlNode *metadata, const char *name) { const char *value = NULL; xmlNode *action = NULL; xmlNode *actions = NULL; actions = find_xml_node(metadata, "actions", TRUE); for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { if (crm_str_eq((const char *)action->name, "action", TRUE)) { value = crm_element_value(action, "name"); if (safe_str_eq(name, value)) { return TRUE; } } } return FALSE; } static void append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval > 0) { /* monitors are not reloadable */ return; } if(resource_supports_action(metadata, "reload")) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Any parameters with unique="1" should be added into the "op-force-restart" list. */ list = build_parameter_list(op, metadata, restart, "unique", TRUE, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, "private", TRUE, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } +regex_t *version_format_regex = NULL; + +static gboolean +valid_version_format(const char *version) +{ + if (version == NULL) { + return FALSE; + } + + if (version_format_regex == NULL) { + const char *regex_string = "^[[:digit:]]+([.][[:digit:]]+)*$"; + version_format_regex = calloc(1, sizeof(regex_t)); + regcomp(version_format_regex, regex_string, REG_EXTENDED | REG_NOSUB); + + CRM_CHECK(version_format_regex != NULL, return TRUE); + } + + return regexec(version_format_regex, version, 0, NULL, 0) == 0; +} + +static const char* +get_ra_version(lrmd_rsc_info_t *rsc, xmlNode *metadata) +{ + static const char *default_version = "0.1"; + const char *version = NULL; + xmlNode *version_xml = NULL; + + if (!metadata) { + return default_version; + } + + version_xml = get_xpath_object("/resource-agent/@version", metadata, LOG_TRACE); + if (!version_xml) { + if (rsc) { + crm_debug("Resource agent %s:%s:%s (for %s) doesn't specify a version number", + rsc->class, rsc->provider, rsc->type, rsc->id); + } + return default_version; + } + + version = crm_element_value(version_xml, XML_ATTR_VERSION); + if (!valid_version_format(version)) { + if (rsc) { + crm_notice("Resource agent version for %s (%s:%s:%s) has unrecognized format", + rsc->id, rsc->class, rsc->provider, rsc->type); + } + return default_version; + } + + if (rsc) { + crm_trace("Resource agent version for %s (%s:%s:%s) is %s", + rsc->id, rsc->class, rsc->provider, rsc->type, version); + } + + return version; +} + static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *node_name, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; xmlNode *metadata = NULL; + const char *ra_version = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE) || safe_str_eq(op->op_type, CRMD_ACTION_METADATA)) { /* Stopped resources don't need the digest logic */ /* As well as meta-data query results */ crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type); return TRUE; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation %s_%s_%d because we have no LRM connection to %s", op->rsc_id, op->op_type, op->interval, node_name); return TRUE; } metadata = lrm_state_get_rsc_metadata(lrm_state, rsc); if (metadata == NULL && lrm_state_is_local(lrm_state)) { char *metadata_str = NULL; int rc = lrm_state_get_metadata(lrm_state, rsc->class, rsc->provider, rsc->type, &metadata_str, 0); if (rc != pcmk_ok) { crm_warn("Cannot get metadata for %s (%s:%s:%s)", rsc->id, rsc->class, rsc->provider, rsc->type); return TRUE; } metadata = lrm_state_update_rsc_metadata(lrm_state, rsc, metadata_str); free(metadata_str); } if (metadata == NULL) { crm_warn("Cannot update metadata for %s (%s:%s:%s)", rsc->id, rsc->class, rsc->provider, rsc->type); return TRUE; } + ra_version = get_ra_version(rsc, metadata); + crm_xml_add(xml_op, XML_ATTR_RA_VERSION, ra_version); + crm_trace("Including additional digests for %s::%s:%s", rsc->class, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current state of the LRM"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); xmlNode *xml_state; if (!lrm_state) { crm_err("Could not query lrm state for lrmd node %s", node_name); return NULL; } xml_state = do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); /* In case this function is called to generate a join confirmation to * send to the DC, force the current and expected join state to member. * This isn't necessary for newer DCs but is backward compatible. */ crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); return xml_state; } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int max = 0; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1; rsc_xpath = calloc(1, max); snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] lrm_state LRM state of the desired node * \param[in] op Operation whose history should be deleted */ static void erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing LRM resource history for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Erase an LRM history entry from the CIB, given operation identifiers * * \param[in] lrm_state LRM state of the node to clear history for * \param[in] rsc_id Name of resource to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] orig_op If specified, delete only if it has this original op * \param[in] call_id If specified, delete entry only if it has this call ID */ static void erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id, const char *key, const char *orig_op, int call_id) { char *op_xpath = NULL; CRM_CHECK((rsc_id != NULL) && (key != NULL), return); if (call_id > 0) { op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name, rsc_id, key, call_id); } else if (orig_op) { op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name, rsc_id, key, orig_op); } else { op_xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name, rsc_id, key); } crm_debug("Erasing LRM resource history for %s on %s (call=%d)", key, rsc_id, call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, int interval) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (safe_str_eq(op, entry->failed->op_type) && (interval == entry->failed->interval)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval If operation is specified, it has this interval in ms */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, int interval) { char *op_key = NULL; char *orig_op_key = NULL; lrm_state_t *lrm_state = NULL; lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } /* Erase from CIB */ op_key = generate_op_key(rsc_id, "last_failure", 0); if (operation) { orig_op_key = generate_op_key(rsc_id, operation, interval); } erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0); free(op_key); free(orig_op_key); /* Remove from memory */ if (lrm_state->resource_history) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } static lrmd_rsc_info_t * get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create) { lrmd_rsc_info_t *rsc = NULL; const char *id = ID(resource); const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_trace("Retrieving %s from the LRM.", id); CRM_CHECK(id != NULL, return NULL); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc && long_id) { rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0); } if (!rsc && do_create) { CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_trace("Adding rsc %s before operation", id); lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name); /* only register this as a internal error if this involves the local * lrmd. Otherwise we're likely dealing with an unresponsive remote-node * which is not a FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } return rsc; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if(xml_rsc == NULL) { /* @TODO Should we do something else, like direct ack? */ crm_info("Skipping %s=%d on %s (%p): no resource", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Skipping %s=%d on %s (%p): no operation", crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); CRM_ASSERT(op != NULL); op->call_id = get_fake_call_id(lrm_state, op->rsc_id); if(safe_str_eq(operation, RSC_NOTIFY)) { /* Notifications can't fail yet */ op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = rc; } op->t_run = time(NULL); op->t_rcchange = op->t_run; crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state); if(lrm_state) { process_lrm_event(lrm_state, op, NULL); } else { lrmd_rsc_info_t rsc; rsc.id = strdup(op->rsc_id); rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); do_update_resource(target_node, &rsc, op); free(rsc.id); free(rsc.type); free(rsc.class); free(rsc.provider); } lrmd_free_event(op); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean create_rsc = TRUE; lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; gboolean crm_rsc_delete = FALSE; if (input->xml != NULL) { /* Remote node operations are routed here to their remote connections */ target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); } if (target_node == NULL) { target_node = fsa_our_uname; } else if (safe_str_neq(target_node, fsa_our_uname)) { is_remote_node = TRUE; } lrm_state = lrm_state_find(target_node); if (lrm_state == NULL && is_remote_node) { crm_err("Failing action because remote node %s has no connection to cluster node %s", target_node, fsa_our_uname); /* The action must be recorded here and in the CIB as failed */ synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM command from: %s", from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { /* remember this delete op came from crm_resource */ crm_rsc_delete = TRUE; operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { operation = CRM_OP_LRM_REFRESH; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd can not fail a resource, it does not understand the * concept of success or failure in relation to a resource, it simply * executes operations and reports the results. We determine what a failure is. * Because of this, if we want to fail a resource we have to fake what we * understand a failure to look like. * * To do this we create a fake lrmd operation event for the resource * we want to fail. We then pass that event to the lrmd client callback * so it will be processed as if it actually came from the lrmd. */ op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); CRM_ASSERT(op != NULL); free((char *)op->user_data); op->user_data = NULL; op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->interval = 0; op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(input->msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } if(lrm_state_is_connected(lrm_state) == FALSE) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc == NULL && create_rsc) { crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "bad input"); /* if the operation couldn't complete because we can't register * the resource, return a generic error */ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); } else if (rsc == NULL) { crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); /* Deleting something that does not exist is a success */ send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; gboolean in_progress = FALSE; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id ? call_id : "NA"); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } /* Acknowledge the cancellation operation if it's for a remote connection resource */ if (in_progress == FALSE || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, pcmk_strerror(cib_rc)); op = construct_op(lrm_state, input->xml, rsc->id, operation); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); lrmd_free_rsc_info(rsc); return; } #endif if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; GHashTable *params = NULL; -#ifdef ENABLE_VERSIONED_ATTRS - xmlNode *versioned_params = NULL; -#endif const char *transition = NULL; CRM_ASSERT(rsc_id != NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); -#ifdef ENABLE_VERSIONED_ATTRS - - if (!is_remote_lrmd_ra(NULL, NULL, rsc_id)) { - xmlNode *ptr = first_named_child(rsc_op, XML_TAG_VER_ATTRS); - - if (ptr) { - versioned_params = copy_xml(ptr); - } - } -#endif op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); + if (safe_str_neq(op->op_type, RSC_METADATA) && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) { + lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0); + xmlNode *metadata = lrm_state_get_rsc_metadata(lrm_state, rsc); + + if (metadata) { + const char *ra_version = get_ra_version(rsc, metadata); + xmlNode *versioned_attrs = NULL; + GHashTable *hash = NULL; + char *key = NULL; + char *value = NULL; + GHashTableIter iter; + + versioned_attrs = first_named_child(rsc_op, XML_TAG_VER_ATTRS); + hash = pe_unpack_versioned_parameters(versioned_attrs, ra_version); + g_hash_table_iter_init(&iter, hash); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { + g_hash_table_iter_steal(&iter); + g_hash_table_replace(params, key, value); + } + g_hash_table_destroy(hash); + } + + lrmd_free_rsc_info(rsc); + } + if (safe_str_neq(operation, RSC_STOP)) { op->params = params; -#ifdef ENABLE_VERSIONED_ATTRS - op->versioned_params = versioned_params; -#endif } else { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; -#ifdef ENABLE_VERSIONED_ATTRS - op->versioned_params = versioned_params; -#endif } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; -#ifdef ENABLE_VERSIONED_ATTRS - - op->versioned_params = NULL; - free_xml(versioned_params); -#endif - } - } - -#ifdef ENABLE_VERSIONED_ATTRS - if (op->versioned_params) { - char *versioned_params_text = dump_xml_unformatted(op->versioned_params); - - if (versioned_params_text) { - g_hash_table_insert(op->params, strdup("#" XML_TAG_VER_ATTRS), versioned_params_text); } } -#endif /* sanity */ if (op->interval < 0) { op->interval = 0; } if (op->timeout <= 0) { op->timeout = op->interval; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); if (op->op_type == NULL || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL) || safe_str_eq(op->op_type, CRMD_ACTION_DELETE)) { return; } if (op->params == NULL) { return; } else { const char *record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending == NULL || crm_is_true(record_pending) == FALSE) { return; } } op->call_id = -1; op->op_status = PCMK_LRM_OP_PENDING; op->rc = PCMK_OCF_UNKNOWN; op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op %s_%s_%d on %s in the CIB", op->rsc_id, op->op_type, op->interval, node_name); do_update_resource(node_name, rsc, op); } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && op->interval == 0 && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if (op->interval == 0 && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for %s_%s_%d", removed, s_if_plural(removed), rsc->id, operation, op->interval); } } /* now do the op */ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); send_nack = TRUE; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { send_nack = TRUE; } if(send_nack) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = generate_op_key(rsc->id, op->op_type, op->interval); if (op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); op->call_id = get_fake_call_id(lrm_state, rsc->id); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = strdup(op->user_data); g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, node_name, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, op->op_type, op->interval, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { strncpy(pch, "\n ", 2); pch = strstr(pch, escaped_newline); } return ret; } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY) || safe_str_eq(op->op_type, RSC_METADATA)) { /* Keep notify and meta-data ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(lrm_state->node_name, rsc, op); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. report this */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ erase_lrm_history_by_op(lrm_state, op); } else if (pending && op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack. */ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: do_crm_log(op->interval?LOG_INFO:LOG_NOTICE, "Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; default: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } if (safe_str_neq(op->op_type, RSC_METADATA)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (op->rc == PCMK_OCF_OK) { char *metadata = unescape_newlines(op->output); lrm_state_update_rsc_metadata(lrm_state, rsc, metadata); free(metadata); } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h index 17c88ab91d..d9cb3f9724 100644 --- a/include/crm/lrmd.h +++ b/include/crm/lrmd.h @@ -1,486 +1,475 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ /** * \file * \brief Local Resource Manager * \ingroup lrmd */ #include #include #ifndef LRMD__H # define LRMD__H typedef struct lrmd_s lrmd_t; typedef struct lrmd_key_value_s { char *key; char *value; struct lrmd_key_value_s *next; } lrmd_key_value_t; #define LRMD_PROTOCOL_VERSION "1.1" /* *INDENT-OFF* */ #define DEFAULT_REMOTE_KEY_LOCATION "/etc/pacemaker/authkey" #define ALT_REMOTE_KEY_LOCATION "/etc/corosync/authkey" #define DEFAULT_REMOTE_PORT 3121 #define DEFAULT_REMOTE_USERNAME "lrmd" #define F_LRMD_OPERATION "lrmd_op" #define F_LRMD_CLIENTNAME "lrmd_clientname" #define F_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider" #define F_LRMD_CLIENTID "lrmd_clientid" #define F_LRMD_PROTOCOL_VERSION "lrmd_protocol_version" #define F_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type" #define F_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id" #define F_LRMD_CALLBACK_TOKEN "lrmd_async_id" #define F_LRMD_CALLID "lrmd_callid" #define F_LRMD_CANCEL_CALLID "lrmd_cancel_callid" #define F_LRMD_CALLOPTS "lrmd_callopt" #define F_LRMD_CALLDATA "lrmd_calldata" #define F_LRMD_RC "lrmd_rc" #define F_LRMD_EXEC_RC "lrmd_exec_rc" #define F_LRMD_OP_STATUS "lrmd_exec_op_status" #define F_LRMD_TIMEOUT "lrmd_timeout" #define F_LRMD_WATCHDOG "lrmd_watchdog" #define F_LRMD_CLASS "lrmd_class" #define F_LRMD_PROVIDER "lrmd_provider" #define F_LRMD_TYPE "lrmd_type" #define F_LRMD_ORIGIN "lrmd_origin" #define F_LRMD_RSC_RUN_TIME "lrmd_run_time" #define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time" #define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time" #define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time" #define F_LRMD_RSC_ID "lrmd_rsc_id" #define F_LRMD_RSC_ACTION "lrmd_rsc_action" #define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str" #define F_LRMD_RSC_OUTPUT "lrmd_rsc_output" #define F_LRMD_RSC_EXIT_REASON "lrmd_rsc_exit_reason" #define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay" #define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval" #define F_LRMD_RSC_METADATA "lrmd_rsc_metadata_res" #define F_LRMD_RSC_DELETED "lrmd_rsc_deleted" #define F_LRMD_RSC "lrmd_rsc" #define LRMD_OP_RSC_CHK_REG "lrmd_rsc_check_register" #define LRMD_OP_RSC_REG "lrmd_rsc_register" #define LRMD_OP_RSC_EXEC "lrmd_rsc_exec" #define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel" #define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister" #define LRMD_OP_RSC_INFO "lrmd_rsc_info" #define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata" #define LRMD_OP_POKE "lrmd_rsc_poke" #define LRMD_OP_NEW_CLIENT "lrmd_rsc_new_client" #define LRMD_OP_CHECK "lrmd_check" #define LRMD_IPC_OP_NEW "new" #define LRMD_IPC_OP_DESTROY "destroy" #define LRMD_IPC_OP_EVENT "event" #define LRMD_IPC_OP_REQUEST "request" #define LRMD_IPC_OP_RESPONSE "response" #define LRMD_IPC_OP_SHUTDOWN_REQ "shutdown_req" #define LRMD_IPC_OP_SHUTDOWN_ACK "shutdown_ack" #define LRMD_IPC_OP_SHUTDOWN_NACK "shutdown_nack" #define F_LRMD_IPC_OP "lrmd_ipc_op" #define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server" #define F_LRMD_IPC_SESSION "lrmd_ipc_session" #define F_LRMD_IPC_CLIENT "lrmd_ipc_client" #define F_LRMD_IPC_PROXY_NODE "lrmd_ipc_proxy_node" #define F_LRMD_IPC_USER "lrmd_ipc_user" #define F_LRMD_IPC_MSG "lrmd_ipc_msg" #define F_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id" #define F_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags" #define T_LRMD "lrmd" #define T_LRMD_REPLY "lrmd_reply" #define T_LRMD_NOTIFY "lrmd_notify" #define T_LRMD_IPC_PROXY "lrmd_ipc_proxy" /* *INDENT-ON* */ /*! * \brief Create a new local lrmd connection */ lrmd_t *lrmd_api_new(void); /*! * \brief Create a new remote lrmd connection using tls backend * * \param nodename name of remote node identified with this connection * \param server name of server to connect to * \param port port number to connect to * * \note nodename and server may be the same value. */ lrmd_t *lrmd_remote_api_new(const char *nodename, const char *server, int port); /*! * \brief Use after lrmd_poll returns 1 to read and dispatch a message * * \param[in,out] lrmd lrmd connection object * * \return TRUE if connection is still up, FALSE if disconnected */ bool lrmd_dispatch(lrmd_t * lrmd); /*! * \brief Poll for a specified timeout period to determine if a message * is ready for dispatch. * \retval 1 msg is ready * \retval 0 timeout occurred * \retval negative error code */ int lrmd_poll(lrmd_t * lrmd, int timeout); /*! * \brief Destroy lrmd object */ void lrmd_api_delete(lrmd_t * lrmd); lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t * kvp, const char *key, const char *value); /* *INDENT-OFF* */ /* Reserved for future use */ enum lrmd_call_options { lrmd_opt_none = 0x00000000, /* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */ /*! Only notify the client originating a exec() the results */ lrmd_opt_notify_orig_only = 0x00000002, /*! Drop recurring operations initiated by a client when client disconnects. * This call_option is only valid when registering a resource. When used * remotely with the pacemaker_remote daemon, this option means that recurring * operations will be dropped once all the remote connections disconnect. */ lrmd_opt_drop_recurring = 0x00000003, /*! Only send out notifications for recurring operations whenthe result changes */ lrmd_opt_notify_changes_only = 0x00000004, }; enum lrmd_callback_event { lrmd_event_register, lrmd_event_unregister, lrmd_event_exec_complete, lrmd_event_disconnect, lrmd_event_connect, lrmd_event_poke, lrmd_event_new_client, }; /* *INDENT-ON* */ -#ifdef ENABLE_VERSIONED_ATTRS -#include -#endif - typedef struct lrmd_event_data_s { /*! Type of event, register, unregister, call_completed... */ enum lrmd_callback_event type; /*! The resource this event occurred on. */ const char *rsc_id; /*! The action performed, start, stop, monitor... */ const char *op_type; /*! The userdata string given do exec() api function */ const char *user_data; /*! The client api call id associated with this event */ int call_id; /*! The operation's timeout period in ms. */ int timeout; /*! The operation's recurring interval in ms. */ int interval; /*! The operation's start delay value in ms. */ int start_delay; /*! This operation that just completed is on a deleted rsc. */ int rsc_deleted; /*! The executed ra return code mapped to OCF */ enum ocf_exitcode rc; /*! The lrmd status returned for exec_complete events */ int op_status; /*! stdout from resource agent operation */ const char *output; /*! Timestamp of when op ran */ unsigned int t_run; /*! Timestamp of last rc change */ unsigned int t_rcchange; /*! Time in length op took to execute */ unsigned int exec_time; /*! Time in length spent in queue */ unsigned int queue_time; /*! int connection result. Used for connection and poke events */ int connection_rc; /* This is a GHashTable containing the * parameters given to the operation */ void *params; /*! client node name associated with this connection * (used to match actions to the proper client when there are multiple) */ const char *remote_nodename; /*! exit failure reason string from resource agent operation */ const char *exit_reason; - -#ifdef ENABLE_VERSIONED_ATTRS - /* This is an xmlNode containing the versioned parameters - * that should be evaluated */ - xmlNode *versioned_params; -#endif - } lrmd_event_data_t; lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event); void lrmd_free_event(lrmd_event_data_t * event); typedef struct lrmd_rsc_info_s { char *id; char *type; char *class; char *provider; } lrmd_rsc_info_t; lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info); void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info); typedef void (*lrmd_event_callback) (lrmd_event_data_t * event); typedef struct lrmd_list_s { const char *val; struct lrmd_list_s *next; } lrmd_list_t; void lrmd_list_freeall(lrmd_list_t * head); void lrmd_key_value_freeall(lrmd_key_value_t * head); typedef struct lrmd_api_operations_s { /*! * \brief Connect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*connect) (lrmd_t * lrmd, const char *client_name, int *fd); /*! * \brief Establish an connection to lrmd, don't block while connecting. * \note this function requires the use of mainloop. * * \note The is returned using the event callback. * \note When this function returns 0, the callback will be invoked * to report the final result of the connect. * \retval 0, connect in progress, wait for event callback * \retval -1, failure. */ int (*connect_async) (lrmd_t * lrmd, const char *client_name, int timeout /*ms */ ); /*! * \brief Is connected to lrmd daemon? * * \retval 0, false * \retval 1, true */ int (*is_connected) (lrmd_t * lrmd); /*! * \brief Poke lrmd connection to verify it is still capable of serving requests * \note The response comes in the form of a poke event to the callback. * * \retval 0, wait for response in callback * \retval -1, connection failure, callback may not be invoked */ int (*poke_connection) (lrmd_t * lrmd); /*! * \brief Disconnect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*disconnect) (lrmd_t * lrmd); /*! * \brief Register a resource with the lrmd. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval negative error code on failure */ int (*register_rsc) (lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options); /*! * \brief Retrieve registration info for a rsc * * \retval info on success * \retval NULL on failure */ lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Unregister a resource from the lrmd. * * \note All pending and recurring operations will be cancelled * automatically. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval -1, success, but operations are currently executing on the rsc which will * return once they are completed. * \retval negative error code on failure * */ int (*unregister_rsc) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Sets the callback to receive lrmd events on. */ void (*set_callback) (lrmd_t * lrmd, lrmd_event_callback callback); /*! * \brief Issue a command on a resource * * \note Asynchronous, command is queued in daemon on function return, but * execution of command is not synced. * * \note Operations on individual resources are guaranteed to occur * in the order the client api calls them in. * * \note Operations between different resources are not guaranteed * to occur in any specific order in relation to one another * regardless of what order the client api is called in. * \retval call_id to track async event result on success * \retval negative error code on failure */ int (*exec) (lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, /* userdata string given back in event notification */ int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params); /* ownership of params is given up to api here */ /*! * \brief Cancel a recurring command. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \note The cancel is completed async from this call. * We can be guaranteed the cancel has completed once * the callback receives an exec_complete event with * the lrmd_op_status signifying that the operation is * cancelled. * \note For each resource, cancel operations and exec operations * are processed in the order they are received. * It is safe to assume that for a single resource, a cancel * will occur in the lrmd before an exec if the client's cancel * api call occurs before the exec api call. * * It is not however safe to assume any operation on one resource will * occur before an operation on another resource regardless of * the order the client api is called in. * * \retval 0, cancel command sent. * \retval negative error code on failure */ int (*cancel) (lrmd_t * lrmd, const char *rsc_id, const char *action, int interval); /*! * \brief Get the metadata documentation for a resource. * * \note Value is returned in output. Output must be freed when set * * \retval lrmd_ok success * \retval negative error code on failure */ int (*get_metadata) (lrmd_t * lrmd, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options); /*! * \brief Retrieve a list of installed resource agents. * * \note if class is not provided, all known agents will be returned * \note list must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_agents) (lrmd_t * lrmd, lrmd_list_t ** agents, const char *class, const char *provider); /*! * \brief Retrieve a list of resource agent providers * * \note When the agent is provided, only the agent's provider will be returned * \note When no agent is supplied, all providers will be returned. * \note List must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_ocf_providers) (lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers); /*! * \brief Retrieve a list of standards supported by this machine/installation * * \note List must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_standards) (lrmd_t * lrmd, lrmd_list_t ** standards); } lrmd_api_operations_t; struct lrmd_s { lrmd_api_operations_t *cmds; void *private; }; static inline const char * lrmd_event_type2str(enum lrmd_callback_event type) { switch (type) { case lrmd_event_register: return "register"; case lrmd_event_unregister: return "unregister"; case lrmd_event_exec_complete: return "exec_complete"; case lrmd_event_disconnect: return "disconnect"; case lrmd_event_connect: return "connect"; case lrmd_event_poke: return "poke"; case lrmd_event_new_client: return "new_client"; } return "unknown"; } #endif diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index 45af41adcd..61be546236 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -1,441 +1,442 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef XML_TAGS__H # define XML_TAGS__H # ifndef F_ORIG # define F_ORIG "src" # endif # ifndef F_SEQ # define F_SEQ "seq" # endif # ifndef F_SUBTYPE # define F_SUBTYPE "subt" # endif # ifndef F_TYPE # define F_TYPE "t" # endif # ifndef F_CLIENTNAME # define F_CLIENTNAME "cn" # endif # ifndef F_XML_TAGNAME # define F_XML_TAGNAME "__name__" # endif # ifndef T_CRM # define T_CRM "crmd" # endif # ifndef T_ATTRD # define T_ATTRD "attrd" # endif # define CIB_OPTIONS_FIRST "cib-bootstrap-options" # define F_CRM_DATA "crm_xml" # define F_CRM_TASK "crm_task" # define F_CRM_HOST_TO "crm_host_to" # define F_CRM_MSG_TYPE F_SUBTYPE # define F_CRM_SYS_TO "crm_sys_to" # define F_CRM_SYS_FROM "crm_sys_from" # define F_CRM_HOST_FROM F_ORIG # define F_CRM_REFERENCE XML_ATTR_REFERENCE # define F_CRM_VERSION XML_ATTR_VERSION # define F_CRM_ORIGIN "origin" # define F_CRM_USER "crm_user" # define F_CRM_JOIN_ID "join_id" # define F_CRM_DC_LEAVING "dc-leaving" # define F_CRM_ELECTION_ID "election-id" # define F_CRM_ELECTION_AGE_S "election-age-sec" # define F_CRM_ELECTION_AGE_US "election-age-nano-sec" # define F_CRM_ELECTION_OWNER "election-owner" # define F_CRM_TGRAPH "crm-tgraph" # define F_CRM_TGRAPH_INPUT "crm-tgraph-in" # define F_CRM_THROTTLE_MODE "crm-limit-mode" # define F_CRM_THROTTLE_MAX "crm-limit-max" /*---- Common tags/attrs */ # define XML_DIFF_MARKER "__crm_diff_marker__" # define XML_ATTR_TAGNAME F_XML_TAGNAME # define XML_TAG_CIB "cib" # define XML_TAG_FAILED "failed" # define XML_ATTR_CRM_VERSION "crm_feature_set" # define XML_ATTR_DIGEST "digest" # define XML_ATTR_VALIDATION "validate-with" +# define XML_ATTR_RA_VERSION "ra-version" # define XML_ATTR_QUORUM_PANIC "no-quorum-panic" # define XML_ATTR_HAVE_QUORUM "have-quorum" # define XML_ATTR_HAVE_WATCHDOG "have-watchdog" # define XML_ATTR_EXPECTED_VOTES "expected-quorum-votes" # define XML_ATTR_GENERATION "epoch" # define XML_ATTR_GENERATION_ADMIN "admin_epoch" # define XML_ATTR_NUMUPDATES "num_updates" # define XML_ATTR_TIMEOUT "timeout" # define XML_ATTR_ORIGIN "crm-debug-origin" # define XML_ATTR_TSTAMP "crm-timestamp" # define XML_CIB_ATTR_WRITTEN "cib-last-written" # define XML_ATTR_VERSION "version" # define XML_ATTR_DESC "description" # define XML_ATTR_ID "id" # define XML_ATTR_IDREF "id-ref" # define XML_ATTR_ID_LONG "long-id" # define XML_ATTR_TYPE "type" # define XML_ATTR_FILTER_TYPE "type-filter" # define XML_ATTR_FILTER_ID "id-filter" # define XML_ATTR_FILTER_PRIORITY "priority-filter" # define XML_ATTR_VERBOSE "verbose" # define XML_ATTR_OP "op" # define XML_ATTR_DC "is_dc" # define XML_ATTR_DC_UUID "dc-uuid" # define XML_ATTR_UPDATE_ORIG "update-origin" # define XML_ATTR_UPDATE_CLIENT "update-client" # define XML_ATTR_UPDATE_USER "update-user" # define XML_BOOLEAN_TRUE "true" # define XML_BOOLEAN_FALSE "false" # define XML_BOOLEAN_YES XML_BOOLEAN_TRUE # define XML_BOOLEAN_NO XML_BOOLEAN_FALSE # define XML_TAG_OPTIONS "options" /*---- top level tags/attrs */ # define XML_MSG_TAG "crm_message" # define XML_MSG_TAG_DATA "msg_data" # define XML_ATTR_REQUEST "request" # define XML_ATTR_RESPONSE "response" # define XML_ATTR_UNAME "uname" # define XML_ATTR_UUID "id" # define XML_ATTR_REFERENCE "reference" # define XML_FAIL_TAG_RESOURCE "failed_resource" # define XML_FAILRES_ATTR_RESID "resource_id" # define XML_FAILRES_ATTR_REASON "reason" # define XML_FAILRES_ATTR_RESSTATUS "resource_status" # define XML_CRM_TAG_PING "ping_response" # define XML_PING_ATTR_STATUS "result" # define XML_PING_ATTR_SYSFROM "crm_subsystem" # define XML_TAG_FRAGMENT "cib_fragment" # define XML_ATTR_RESULT "result" # define XML_ATTR_SECTION "section" # define XML_FAIL_TAG_CIB "failed_update" # define XML_FAILCIB_ATTR_ID "id" # define XML_FAILCIB_ATTR_OBJTYPE "object_type" # define XML_FAILCIB_ATTR_OP "operation" # define XML_FAILCIB_ATTR_REASON "reason" /*---- CIB specific tags/attrs */ # define XML_CIB_TAG_SECTION_ALL "all" # define XML_CIB_TAG_CONFIGURATION "configuration" # define XML_CIB_TAG_STATUS "status" # define XML_CIB_TAG_RESOURCES "resources" # define XML_CIB_TAG_NODES "nodes" # define XML_CIB_TAG_DOMAINS "domains" # define XML_CIB_TAG_CONSTRAINTS "constraints" # define XML_CIB_TAG_CRMCONFIG "crm_config" # define XML_CIB_TAG_OPCONFIG "op_defaults" # define XML_CIB_TAG_RSCCONFIG "rsc_defaults" # define XML_CIB_TAG_ACLS "acls" # define XML_CIB_TAG_ALERTS "alerts" # define XML_CIB_TAG_ALERT "alert" # define XML_CIB_TAG_ALERT_RECIPIENT "recipient" # define XML_CIB_TAG_STATE "node_state" # define XML_CIB_TAG_NODE "node" # define XML_CIB_TAG_DOMAIN "domain" # define XML_CIB_TAG_CONSTRAINT "constraint" # define XML_CIB_TAG_NVPAIR "nvpair" # define XML_CIB_TAG_PROPSET "cluster_property_set" # define XML_TAG_ATTR_SETS "instance_attributes" # define XML_TAG_META_SETS "meta_attributes" # define XML_TAG_ATTRS "attributes" #ifdef ENABLE_VERSIONED_ATTRS # define XML_TAG_VER_ATTRS "versioned_attributes" #endif # define XML_TAG_PARAMS "parameters" # define XML_TAG_PARAM "param" # define XML_TAG_UTILIZATION "utilization" # define XML_TAG_RESOURCE_REF "resource_ref" # define XML_CIB_TAG_RESOURCE "primitive" # define XML_CIB_TAG_GROUP "group" # define XML_CIB_TAG_INCARNATION "clone" # define XML_CIB_TAG_MASTER "master" # define XML_CIB_TAG_CONTAINER "bundle" # define XML_CIB_TAG_RSC_TEMPLATE "template" # define XML_RSC_ATTR_ISOLATION_INSTANCE "isolation-instance" # define XML_RSC_ATTR_ISOLATION_WRAPPER "isolation-wrapper" # define XML_RSC_ATTR_ISOLATION_HOST "isolation-host" # define XML_RSC_ATTR_ISOLATION "isolation" # define XML_RSC_ATTR_RESTART "restart-type" # define XML_RSC_ATTR_ORDERED "ordered" # define XML_RSC_ATTR_INTERLEAVE "interleave" # define XML_RSC_ATTR_INCARNATION "clone" # define XML_RSC_ATTR_INCARNATION_MAX "clone-max" # define XML_RSC_ATTR_INCARNATION_MIN "clone-min" # define XML_RSC_ATTR_INCARNATION_NODEMAX "clone-node-max" # define XML_RSC_ATTR_MASTER_MAX "master-max" # define XML_RSC_ATTR_MASTER_NODEMAX "master-node-max" # define XML_RSC_ATTR_STATE "clone-state" # define XML_RSC_ATTR_MANAGED "is-managed" # define XML_RSC_ATTR_TARGET_ROLE "target-role" # define XML_RSC_ATTR_UNIQUE "globally-unique" # define XML_RSC_ATTR_NOTIFY "notify" # define XML_RSC_ATTR_STICKINESS "resource-stickiness" # define XML_RSC_ATTR_FAIL_STICKINESS "migration-threshold" # define XML_RSC_ATTR_FAIL_TIMEOUT "failure-timeout" # define XML_RSC_ATTR_MULTIPLE "multiple-active" # define XML_RSC_ATTR_PRIORITY "priority" # define XML_RSC_ATTR_REQUIRES "requires" # define XML_RSC_ATTR_PROVIDES "provides" # define XML_RSC_ATTR_CONTAINER "container" # define XML_RSC_ATTR_INTERNAL_RSC "internal_rsc" # define XML_RSC_ATTR_MAINTENANCE "maintenance" # define XML_RSC_ATTR_REMOTE_NODE "remote-node" # define XML_RSC_ATTR_CLEAR_OP "clear_failure_op" # define XML_RSC_ATTR_CLEAR_INTERVAL "clear_failure_interval" # define XML_REMOTE_ATTR_RECONNECT_INTERVAL "reconnect_interval" # define XML_OP_ATTR_ON_FAIL "on-fail" # define XML_OP_ATTR_START_DELAY "start-delay" # define XML_OP_ATTR_ALLOW_MIGRATE "allow-migrate" # define XML_OP_ATTR_DEPENDENT "dependent-on" # define XML_OP_ATTR_ORIGIN "interval-origin" # define XML_OP_ATTR_PENDING "record-pending" # define XML_CIB_TAG_LRM "lrm" # define XML_LRM_TAG_RESOURCES "lrm_resources" # define XML_LRM_TAG_RESOURCE "lrm_resource" # define XML_LRM_TAG_AGENTS "lrm_agents" # define XML_LRM_TAG_AGENT "lrm_agent" # define XML_LRM_TAG_RSC_OP "lrm_rsc_op" # define XML_AGENT_ATTR_CLASS "class" # define XML_AGENT_ATTR_PROVIDER "provider" # define XML_LRM_TAG_ATTRIBUTES "attributes" # define XML_CIB_ATTR_REPLACE "replace" # define XML_CIB_ATTR_SOURCE "source" # define XML_CIB_ATTR_HEALTH "health" # define XML_CIB_ATTR_WEIGHT "weight" # define XML_CIB_ATTR_PRIORITY "priority" # define XML_CIB_ATTR_CLEAR "clear_on" # define XML_CIB_ATTR_SOURCE "source" # define XML_NODE_JOIN_STATE "join" # define XML_NODE_EXPECTED "expected" # define XML_NODE_IN_CLUSTER "in_ccm" # define XML_NODE_IS_PEER "crmd" # define XML_NODE_IS_REMOTE "remote_node" # define XML_NODE_IS_FENCED "node_fenced" # define XML_NODE_IS_MAINTENANCE "node_in_maintenance" # define XML_CIB_ATTR_SHUTDOWN "shutdown" # define XML_CIB_ATTR_STONITH "stonith" # define XML_CIB_ATTR_STANDBY "standby" /* LRM is a bit of a misnomer here; the crmd and pengine use these to track * actions, which usually but not always are LRM operations */ # define XML_LRM_ATTR_INTERVAL "interval" # define XML_LRM_ATTR_TASK "operation" # define XML_LRM_ATTR_TASK_KEY "operation_key" # define XML_LRM_ATTR_TARGET "on_node" # define XML_LRM_ATTR_TARGET_UUID "on_node_uuid" /*! Actions to be executed on Pacemaker Remote nodes are routed through * crmd on the cluster node hosting the remote connection. That cluster node * is considered the router node for the action. */ # define XML_LRM_ATTR_ROUTER_NODE "router_node" # define XML_LRM_ATTR_RSCID "rsc-id" # define XML_LRM_ATTR_OPSTATUS "op-status" # define XML_LRM_ATTR_RC "rc-code" # define XML_LRM_ATTR_CALLID "call-id" # define XML_LRM_ATTR_OP_DIGEST "op-digest" # define XML_LRM_ATTR_OP_RESTART "op-force-restart" # define XML_LRM_ATTR_OP_SECURE "op-secure-params" # define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest" # define XML_LRM_ATTR_SECURE_DIGEST "op-secure-digest" # define XML_LRM_ATTR_EXIT_REASON "exit-reason" # define XML_RSC_OP_LAST_CHANGE "last-rc-change" # define XML_RSC_OP_LAST_RUN "last-run" # define XML_RSC_OP_T_EXEC "exec-time" # define XML_RSC_OP_T_QUEUE "queue-time" # define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source" # define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target" # define XML_TAG_GRAPH "transition_graph" # define XML_GRAPH_TAG_RSC_OP "rsc_op" # define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event" # define XML_GRAPH_TAG_CRM_EVENT "crm_event" # define XML_GRAPH_TAG_DOWNED "downed" # define XML_GRAPH_TAG_MAINTENANCE "maintenance" # define XML_TAG_RULE "rule" # define XML_RULE_ATTR_SCORE "score" # define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute" /* following has no use (hardly ever meaningful); kept for compatibility */ # define XML_RULE_ATTR_SCORE_MANGLED "score-attribute-mangled" # define XML_RULE_ATTR_ROLE "role" # define XML_RULE_ATTR_RESULT "result" # define XML_RULE_ATTR_BOOLEAN_OP "boolean-op" # define XML_TAG_EXPRESSION "expression" # define XML_EXPR_ATTR_ATTRIBUTE "attribute" # define XML_EXPR_ATTR_OPERATION "operation" # define XML_EXPR_ATTR_VALUE "value" # define XML_EXPR_ATTR_TYPE "type" # define XML_EXPR_ATTR_VALUE_SOURCE "value-source" # define XML_CONS_TAG_RSC_DEPEND "rsc_colocation" # define XML_CONS_TAG_RSC_ORDER "rsc_order" # define XML_CONS_TAG_RSC_LOCATION "rsc_location" # define XML_CONS_TAG_RSC_TICKET "rsc_ticket" # define XML_CONS_TAG_RSC_SET "resource_set" # define XML_CONS_ATTR_SYMMETRICAL "symmetrical" # define XML_LOCATION_ATTR_DISCOVERY "resource-discovery" # define XML_COLOC_ATTR_SOURCE "rsc" # define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role" # define XML_COLOC_ATTR_TARGET "with-rsc" # define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role" # define XML_COLOC_ATTR_NODE_ATTR "node-attribute" # define XML_COLOC_ATTR_SOURCE_INSTANCE "rsc-instance" # define XML_COLOC_ATTR_TARGET_INSTANCE "with-rsc-instance" # define XML_LOC_ATTR_SOURCE "rsc" # define XML_LOC_ATTR_SOURCE_PATTERN "rsc-pattern" # define XML_ORDER_ATTR_FIRST "first" # define XML_ORDER_ATTR_THEN "then" # define XML_ORDER_ATTR_FIRST_ACTION "first-action" # define XML_ORDER_ATTR_THEN_ACTION "then-action" # define XML_ORDER_ATTR_FIRST_INSTANCE "first-instance" # define XML_ORDER_ATTR_THEN_INSTANCE "then-instance" # define XML_ORDER_ATTR_KIND "kind" # define XML_TICKET_ATTR_TICKET "ticket" # define XML_TICKET_ATTR_LOSS_POLICY "loss-policy" # define XML_NVPAIR_ATTR_NAME "name" # define XML_NVPAIR_ATTR_VALUE "value" # define XML_NODE_ATTR_STATE "state" # define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled" # define XML_CONFIG_ATTR_DC_DEADTIME "dc-deadtime" # define XML_CONFIG_ATTR_ELECTION_FAIL "election-timeout" # define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation" # define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval" # define XML_ALERT_ATTR_PATH "path" # define XML_ALERT_ATTR_TIMEOUT "timeout" # define XML_ALERT_ATTR_TSTAMP_FORMAT "timestamp-format" # define XML_ALERT_ATTR_REC_VALUE "value" # define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple" # define XML_ATTR_TRANSITION_MAGIC "transition-magic" # define XML_ATTR_TRANSITION_KEY "transition-key" # define XML_ATTR_TE_NOWAIT "op_no_wait" # define XML_ATTR_TE_TARGET_RC "op_target_rc" # define XML_ATTR_LRM_PROBE "lrm-is-probe" # define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes" # define XML_TAG_DIFF_ADDED "diff-added" # define XML_TAG_DIFF_REMOVED "diff-removed" # define XML_ACL_TAG_USER "acl_target" # define XML_ACL_TAG_USERv1 "acl_user" # define XML_ACL_TAG_GROUP "acl_group" # define XML_ACL_TAG_ROLE "acl_role" # define XML_ACL_TAG_PERMISSION "acl_permission" # define XML_ACL_TAG_ROLE_REF "role" # define XML_ACL_TAG_ROLE_REFv1 "role_ref" # define XML_ACL_ATTR_KIND "kind" # define XML_ACL_TAG_READ "read" # define XML_ACL_TAG_WRITE "write" # define XML_ACL_TAG_DENY "deny" # define XML_ACL_ATTR_REF "reference" # define XML_ACL_ATTR_REFv1 "ref" # define XML_ACL_ATTR_TAG "object-type" # define XML_ACL_ATTR_TAGv1 "tag" # define XML_ACL_ATTR_XPATH "xpath" # define XML_ACL_ATTR_ATTRIBUTE "attribute" # define XML_CIB_TAG_TICKETS "tickets" # define XML_CIB_TAG_TICKET_STATE "ticket_state" # define XML_CIB_TAG_TAGS "tags" # define XML_CIB_TAG_TAG "tag" # define XML_CIB_TAG_OBJ_REF "obj_ref" # define XML_TAG_FENCING_TOPOLOGY "fencing-topology" # define XML_TAG_FENCING_LEVEL "fencing-level" # define XML_ATTR_STONITH_INDEX "index" # define XML_ATTR_STONITH_TARGET "target" # define XML_ATTR_STONITH_TARGET_VALUE "target-value" # define XML_ATTR_STONITH_TARGET_PATTERN "target-pattern" # define XML_ATTR_STONITH_TARGET_ATTRIBUTE "target-attribute" # define XML_ATTR_STONITH_DEVICES "devices" # define XML_TAG_DIFF "diff" # define XML_DIFF_VERSION "version" # define XML_DIFF_VSOURCE "source" # define XML_DIFF_VTARGET "target" # define XML_DIFF_CHANGE "change" # define XML_DIFF_LIST "change-list" # define XML_DIFF_ATTR "change-attr" # define XML_DIFF_RESULT "change-result" # define XML_DIFF_OP "operation" # define XML_DIFF_PATH "path" # define XML_DIFF_POSITION "position" /* Defined for backward API compatibility but no longer used by Pacemaker */ # define XML_ATTR_TE_ALLOWFAIL "op_allow_fail" # include # define ID(x) crm_element_value(x, XML_ATTR_ID) # define INSTANCE(x) crm_element_value(x, XML_CIB_ATTR_INSTANCE) # define TSTAMP(x) crm_element_value(x, XML_ATTR_TSTAMP) # define TYPE(x) crm_element_name(x) # define NAME(x) crm_element_value(x, XML_NVPAIR_ATTR_NAME) # define VALUE(x) crm_element_value(x, XML_NVPAIR_ATTR_VALUE) #endif diff --git a/include/crm/pengine/rules.h b/include/crm/pengine/rules.h index 8e058796a4..32b3aca2d1 100644 --- a/include/crm/pengine/rules.h +++ b/include/crm/pengine/rules.h @@ -1,84 +1,86 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PENGINE_RULES__H # define PENGINE_RULES__H # include # include # include # include # include enum expression_type { not_expr, nested_rule, attr_expr, loc_expr, role_expr, time_expr, #ifdef ENABLE_VERSIONED_ATTRS version_expr #endif }; typedef struct pe_re_match_data { char *string; int nregs; regmatch_t *pmatch; } pe_re_match_data_t; typedef struct pe_match_data { pe_re_match_data_t *re; GHashTable *params; GHashTable *meta; } pe_match_data_t; enum expression_type find_expression_type(xmlNode * expr); gboolean test_ruleset(xmlNode * ruleset, GHashTable * node_hash, crm_time_t * now); gboolean test_rule(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now); gboolean pe_test_rule_re(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_re_match_data_t * re_match_data); gboolean pe_test_rule_full(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_match_data_t * match_data); gboolean test_expression(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now); gboolean pe_test_expression_re(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_re_match_data_t * re_match_data); gboolean pe_test_expression_full(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_match_data_t * match_data); void unpack_instance_attributes(xmlNode * top, xmlNode * xml_obj, const char *set_name, GHashTable * node_hash, GHashTable * hash, const char *always_first, gboolean overwrite, crm_time_t * now); #ifdef ENABLE_VERSIONED_ATTRS void pe_unpack_versioned_attributes(xmlNode * top, xmlNode * xml_obj, const char *set_name, GHashTable * node_hash, xmlNode * hash, crm_time_t * now); #endif char *pe_expand_re_matches(const char *string, pe_re_match_data_t * match_data); +GHashTable *pe_unpack_versioned_parameters(xmlNode *versioned_params, const char *ra_version); + #endif diff --git a/include/crm_internal.h b/include/crm_internal.h index 7944c8a30e..e40b4ba625 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,395 +1,390 @@ /* crm_internal.h */ /* * Copyright (C) 2006 - 2008 * Andrew Beekhof * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_INTERNAL__H # define CRM_INTERNAL__H # include # include # include # include # include # include # include # include # include /* Dynamic loading of libraries */ void *find_library_function(void **handle, const char *lib, const char *fn, int fatal); void *convert_const_pointer(const void *ptr); /* For ACLs */ char *uid2username(uid_t uid); void determine_request_user(const char *user, xmlNode * request, const char *field); const char *crm_acl_get_set_user(xmlNode * request, const char *field, const char *peer_user); # if ENABLE_ACL # include static inline gboolean is_privileged(const char *user) { if (user == NULL) { return FALSE; } else if (strcmp(user, CRM_DAEMON_USER) == 0) { return TRUE; } else if (strcmp(user, "root") == 0) { return TRUE; } return FALSE; } # endif /* CLI option processing*/ # ifdef HAVE_GETOPT_H # include # else # define no_argument 0 # define required_argument 1 # endif # define pcmk_option_default 0x00000 # define pcmk_option_hidden 0x00001 # define pcmk_option_paragraph 0x00002 # define pcmk_option_example 0x00004 struct crm_option { /* Fields from 'struct option' in getopt.h */ /* name of long option */ const char *name; /* * one of no_argument, required_argument, and optional_argument: * whether option takes an argument */ int has_arg; /* if not NULL, set *flag to val when option found */ int *flag; /* if flag not NULL, value to set *flag to; else return value */ int val; /* Custom fields */ const char *desc; long flags; }; void crm_set_options(const char *short_options, const char *usage, struct crm_option *long_options, const char *app_desc); int crm_get_option(int argc, char **argv, int *index); int crm_get_option_long(int argc, char **argv, int *index, const char **longname); int crm_help(char cmd, int exit_code); /* Cluster Option Processing */ typedef struct pe_cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; gboolean(*is_valid) (const char *); const char *description_short; const char *description_long; } pe_cluster_option; const char *cluster_option(GHashTable * options, gboolean(*validate) (const char *), const char *name, const char *old_name, const char *def_value); const char *get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name); void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option * option_list, int len); void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len); gboolean check_time(const char *value); gboolean check_timer(const char *value); gboolean check_boolean(const char *value); gboolean check_number(const char *value); gboolean check_positive_number(const char *value); gboolean check_quorum(const char *value); gboolean check_script(const char *value); gboolean check_utilization(const char *value); long crm_get_sbd_timeout(void); gboolean check_sbd_timeout(const char *value); /* Shared PE/crmd functionality */ void filter_action_parameters(xmlNode * param_set, const char *version); /* Resource operation updates */ xmlNode *create_operation_update(xmlNode * parent, lrmd_event_data_t * event, const char * caller_version, int target_rc, const char * node, const char * origin, int level); /* char2score */ extern int node_score_red; extern int node_score_green; extern int node_score_yellow; extern int node_score_infinity; /* Assorted convenience functions */ int crm_pid_active(long pid, const char *daemon); void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile); char *generate_op_key(const char *rsc_id, const char *op_type, int interval); char *generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type); char *generate_transition_magic_v202(const char *transition_key, int op_status); char *generate_transition_magic(const char *transition_key, int op_status, int op_rc); char *generate_transition_key(int action, int transition_id, int target_rc, const char *node); static inline long long crm_clear_bit(const char *function, int line, const char *target, long long word, long long bit) { long long rc = (word & ~bit); if (rc == word) { /* Unchanged */ } else if (target) { crm_trace("Bit 0x%.8llx for %s cleared by %s:%d", bit, target, function, line); } else { crm_trace("Bit 0x%.8llx cleared by %s:%d", bit, function, line); } return rc; } static inline long long crm_set_bit(const char *function, int line, const char *target, long long word, long long bit) { long long rc = (word | bit); if (rc == word) { /* Unchanged */ } else if (target) { crm_trace("Bit 0x%.8llx for %s set by %s:%d", bit, target, function, line); } else { crm_trace("Bit 0x%.8llx set by %s:%d", bit, function, line); } return rc; } # define set_bit(word, bit) word = crm_set_bit(__FUNCTION__, __LINE__, NULL, word, bit) # define clear_bit(word, bit) word = crm_clear_bit(__FUNCTION__, __LINE__, NULL, word, bit) char *generate_hash_key(const char *crm_msg_reference, const char *sys); /*! remote tcp/tls helper functions */ typedef struct crm_remote_s crm_remote_t; int crm_remote_send(crm_remote_t * remote, xmlNode * msg); int crm_remote_ready(crm_remote_t * remote, int total_timeout /*ms */ ); gboolean crm_remote_recv(crm_remote_t * remote, int total_timeout /*ms */ , int *disconnected); xmlNode *crm_remote_parse_buffer(crm_remote_t * remote); int crm_remote_tcp_connect(const char *host, int port); int crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ int *timer_id, void *userdata, void (*callback) (void *userdata, int sock)); int crm_remote_accept(int ssock); void crm_sockaddr2str(void *sa, char *s); # ifdef HAVE_GNUTLS_GNUTLS_H /*! * \internal * \brief Initiate the client handshake after establishing the tcp socket. * \note This is a blocking function, it will block until the entire handshake * is complete or until the timeout period is reached. * \retval 0 success * \retval negative, failure */ int crm_initiate_client_tls_handshake(crm_remote_t * remote, int timeout_ms); /*! * \internal * \brief Create client or server session for anon DH encryption credentials * \param sock, the socket the session will use for transport * \param type, GNUTLS_SERVER or GNUTLS_CLIENT * \param credentials, gnutls_anon_server_credentials_t or gnutls_anon_client_credentials_t * * \retval gnutls_session_t * on success * \retval NULL on failure */ void *crm_create_anon_tls_session(int sock, int type, void *credentials); /*! * \internal * \brief Create client or server session for PSK credentials * \param sock, the socket the session will use for transport * \param type, GNUTLS_SERVER or GNUTLS_CLIENT * \param credentials, gnutls_psk_server_credentials_t or gnutls_osk_client_credentials_t * * \retval gnutls_session_t * on success * \retval NULL on failure */ void *create_psk_tls_session(int csock, int type, void *credentials); # endif # define REMOTE_MSG_TERMINATOR "\r\n\r\n" const char *daemon_option(const char *option); void set_daemon_option(const char *option, const char *value); gboolean daemon_option_enabled(const char *daemon, const char *option); void strip_text_nodes(xmlNode * xml); void pcmk_panic(const char *origin); void sysrq_init(void); pid_t pcmk_locate_sbd(void); long crm_pidfile_inuse(const char *filename, long mypid, const char *daemon); long crm_read_pidfile(const char *filename); # define crm_config_err(fmt...) { crm_config_error = TRUE; crm_err(fmt); } # define crm_config_warn(fmt...) { crm_config_warning = TRUE; crm_warn(fmt); } # define attrd_channel T_ATTRD # define F_ATTRD_KEY "attr_key" # define F_ATTRD_ATTRIBUTE "attr_name" # define F_ATTRD_REGEX "attr_regex" # define F_ATTRD_TASK "task" # define F_ATTRD_VALUE "attr_value" # define F_ATTRD_SET "attr_set" # define F_ATTRD_IS_REMOTE "attr_is_remote" # define F_ATTRD_IS_PRIVATE "attr_is_private" # define F_ATTRD_SECTION "attr_section" # define F_ATTRD_DAMPEN "attr_dampening" # define F_ATTRD_IGNORE_LOCALLY "attr_ignore_locally" # define F_ATTRD_HOST "attr_host" # define F_ATTRD_HOST_ID "attr_host_id" # define F_ATTRD_USER "attr_user" # define F_ATTRD_WRITER "attr_writer" # define F_ATTRD_VERSION "attr_version" # define F_ATTRD_RESOURCE "attr_resource" # define F_ATTRD_OPERATION "attr_clear_operation" # define F_ATTRD_INTERVAL "attr_clear_interval" /* attrd operations */ # define ATTRD_OP_PEER_REMOVE "peer-remove" # define ATTRD_OP_UPDATE "update" # define ATTRD_OP_UPDATE_BOTH "update-both" # define ATTRD_OP_UPDATE_DELAY "update-delay" # define ATTRD_OP_QUERY "query" # define ATTRD_OP_REFRESH "refresh" # define ATTRD_OP_FLUSH "flush" # define ATTRD_OP_SYNC "sync" # define ATTRD_OP_SYNC_RESPONSE "sync-response" # define ATTRD_OP_CLEAR_FAILURE "clear-failure" # if SUPPORT_COROSYNC # if CS_USES_LIBQB # include # include typedef struct qb_ipc_request_header cs_ipc_header_request_t; typedef struct qb_ipc_response_header cs_ipc_header_response_t; # else # include # include # include typedef coroipc_request_header_t cs_ipc_header_request_t; typedef coroipc_response_header_t cs_ipc_header_response_t; # endif # else typedef struct { int size __attribute__ ((aligned(8))); int id __attribute__ ((aligned(8))); } __attribute__ ((aligned(8))) cs_ipc_header_request_t; typedef struct { int size __attribute__ ((aligned(8))); int id __attribute__ ((aligned(8))); int error __attribute__ ((aligned(8))); } __attribute__ ((aligned(8))) cs_ipc_header_response_t; # endif void attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); void stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); qb_ipcs_service_t * crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb); void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb); void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm); static inline void *realloc_safe(void *ptr, size_t size) { void *ret = realloc(ptr, size); if (ret == NULL) { free(ptr); /* make coverity happy */ abort(); } return ret; } const char *crm_xml_add_last_written(xmlNode *xml_node); void crm_xml_dump(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth); void crm_buffer_add_char(char **buffer, int *offset, int *max, char c); gboolean crm_digest_verify(xmlNode *input, const char *expected); /* cross-platform compatibility functions */ char *crm_compat_realpath(const char *path); /* IPC Proxy Backend Shared Functions */ typedef struct remote_proxy_s { char *node_name; char *session_id; gboolean is_local; crm_ipc_t *ipc; mainloop_io_t *source; uint32_t last_request_id; lrmd_t *lrm; } remote_proxy_t; remote_proxy_t *remote_proxy_new( lrmd_t *lrmd, struct ipc_client_callbacks *proxy_callbacks, const char *node_name, const char *session_id, const char *channel); int remote_proxy_check(lrmd_t *lrmd, GHashTable *hash); void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg); void remote_proxy_ack_shutdown(lrmd_t *lrmd); void remote_proxy_nack_shutdown(lrmd_t *lrmd); int remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata); void remote_proxy_disconnected(gpointer data); void remote_proxy_free(gpointer data); void remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg); void remote_proxy_relay_response(remote_proxy_t *proxy, xmlNode *msg, int msg_id); - -#ifdef ENABLE_VERSIONED_ATTRS -char* crm_versioned_param_summary(xmlNode *versioned_params, const char *name); -void crm_summarize_versioned_params(xmlNode *param_set, xmlNode *versioned_params); -#endif #endif /* CRM_INTERNAL__H */ diff --git a/lib/common/digest.c b/lib/common/digest.c index b96ed0030d..fb2b8517bf 100644 --- a/lib/common/digest.c +++ b/lib/common/digest.c @@ -1,311 +1,243 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #define BEST_EFFORT_STATUS 0 /*! * \brief Dump XML in a format used with v1 digests * * \param[in] an_xml_node Root of XML to dump * * \return Newly allocated buffer containing dumped XML */ static char * dump_xml_for_digest(xmlNode * an_xml_node) { char *buffer = NULL; int offset = 0, max = 0; /* for compatibility with the old result which is used for v1 digests */ crm_buffer_add_char(&buffer, &offset, &max, ' '); crm_xml_dump(an_xml_node, 0, &buffer, &offset, &max, 0); crm_buffer_add_char(&buffer, &offset, &max, '\n'); return buffer; } /*! * \brief Calculate and return v1 digest of XML tree * * \param[in] input Root of XML to digest * \param[in] sort Whether to sort the XML before calculating digest * \param[in] ignored Not used * * \return Newly allocated string containing digest * \note Example return value: "c048eae664dba840e1d2060f00299e9d" */ static char * calculate_xml_digest_v1(xmlNode * input, gboolean sort, gboolean ignored) { char *digest = NULL; char *buffer = NULL; xmlNode *copy = NULL; if (sort) { crm_trace("Sorting xml..."); copy = sorted_xml(input, NULL, TRUE); crm_trace("Done"); input = copy; } buffer = dump_xml_for_digest(input); CRM_CHECK(buffer != NULL && strlen(buffer) > 0, free_xml(copy); free(buffer); return NULL); digest = crm_md5sum(buffer); crm_log_xml_trace(input, "digest:source"); free(buffer); free_xml(copy); return digest; } /*! * \brief Calculate and return v2 digest of XML tree * * \param[in] source Root of XML to digest * \param[in] do_filter Whether to filter certain XML attributes * * \return Newly allocated string containing digest */ static char * calculate_xml_digest_v2(xmlNode * source, gboolean do_filter) { char *digest = NULL; char *buffer = NULL; int offset, max; static struct qb_log_callsite *digest_cs = NULL; crm_trace("Begin digest %s", do_filter?"filtered":""); if (do_filter && BEST_EFFORT_STATUS) { /* Exclude the status calculation from the digest * * This doesn't mean it won't be sync'd, we just won't be paranoid * about it being an _exact_ copy * * We don't need it to be exact, since we throw it away and regenerate * from our peers whenever a new DC is elected anyway * * Importantly, this reduces the amount of XML to copy+export as * well as the amount of data for MD5 needs to operate on */ } else { crm_xml_dump(source, do_filter ? xml_log_option_filtered : 0, &buffer, &offset, &max, 0); } CRM_ASSERT(buffer != NULL); digest = crm_md5sum(buffer); if (digest_cs == NULL) { digest_cs = qb_log_callsite_get(__func__, __FILE__, "cib-digest", LOG_TRACE, __LINE__, crm_trace_nonlog); } if (digest_cs && digest_cs->targets) { char *trace_file = crm_concat("/tmp/digest", digest, '-'); crm_trace("Saving %s.%s.%s to %s", crm_element_value(source, XML_ATTR_GENERATION_ADMIN), crm_element_value(source, XML_ATTR_GENERATION), crm_element_value(source, XML_ATTR_NUMUPDATES), trace_file); save_xml_to_file(source, "digest input", trace_file); free(trace_file); } free(buffer); crm_trace("End digest"); return digest; } /*! * \brief Calculate and return digest of XML tree, suitable for storing on disk * * \param[in] input Root of XML to digest * * \return Newly allocated string containing digest */ char * calculate_on_disk_digest(xmlNode * input) { /* Always use the v1 format for on-disk digests * a) it's a compatibility nightmare * b) we only use this once at startup, all other * invocations are in a separate child process */ return calculate_xml_digest_v1(input, FALSE, FALSE); } /*! * \brief Calculate and return digest of XML operation * * \param[in] input Root of XML to digest * \param[in] version Not used * * \return Newly allocated string containing digest */ char * calculate_operation_digest(xmlNode *input, const char *version) { /* We still need the sorting for operation digests */ return calculate_xml_digest_v1(input, TRUE, FALSE); } /*! * \brief Calculate and return digest of XML tree * * \param[in] input Root of XML to digest * \param[in] sort Whether to sort XML before calculating digest * \param[in] do_filter Whether to filter certain XML attributes * \param[in] version CRM feature set version (used to select v1/v2 digest) * * \return Newly allocated string containing digest */ char * calculate_xml_versioned_digest(xmlNode * input, gboolean sort, gboolean do_filter, const char *version) { /* * The sorting associated with v1 digest creation accounted for 23% of * the CIB's CPU usage on the server. v2 drops this. * * The filtering accounts for an additional 2.5% and we may want to * remove it in future. * * v2 also uses the xmlBuffer contents directly to avoid additional copying */ if (version == NULL || compare_version("3.0.5", version) > 0) { crm_trace("Using v1 digest algorithm for %s", crm_str(version)); return calculate_xml_digest_v1(input, sort, do_filter); } crm_trace("Using v2 digest algorithm for %s", crm_str(version)); return calculate_xml_digest_v2(input, do_filter); } /*! * \internal * \brief Return whether calculated digest of XML tree matches expected digest * * \param[in] input Root of XML to digest * \param[in] expected Expected digest in on-disk format * * \return TRUE if digests match, FALSE otherwise or on error */ gboolean crm_digest_verify(xmlNode *input, const char *expected) { char *calculated = NULL; gboolean passed; if (input != NULL) { calculated = calculate_on_disk_digest(input); if (calculated == NULL) { crm_perror(LOG_ERR, "Could not calculate digest for comparison"); return FALSE; } } passed = safe_str_eq(expected, calculated); if (passed) { crm_trace("Digest comparison passed: %s", calculated); } else { crm_err("Digest comparison failed: expected %s, calculated %s", expected, calculated); } free(calculated); return passed; } - -#ifdef ENABLE_VERSIONED_ATTRS -char* -crm_versioned_param_summary(xmlNode *versioned_params, const char *name) -{ - xmlNode *attrs = NULL; - xmlNode *attr = NULL; - char *summary = NULL; - gboolean found = FALSE; - - if (!name) { - return NULL; - } - - for (attrs = __xml_first_child(versioned_params); attrs != NULL; attrs = __xml_next_element(attrs)) { - for (attr = __xml_first_child(attrs); attr != NULL; attr = __xml_next_element(attr)) { - if (safe_str_eq((const char*) attr->name, XML_TAG_RULE)) { - const char *boolean_op = crm_element_value(attr, XML_RULE_ATTR_BOOLEAN_OP); - xmlNode *expr = NULL; - - if (boolean_op == NULL) { - boolean_op = "and"; - } - summary = add_list_element(summary, boolean_op); - - for (expr = __xml_first_child(attr); expr != NULL; expr = __xml_next_element(expr)) { - summary = add_list_element(summary, crm_element_value(expr, XML_EXPR_ATTR_OPERATION)); - summary = add_list_element(summary, crm_element_value(expr, XML_EXPR_ATTR_VALUE)); - } - } else if (safe_str_eq(crm_element_value(attr, XML_NVPAIR_ATTR_NAME), name)) { - found = TRUE; - summary = add_list_element(summary, crm_element_value(attr, XML_NVPAIR_ATTR_VALUE)); - break; - } - } - } - - if (!found) { - free(summary); - return NULL; - } - - return summary; -} - -void -crm_summarize_versioned_params(xmlNode *param_set, xmlNode *versioned_params) -{ - xmlNode *attrs = NULL; - xmlNode *attr = NULL; - - if (!param_set) { - return; - } - - for (attrs = __xml_first_child(versioned_params); attrs != NULL; attrs = __xml_next_element(attrs)) { - for (attr = __xml_first_child(attrs); attr != NULL; attr = __xml_next_element(attr)) { - const char *attr_name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); - - if (attr_name) { - char *summary = crm_versioned_param_summary(versioned_params, attr_name); - crm_xml_replace(param_set, attr_name, summary); - free(summary); - } - } - } -} -#endif diff --git a/lib/common/utils.c b/lib/common/utils.c index c53f73255e..9b8ba863f2 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1,2009 +1,2006 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MAXLINE # define MAXLINE 512 #endif #ifdef HAVE_GETOPT_H # include #endif #ifndef PW_BUFFER_LEN # define PW_BUFFER_LEN 500 #endif CRM_TRACE_INIT_DATA(common); gboolean crm_config_error = FALSE; gboolean crm_config_warning = FALSE; char *crm_system_name = NULL; int node_score_red = 0; int node_score_green = 0; int node_score_yellow = 0; int node_score_infinity = INFINITY; static struct crm_option *crm_long_options = NULL; static const char *crm_app_description = NULL; static char *crm_short_options = NULL; static const char *crm_app_usage = NULL; int crm_exit(int rc) { mainloop_cleanup(); #if HAVE_LIBXML2 crm_trace("cleaning up libxml"); crm_xml_cleanup(); #endif crm_trace("exit %d", rc); qb_log_fini(); free(crm_short_options); free(crm_system_name); exit(ABS(rc)); /* Always exit with a positive value so that it can be passed to crm_error * * Otherwise the system wraps it around and people * have to jump through hoops figuring out what the * error was */ return rc; /* Can never happen, but allows return crm_exit(rc) * where "return rc" was used previously - which * keeps compilers happy. */ } gboolean check_time(const char *value) { if (crm_get_msec(value) < 5000) { return FALSE; } return TRUE; } gboolean check_timer(const char *value) { if (crm_get_msec(value) < 0) { return FALSE; } return TRUE; } gboolean check_boolean(const char *value) { int tmp = FALSE; if (crm_str_to_boolean(value, &tmp) != 1) { return FALSE; } return TRUE; } gboolean check_number(const char *value) { errno = 0; if (value == NULL) { return FALSE; } else if (safe_str_eq(value, MINUS_INFINITY_S)) { } else if (safe_str_eq(value, INFINITY_S)) { } else { crm_int_helper(value, NULL); } if (errno != 0) { return FALSE; } return TRUE; } gboolean check_positive_number(const char* value) { if (safe_str_eq(value, INFINITY_S) || (crm_int_helper(value, NULL))) { return TRUE; } return FALSE; } gboolean check_quorum(const char *value) { if (safe_str_eq(value, "stop")) { return TRUE; } else if (safe_str_eq(value, "freeze")) { return TRUE; } else if (safe_str_eq(value, "ignore")) { return TRUE; } else if (safe_str_eq(value, "suicide")) { return TRUE; } return FALSE; } gboolean check_script(const char *value) { struct stat st; if(safe_str_eq(value, "/dev/null")) { return TRUE; } if(stat(value, &st) != 0) { crm_err("Script %s does not exist", value); return FALSE; } if(S_ISREG(st.st_mode) == 0) { crm_err("Script %s is not a regular file", value); return FALSE; } if( (st.st_mode & (S_IXUSR | S_IXGRP )) == 0) { crm_err("Script %s is not executable", value); return FALSE; } return TRUE; } gboolean check_utilization(const char *value) { char *end = NULL; long number = strtol(value, &end, 10); if(end && end[0] != '%') { return FALSE; } else if(number < 0) { return FALSE; } return TRUE; } int char2score(const char *score) { int score_f = 0; if (score == NULL) { } else if (safe_str_eq(score, MINUS_INFINITY_S)) { score_f = -node_score_infinity; } else if (safe_str_eq(score, INFINITY_S)) { score_f = node_score_infinity; } else if (safe_str_eq(score, "+" INFINITY_S)) { score_f = node_score_infinity; } else if (safe_str_eq(score, "red")) { score_f = node_score_red; } else if (safe_str_eq(score, "yellow")) { score_f = node_score_yellow; } else if (safe_str_eq(score, "green")) { score_f = node_score_green; } else { score_f = crm_parse_int(score, NULL); if (score_f > 0 && score_f > node_score_infinity) { score_f = node_score_infinity; } else if (score_f < 0 && score_f < -node_score_infinity) { score_f = -node_score_infinity; } } return score_f; } char * score2char_stack(int score, char *buf, size_t len) { if (score >= node_score_infinity) { strncpy(buf, INFINITY_S, 9); } else if (score <= -node_score_infinity) { strncpy(buf, MINUS_INFINITY_S , 10); } else { return crm_itoa_stack(score, buf, len); } return buf; } char * score2char(int score) { if (score >= node_score_infinity) { return strdup(INFINITY_S); } else if (score <= -node_score_infinity) { return strdup("-" INFINITY_S); } return crm_itoa(score); } const char * cluster_option(GHashTable * options, gboolean(*validate) (const char *), const char *name, const char *old_name, const char *def_value) { const char *value = NULL; CRM_ASSERT(name != NULL); if (options != NULL) { value = g_hash_table_lookup(options, name); } if (value == NULL && old_name && options != NULL) { value = g_hash_table_lookup(options, old_name); if (value != NULL) { crm_config_warn("Using deprecated name '%s' for" " cluster option '%s'", old_name, name); g_hash_table_insert(options, strdup(name), strdup(value)); value = g_hash_table_lookup(options, old_name); } } if (value == NULL) { crm_trace("Using default value '%s' for cluster option '%s'", def_value, name); if (options == NULL) { return def_value; } else if(def_value == NULL) { return def_value; } g_hash_table_insert(options, strdup(name), strdup(def_value)); value = g_hash_table_lookup(options, name); } if (validate && validate(value) == FALSE) { crm_config_err("Value '%s' for cluster option '%s' is invalid." " Defaulting to %s", value, name, def_value); g_hash_table_replace(options, strdup(name), strdup(def_value)); value = g_hash_table_lookup(options, name); } return value; } const char * get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name) { int lpc = 0; const char *value = NULL; gboolean found = FALSE; for (lpc = 0; lpc < len; lpc++) { if (safe_str_eq(name, option_list[lpc].name)) { found = TRUE; value = cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } CRM_CHECK(found, crm_err("No option named: %s", name)); return value; } void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option * option_list, int len) { int lpc = 0; fprintf(stdout, "" "\n" "\n" " %s\n" " %s\n" " %s\n" " \n", name, version, desc_long, desc_short); for (lpc = 0; lpc < len; lpc++) { if (option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) { continue; } fprintf(stdout, " \n" " %s\n" " \n" " %s%s%s\n" " \n", option_list[lpc].name, option_list[lpc].description_short, option_list[lpc].type, option_list[lpc].default_value, option_list[lpc].description_long ? option_list[lpc]. description_long : option_list[lpc].description_short, option_list[lpc].values ? " Allowed values: " : "", option_list[lpc].values ? option_list[lpc].values : ""); } fprintf(stdout, " \n\n"); } void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len) { int lpc = 0; for (lpc = 0; lpc < len; lpc++) { cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } char * generate_hash_key(const char *crm_msg_reference, const char *sys) { char *hash_key = crm_concat(sys ? sys : "none", crm_msg_reference, '_'); crm_trace("created hash key: (%s)", hash_key); return hash_key; } int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) { int rc = -1; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; buffer = calloc(1, PW_BUFFER_LEN); getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry); if (pwentry) { rc = 0; if (uid) { *uid = pwentry->pw_uid; } if (gid) { *gid = pwentry->pw_gid; } crm_trace("Cluster user %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid); } else { crm_err("Cluster user %s does not exist", name); } free(buffer); return rc; } static int crm_version_helper(const char *text, char **end_text) { int atoi_result = -1; CRM_ASSERT(end_text != NULL); errno = 0; if (text != NULL && text[0] != 0) { atoi_result = (int)strtol(text, end_text, 10); if (errno == EINVAL) { crm_err("Conversion of '%s' %c failed", text, text[0]); atoi_result = -1; } } return atoi_result; } /* * version1 < version2 : -1 * version1 = version2 : 0 * version1 > version2 : 1 */ int compare_version(const char *version1, const char *version2) { int rc = 0; int lpc = 0; char *ver1_copy = NULL, *ver2_copy = NULL; char *rest1 = NULL, *rest2 = NULL; if (version1 == NULL && version2 == NULL) { return 0; } else if (version1 == NULL) { return -1; } else if (version2 == NULL) { return 1; } ver1_copy = strdup(version1); ver2_copy = strdup(version2); rest1 = ver1_copy; rest2 = ver2_copy; while (1) { int digit1 = 0; int digit2 = 0; lpc++; if (rest1 == rest2) { break; } if (rest1 != NULL) { digit1 = crm_version_helper(rest1, &rest1); } if (rest2 != NULL) { digit2 = crm_version_helper(rest2, &rest2); } if (digit1 < digit2) { rc = -1; break; } else if (digit1 > digit2) { rc = 1; break; } if (rest1 != NULL && rest1[0] == '.') { rest1++; } if (rest1 != NULL && rest1[0] == 0) { rest1 = NULL; } if (rest2 != NULL && rest2[0] == '.') { rest2++; } if (rest2 != NULL && rest2[0] == 0) { rest2 = NULL; } } free(ver1_copy); free(ver2_copy); if (rc == 0) { crm_trace("%s == %s (%d)", version1, version2, lpc); } else if (rc < 0) { crm_trace("%s < %s (%d)", version1, version2, lpc); } else if (rc > 0) { crm_trace("%s > %s (%d)", version1, version2, lpc); } return rc; } gboolean do_stderr = FALSE; #ifndef NUMCHARS # define NUMCHARS "0123456789." #endif #ifndef WHITESPACE # define WHITESPACE " \t\n\r\f" #endif unsigned long long crm_get_interval(const char *input) { unsigned long long msec = 0; if (input == NULL) { return msec; } else if (input[0] != 'P') { long long tmp = crm_get_msec(input); if(tmp > 0) { msec = tmp; } } else { crm_time_t *interval = crm_time_parse_duration(input); msec = 1000 * crm_time_get_seconds(interval); crm_time_free(interval); } return msec; } long long crm_get_msec(const char *input) { const char *cp = input; const char *units; long long multiplier = 1000; long long divisor = 1; long long msec = -1; char *end_text = NULL; /* double dret; */ if (input == NULL) { return msec; } cp += strspn(cp, WHITESPACE); units = cp + strspn(cp, NUMCHARS); units += strspn(units, WHITESPACE); if (strchr(NUMCHARS, *cp) == NULL) { return msec; } if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) { multiplier = 1; divisor = 1; } else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) { multiplier = 1; divisor = 1000; } else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) { multiplier = 1000; divisor = 1; } else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) { multiplier = 60 * 1000; divisor = 1; } else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) { multiplier = 60 * 60 * 1000; divisor = 1; } else if (*units != EOS && *units != '\n' && *units != '\r') { return msec; } msec = crm_int_helper(cp, &end_text); if (msec > LLONG_MAX/multiplier) { /* arithmetics overflow while multiplier/divisor mutually exclusive */ return LLONG_MAX; } msec *= multiplier; msec /= divisor; /* dret += 0.5; */ /* msec = (long long)dret; */ return msec; } /*! * \brief Generate an operation key * * \param[in] rsc_id ID of resource being operated on * \param[in] op_type Operation name * \param[in] interval Operation interval * * \return Newly allocated memory containing operation key as string * * \note It is the caller's responsibility to free() the result. */ char * generate_op_key(const char *rsc_id, const char *op_type, int interval) { CRM_ASSERT(rsc_id != NULL); CRM_ASSERT(op_type != NULL); CRM_ASSERT(interval >= 0); return crm_strdup_printf("%s_%s_%d", rsc_id, op_type, interval); } gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) { char *notify = NULL; char *mutable_key = NULL; char *mutable_key_ptr = NULL; int len = 0, offset = 0, ch = 0; CRM_CHECK(key != NULL, return FALSE); *interval = 0; len = strlen(key); offset = len - 1; crm_trace("Source: %s", key); while (offset > 0 && isdigit(key[offset])) { int digits = len - offset; ch = key[offset] - '0'; CRM_CHECK(ch < 10, return FALSE); CRM_CHECK(ch >= 0, return FALSE); while (digits > 1) { digits--; ch = ch * 10; } *interval += ch; offset--; } crm_trace(" Interval: %d", *interval); CRM_CHECK(key[offset] == '_', return FALSE); mutable_key = strdup(key); mutable_key[offset] = 0; offset--; while (offset > 0 && key[offset] != '_') { offset--; } CRM_CHECK(key[offset] == '_', free(mutable_key); return FALSE); mutable_key_ptr = mutable_key + offset + 1; crm_trace(" Action: %s", mutable_key_ptr); *op_type = strdup(mutable_key_ptr); mutable_key[offset] = 0; offset--; CRM_CHECK(mutable_key != mutable_key_ptr, free(mutable_key); return FALSE); notify = strstr(mutable_key, "_post_notify"); if (notify && safe_str_eq(notify, "_post_notify")) { notify[0] = 0; } notify = strstr(mutable_key, "_pre_notify"); if (notify && safe_str_eq(notify, "_pre_notify")) { notify[0] = 0; } crm_trace(" Resource: %s", mutable_key); *rsc_id = mutable_key; return TRUE; } char * generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type) { int len = 12; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); CRM_CHECK(notify_type != NULL, return NULL); len += strlen(op_type); len += strlen(rsc_id); len += strlen(notify_type); if(len > 0) { op_id = malloc(len); } if (op_id != NULL) { sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type); } return op_id; } char * generate_transition_magic_v202(const char *transition_key, int op_status) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); fail_state = malloc(len); if (fail_state != NULL) { snprintf(fail_state, len, "%d:%s", op_status, transition_key); } return fail_state; } char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); fail_state = malloc(len); if (fail_state != NULL) { snprintf(fail_state, len, "%d:%d;%s", op_status, op_rc, transition_key); } return fail_state; } gboolean decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc) { int res = 0; char *key = NULL; gboolean result = TRUE; CRM_CHECK(magic != NULL, return FALSE); CRM_CHECK(op_rc != NULL, return FALSE); CRM_CHECK(op_status != NULL, return FALSE); key = calloc(1, strlen(magic) + 1); res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); if (res != 3) { crm_warn("Only found %d items in: '%s'", res, magic); free(key); return FALSE; } CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), result = FALSE); free(key); return result; } char * generate_transition_key(int transition_id, int action_id, int target_rc, const char *node) { int len = 40; char *fail_state = NULL; CRM_CHECK(node != NULL, return NULL); len += strlen(node); fail_state = malloc(len); if (fail_state != NULL) { snprintf(fail_state, len, "%d:%d:%d:%-*s", action_id, transition_id, target_rc, 36, node); } return fail_state; } gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc) { int res = 0; gboolean done = FALSE; CRM_CHECK(uuid != NULL, return FALSE); CRM_CHECK(target_rc != NULL, return FALSE); CRM_CHECK(action_id != NULL, return FALSE); CRM_CHECK(transition_id != NULL, return FALSE); *uuid = calloc(1, 37); res = sscanf(key, "%d:%d:%d:%36s", action_id, transition_id, target_rc, *uuid); switch (res) { case 4: /* Post Pacemaker 0.6 */ done = TRUE; break; case 3: case 2: /* this can be tricky - the UUID might start with an integer */ /* Until Pacemaker 0.6 */ done = TRUE; *target_rc = -1; res = sscanf(key, "%d:%d:%36s", action_id, transition_id, *uuid); if (res == 2) { *action_id = -1; res = sscanf(key, "%d:%36s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); } else if (res != 3) { CRM_CHECK(res == 3, done = FALSE); } break; case 1: /* Prior to Heartbeat 2.0.8 */ done = TRUE; *action_id = -1; *target_rc = -1; res = sscanf(key, "%d:%36s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); break; default: crm_crit("Unhandled sscanf result (%d) for %s", res, key); } if (strlen(*uuid) != 36) { crm_warn("Bad UUID (%s) in sscanf result (%d) for %s", *uuid, res, key); } if (done == FALSE) { crm_err("Cannot decode '%s' rc=%d", key, res); free(*uuid); *uuid = NULL; *target_rc = -1; *action_id = -1; *transition_id = -1; } return done; } void filter_action_parameters(xmlNode * param_set, const char *version) { char *key = NULL; char *timeout = NULL; char *interval = NULL; const char *attr_filter[] = { XML_ATTR_ID, XML_ATTR_CRM_VERSION, XML_LRM_ATTR_OP_DIGEST, XML_LRM_ATTR_TARGET, XML_LRM_ATTR_TARGET_UUID, "pcmk_external_ip" }; gboolean do_delete = FALSE; int lpc = 0; static int meta_len = 0; if (meta_len == 0) { meta_len = strlen(CRM_META); } if (param_set == NULL) { return; } for (lpc = 0; lpc < DIMOF(attr_filter); lpc++) { xml_remove_prop(param_set, attr_filter[lpc]); } key = crm_meta_name(XML_LRM_ATTR_INTERVAL); interval = crm_element_value_copy(param_set, key); free(key); key = crm_meta_name(XML_ATTR_TIMEOUT); timeout = crm_element_value_copy(param_set, key); if (param_set) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; xIter = xIter->next; do_delete = FALSE; if (strncasecmp(prop_name, CRM_META, meta_len) == 0) { do_delete = TRUE; } if (do_delete) { xml_remove_prop(param_set, prop_name); } } } if (crm_get_msec(interval) > 0 && compare_version(version, "1.0.8") > 0) { /* Re-instate the operation's timeout value */ if (timeout != NULL) { crm_xml_add(param_set, key, timeout); } } free(interval); free(timeout); free(key); } extern bool crm_is_daemon; /* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; /* Implied by the parent's error logging below */ /* crm_write_blackbox(0); */ if(crm_is_daemon == FALSE) { /* This is a command line tool - do not fork */ /* crm_add_logfile(NULL); * Record it to a file? */ crm_enable_stderr(TRUE); /* Make sure stderr is enabled so we can tell the caller */ do_fork = FALSE; /* Just crash if needed */ } if (do_core == FALSE) { crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if (do_fork) { pid = fork(); } else { crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } if (pid == -1) { crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(pid == 0) { /* Child process */ abort(); return; } /* Parent process */ crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); crm_write_blackbox(SIGTRAP, NULL); do { rc = waitpid(pid, &status, 0); if(rc == pid) { return; /* Job done */ } } while(errno == EINTR); if (errno == ECHILD) { /* crm_mon does this */ crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid); return; } crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid); } int crm_pid_active(long pid, const char *daemon) { static int have_proc_pid = 0; if(have_proc_pid == 0) { char proc_path[PATH_MAX], exe_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid()); have_proc_pid = 1; if(readlink(proc_path, exe_path, PATH_MAX - 1) < 0) { have_proc_pid = -1; } } if (pid <= 0) { return -1; } else if (kill(pid, 0) < 0 && errno == ESRCH) { return 0; } else if(daemon == NULL || have_proc_pid == -1) { return 1; } else { int rc = 0; char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid); rc = readlink(proc_path, exe_path, PATH_MAX - 1); if (rc < 0 && errno == EACCES) { crm_perror(LOG_INFO, "Could not read from %s", proc_path); return 1; } else if (rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); return 0; } exe_path[rc] = 0; if(daemon[0] != '/') { rc = snprintf(myexe_path, sizeof(proc_path), CRM_DAEMON_DIR"/%s", daemon); myexe_path[rc] = 0; } else { rc = snprintf(myexe_path, sizeof(proc_path), "%s", daemon); myexe_path[rc] = 0; } if (strcmp(exe_path, myexe_path) == 0) { return 1; } } return 0; } #define LOCKSTRLEN 11 long crm_read_pidfile(const char *filename) { int fd; struct stat sbuf; long pid = -ENOENT; char buf[LOCKSTRLEN + 1]; if ((fd = open(filename, O_RDONLY)) < 0) { goto bail; } if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) { sleep(2); /* if someone was about to create one, * give'm a sec to do so */ } if (read(fd, buf, sizeof(buf)) < 1) { goto bail; } if (sscanf(buf, "%lu", &pid) > 0) { if (pid <= 0) { pid = -ESRCH; } else { crm_trace("Got pid %lu from %s\n", pid, filename); } } bail: if (fd >= 0) { close(fd); } return pid; } long crm_pidfile_inuse(const char *filename, long mypid, const char *daemon) { long pid = crm_read_pidfile(filename); if (pid < 2) { /* Invalid pid */ pid = -ENOENT; unlink(filename); } else if (mypid && pid == mypid) { /* In use by us */ pid = pcmk_ok; } else if (crm_pid_active(pid, daemon) == FALSE) { /* Contains a stale value */ unlink(filename); pid = -ENOENT; } else if (mypid && pid != mypid) { /* locked by existing process - give up */ pid = -EEXIST; } return pid; } static int crm_lock_pidfile(const char *filename, const char *name) { long mypid = 0; int fd = 0, rc = 0; char buf[LOCKSTRLEN + 1]; mypid = (unsigned long)getpid(); rc = crm_pidfile_inuse(filename, 0, name); if (rc == -ENOENT) { /* exists but the process is not active */ } else if (rc != pcmk_ok) { /* locked by existing process - give up */ return rc; } if ((fd = open(filename, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { /* Hmmh, why did we fail? Anyway, nothing we can do about it */ return -errno; } snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN - 1, mypid); rc = write(fd, buf, LOCKSTRLEN); close(fd); if (rc != LOCKSTRLEN) { crm_perror(LOG_ERR, "Incomplete write to %s", filename); return -errno; } return crm_pidfile_inuse(filename, mypid, name); } void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) { int rc; long pid; const char *devnull = "/dev/null"; if (daemonize == FALSE) { return; } /* Check before we even try... */ rc = crm_pidfile_inuse(pidfile, 1, name); if(rc < pcmk_ok && rc != -ENOENT) { pid = crm_read_pidfile(pidfile); crm_err("%s: already running [pid %ld in %s]", name, pid, pidfile); printf("%s: already running [pid %ld in %s]\n", name, pid, pidfile); crm_exit(rc); } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", name); crm_perror(LOG_ERR, "fork"); crm_exit(EINVAL); } else if (pid > 0) { crm_exit(pcmk_ok); } rc = crm_lock_pidfile(pidfile, name); if(rc < pcmk_ok) { crm_err("Could not lock '%s' for %s: %s (%d)", pidfile, name, pcmk_strerror(rc), rc); printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_strerror(rc), rc); crm_exit(rc); } umask(S_IWGRP | S_IWOTH | S_IROTH); close(STDIN_FILENO); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ close(STDOUT_FILENO); (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ close(STDERR_FILENO); (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ } char * crm_meta_name(const char *field) { int lpc = 0; int max = 0; char *crm_name = NULL; CRM_CHECK(field != NULL, return NULL); crm_name = crm_concat(CRM_META, field, '_'); /* Massage the names so they can be used as shell variables */ max = strlen(crm_name); for (; lpc < max; lpc++) { switch (crm_name[lpc]) { case '-': crm_name[lpc] = '_'; break; } } return crm_name; } const char * crm_meta_value(GHashTable * hash, const char *field) { char *key = NULL; const char *value = NULL; key = crm_meta_name(field); if (key) { value = g_hash_table_lookup(hash, key); free(key); } return value; } static struct option * crm_create_long_opts(struct crm_option *long_options) { struct option *long_opts = NULL; #ifdef HAVE_GETOPT_H int index = 0, lpc = 0; /* * A previous, possibly poor, choice of '?' as the short form of --help * means that getopt_long() returns '?' for both --help and for "unknown option" * * This dummy entry allows us to differentiate between the two in crm_get_option() * and exit with the correct error code */ long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); long_opts[index].name = "__dummmy__"; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = '_'; index++; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].name[0] == '-') { continue; } long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); /*fprintf(stderr, "Creating %d %s = %c\n", index, * long_options[lpc].name, long_options[lpc].val); */ long_opts[index].name = long_options[lpc].name; long_opts[index].has_arg = long_options[lpc].has_arg; long_opts[index].flag = long_options[lpc].flag; long_opts[index].val = long_options[lpc].val; index++; } /* Now create the list terminator */ long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); long_opts[index].name = NULL; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = 0; #endif return long_opts; } void crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options, const char *app_desc) { if (short_options) { crm_short_options = strdup(short_options); } else if (long_options) { int lpc = 0; int opt_string_len = 0; char *local_short_options = NULL; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].val && long_options[lpc].val != '-' && long_options[lpc].val < UCHAR_MAX) { local_short_options = realloc_safe(local_short_options, opt_string_len + 4); local_short_options[opt_string_len++] = long_options[lpc].val; /* getopt(3) says: Two colons mean an option takes an optional arg; */ if (long_options[lpc].has_arg == optional_argument) { local_short_options[opt_string_len++] = ':'; } if (long_options[lpc].has_arg >= required_argument) { local_short_options[opt_string_len++] = ':'; } local_short_options[opt_string_len] = 0; } } crm_short_options = local_short_options; crm_trace("Generated short option string: '%s'", local_short_options); } if (long_options) { crm_long_options = long_options; } if (app_desc) { crm_app_description = app_desc; } if (app_usage) { crm_app_usage = app_usage; } } int crm_get_option(int argc, char **argv, int *index) { return crm_get_option_long(argc, argv, index, NULL); } int crm_get_option_long(int argc, char **argv, int *index, const char **longname) { #ifdef HAVE_GETOPT_H static struct option *long_opts = NULL; if (long_opts == NULL && crm_long_options) { long_opts = crm_create_long_opts(crm_long_options); } *index = 0; if (long_opts) { int flag = getopt_long(argc, argv, crm_short_options, long_opts, index); switch (flag) { case 0: if (long_opts[*index].val) { return long_opts[*index].val; } else if (longname) { *longname = long_opts[*index].name; } else { crm_notice("Unhandled option --%s", long_opts[*index].name); return flag; } case -1: /* End of option processing */ break; case ':': crm_trace("Missing argument"); crm_help('?', 1); break; case '?': crm_help('?', *index ? 0 : 1); break; } return flag; } #endif if (crm_short_options) { return getopt(argc, argv, crm_short_options); } return -1; } int crm_help(char cmd, int exit_code) { int i = 0; FILE *stream = (exit_code ? stderr : stdout); if (cmd == 'v' || cmd == '$') { fprintf(stream, "Pacemaker %s\n", PACEMAKER_VERSION); fprintf(stream, "Written by Andrew Beekhof\n"); goto out; } if (cmd == '!') { fprintf(stream, "Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); goto out; } fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description); if (crm_app_usage) { fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage); } if (crm_long_options) { fprintf(stream, "Options:\n"); for (i = 0; crm_long_options[i].name != NULL; i++) { if (crm_long_options[i].flags & pcmk_option_hidden) { } else if (crm_long_options[i].flags & pcmk_option_paragraph) { fprintf(stream, "%s\n\n", crm_long_options[i].desc); } else if (crm_long_options[i].flags & pcmk_option_example) { fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc); } else if (crm_long_options[i].val == '-' && crm_long_options[i].desc) { fprintf(stream, "%s\n", crm_long_options[i].desc); } else { /* is val printable as char ? */ if (crm_long_options[i].val && crm_long_options[i].val <= UCHAR_MAX) { fprintf(stream, " -%c,", crm_long_options[i].val); } else { fputs(" ", stream); } fprintf(stream, " --%s%s\t%s\n", crm_long_options[i].name, crm_long_options[i].has_arg == optional_argument ? "[=value]" : crm_long_options[i].has_arg == required_argument ? "=value" : "", crm_long_options[i].desc ? crm_long_options[i].desc : ""); } } } else if (crm_short_options) { fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description); for (i = 0; crm_short_options[i] != 0; i++) { int has_arg = no_argument /* 0 */; if (crm_short_options[i + 1] == ':') { if (crm_short_options[i + 2] == ':') has_arg = optional_argument /* 2 */; else has_arg = required_argument /* 1 */; } fprintf(stream, " -%c %s\n", crm_short_options[i], has_arg == optional_argument ? "[value]" : has_arg == required_argument ? "{value}" : ""); i += has_arg; } } fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT); out: return crm_exit(exit_code); } void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create cib servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(DAEMON_RESPAWN_STOP); } } void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } qb_ipcs_service_t * crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } void attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create attrd servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(DAEMON_RESPAWN_STOP); } } void stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create stonith-ng servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(DAEMON_RESPAWN_STOP); } } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" static void append_digest(lrmd_event_data_t * op, xmlNode * update, const char *version, const char *magic, int level) { /* this will enable us to later determine that the * resource's parameters have changed and we should force * a restart */ char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, version); -#ifdef ENABLE_VERSIONED_ATTRS - crm_summarize_versioned_params(args_xml, op->versioned_params); -#endif digest = calculate_operation_digest(args_xml, version); #if 0 if (level < get_crm_log_level() && op->interval == 0 && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) { char *digest_source = dump_xml_unformatted(args_xml); do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n", digest, ID(update), magic, digest_source); free(digest_source); } #endif crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); free_xml(args_xml); free(digest); } int rsc_op_expected_rc(lrmd_event_data_t * op) { int rc = 0; if (op && op->user_data) { int dummy = 0; char *uuid = NULL; decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &rc); free(uuid); } return rc; } gboolean did_rsc_op_fail(lrmd_event_data_t * op, int target_rc) { switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: case PCMK_LRM_OP_PENDING: return FALSE; break; case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_ERROR: return TRUE; break; default: if (target_rc != op->rc) { return TRUE; } } return FALSE; } xmlNode * create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * caller_version, int target_rc, const char * node, const char * origin, int level) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; gboolean dc_munges_migrate_ops = (compare_version(caller_version, "3.0.3") < 0); gboolean dc_needs_unique_ops = (compare_version(caller_version, "3.0.6") < 0); CRM_CHECK(op != NULL, return NULL); do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%d)", origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status), op->interval); crm_trace("DC version: %s", caller_version); task = op->op_type; /* remap the task name under various scenarios * this makes life easier for the PE when trying determine the current state */ if (crm_str_eq(task, "reload", TRUE)) { if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } else if (dc_munges_migrate_ops && crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { /* if the migrate_from fails it will have enough info to do the right thing */ if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_STOP; } else { task = CRMD_ACTION_STATUS; } } else if (dc_munges_migrate_ops && op->op_status == PCMK_LRM_OP_DONE && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { task = CRMD_ACTION_START; } key = generate_op_key(op->rsc_id, task, op->interval); if (dc_needs_unique_ops && op->interval > 0) { op_id = strdup(key); } else if (crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = PCMK_LRM_OP_DONE; op->rc = 0; } else if (did_rsc_op_fail(op, target_rc)) { op_id = generate_op_key(op->rsc_id, "last_failure", 0); if (op->interval == 0) { /* Ensure 'last' gets updated too in case recording-pending="true" */ op_id_additional = generate_op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval > 0) { op_id = strdup(key); } else { op_id = generate_op_key(op->rsc_id, "last", 0); } again: xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id); if (xml_op == NULL) { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for:" " %s_%s_%d %d from %s", op->rsc_id, op->op_type, op->interval, op->call_id, origin); local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if(magic == NULL) { magic = generate_transition_magic(op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason); crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */ crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); if (compare_version("2.1", caller_version) <= 0) { if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_trace("Timing data (%s_%s_%d): last=%u change=%u exec=%u queue=%u", op->rsc_id, op->op_type, op->interval, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if (op->interval == 0) { /* The values are the same for non-recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run); crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } else if(op->t_rcchange) { /* last-run is not accurate for recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange); } else { /* ...but is better than nothing otherwise */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); } } if (crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE) || crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { /* * Record migrate_source and migrate_target always for migrate ops. */ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = XML_LRM_ATTR_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } bool pcmk_acl_required(const char *user) { #if ENABLE_ACL if(user == NULL || strlen(user) == 0) { crm_trace("no user set"); return FALSE; } else if (strcmp(user, CRM_DAEMON_USER) == 0) { return FALSE; } else if (strcmp(user, "root") == 0) { return FALSE; } crm_trace("acls required for %s", user); return TRUE; #else crm_trace("acls not supported"); return FALSE; #endif } #if ENABLE_ACL char * uid2username(uid_t uid) { struct passwd *pwent = getpwuid(uid); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get password entry of uid: %d", uid); return NULL; } else { return strdup(pwent->pw_name); } } const char * crm_acl_get_set_user(xmlNode * request, const char *field, const char *peer_user) { /* field is only checked for backwards compatibility */ static const char *effective_user = NULL; const char *requested_user = NULL; const char *user = NULL; if(effective_user == NULL) { effective_user = uid2username(geteuid()); } requested_user = crm_element_value(request, XML_ACL_TAG_USER); if(requested_user == NULL) { requested_user = crm_element_value(request, field); } if (is_privileged(effective_user) == FALSE) { /* We're not running as a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */ user = effective_user; } else if(peer_user == NULL && requested_user == NULL) { /* No user known or requested, use 'effective_user' and make sure one is set for the request */ user = effective_user; } else if(peer_user == NULL) { /* No user known, trusting 'requested_user' */ user = requested_user; } else if (is_privileged(peer_user) == FALSE) { /* The peer is not a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */ user = peer_user; } else if (requested_user == NULL) { /* Even if we're privileged, make sure there is always a value set */ user = peer_user; } else { /* Legal delegation to 'requested_user' */ user = requested_user; } /* Yes, pointer comparision */ if(user != crm_element_value(request, XML_ACL_TAG_USER)) { crm_xml_add(request, XML_ACL_TAG_USER, user); } if(field != NULL && user != crm_element_value(request, field)) { crm_xml_add(request, field, user); } return requested_user; } void determine_request_user(const char *user, xmlNode * request, const char *field) { /* Get our internal validation out of the way first */ CRM_CHECK(user != NULL && request != NULL && field != NULL, return); /* If our peer is a privileged user, we might be doing something on behalf of someone else */ if (is_privileged(user) == FALSE) { /* We're not a privileged user, set or overwrite any existing value for $field */ crm_xml_replace(request, field, user); } else if (crm_element_value(request, field) == NULL) { /* Even if we're privileged, make sure there is always a value set */ crm_xml_replace(request, field, user); /* } else { Legal delegation */ } crm_trace("Processing msg as user '%s'", crm_element_value(request, field)); } #endif void * find_library_function(void **handle, const char *lib, const char *fn, gboolean fatal) { char *error; void *a_function; if (*handle == NULL) { *handle = dlopen(lib, RTLD_LAZY); } if (!(*handle)) { crm_err("%sCould not open %s: %s", fatal ? "Fatal: " : "", lib, dlerror()); if (fatal) { crm_exit(DAEMON_RESPAWN_STOP); } return NULL; } a_function = dlsym(*handle, fn); if (a_function == NULL) { error = dlerror(); crm_err("%sCould not find %s in %s: %s", fatal ? "Fatal: " : "", fn, lib, error); if (fatal) { crm_exit(DAEMON_RESPAWN_STOP); } } return a_function; } void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } #ifdef HAVE_UUID_UUID_H # include #endif char * crm_generate_uuid(void) { unsigned char uuid[16]; char *buffer = malloc(37); /* Including NUL byte */ uuid_generate(uuid); uuid_unparse(uuid, buffer); return buffer; } #include char * crm_md5sum(const char *buffer) { int lpc = 0, len = 0; char *digest = NULL; unsigned char raw_digest[MD5_DIGEST_SIZE]; if (buffer == NULL) { buffer = ""; } len = strlen(buffer); crm_trace("Beginning digest of %d bytes", len); digest = malloc(2 * MD5_DIGEST_SIZE + 1); if(digest) { md5_buffer(buffer, len, raw_digest); for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); } digest[(2 * MD5_DIGEST_SIZE)] = 0; crm_trace("Digest %s.", digest); } else { crm_err("Could not create digest"); } return digest; } #ifdef HAVE_GNUTLS_GNUTLS_H void crm_gnutls_global_init(void) { signal(SIGPIPE, SIG_IGN); gnutls_global_init(); } #endif char * crm_generate_ra_key(const char *class, const char *provider, const char *type) { if (!class && !provider && !type) { return NULL; } return crm_strdup_printf("%s:%s:%s", class ? class : "", provider ? provider : "", type ? type : ""); } diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 12eba3b765..cc37c0e4b7 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2261 +1,2232 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; int lrmd_tls_set_key(gnutls_datum_t * key); static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { enum client_type type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; crm_remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; /* while the async connection is occuring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. */ memcpy(copy, event, sizeof(lrmd_event_data_t)); copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL; copy->op_type = event->op_type ? strdup(event->op_type) : NULL; copy->user_data = event->user_data ? strdup(event->user_data) : NULL; copy->output = event->output ? strdup(event->output) : NULL; copy->exit_reason = event->exit_reason ? strdup(event->exit_reason) : NULL; copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL; if (event->params) { copy->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (copy->params != NULL) { g_hash_table_foreach(event->params, dup_attr, copy->params); } } -#ifdef ENABLE_VERSIONED_ATTRS - if (event->versioned_params) { - copy->versioned_params = copy_xml(event->versioned_params); - } -#endif - return copy; } void lrmd_free_event(lrmd_event_data_t * event) { if (!event) { return; } /* free gives me grief if i try to cast */ free((char *)event->rsc_id); free((char *)event->op_type); free((char *)event->user_data); free((char *)event->output); free((char *)event->exit_reason); free((char *)event->remote_nodename); if (event->params) { g_hash_table_destroy(event->params); } -#ifdef ENABLE_VERSIONED_ATTRS - if (event->versioned_params) { - free_xml(event->versioned_params); - } -#endif free(event); } static int lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return 1; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return 1; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) { event.type = lrmd_event_register; } else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) { event.type = lrmd_event_unregister; } else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) { crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run); crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange); crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT); event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON); event.type = lrmd_event_exec_complete; event.params = xml2list(msg); -#ifdef ENABLE_VERSIONED_ATTRS - event.versioned_params = first_named_child(msg, XML_TAG_VER_ATTRS); -#endif } else if (crm_str_eq(type, LRMD_OP_NEW_CLIENT, TRUE)) { event.type = lrmd_event_new_client; } else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) { event.type = lrmd_event_poke; } else { return 1; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } return 1; } static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *msg; int rc; if (!native->callback) { /* no callback set */ return 1; } msg = string2xml(buffer); rc = lrmd_dispatch_internal(lrmd, msg); free_xml(msg); return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static int lrmd_tls_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { return TRUE; } return FALSE; } static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; int rc = 0; int disconnected = 0; if (lrmd_tls_connected(lrmd) == FALSE) { crm_trace("tls dispatch triggered after disconnect"); return 0; } crm_trace("tls_dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ rc = crm_remote_ready(native->remote, 0); if (rc == 0) { /* nothing to read, see if any full messages are already in buffer. */ xml = crm_remote_parse_buffer(native->remote); } else if (rc < 0) { disconnected = 1; } else { crm_remote_recv(native->remote, -1, &disconnected); xml = crm_remote_parse_buffer(native->remote); } while (xml) { const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (safe_str_eq(msg_type, "notify")) { lrmd_dispatch_internal(lrmd, xml); } else if (safe_str_eq(msg_type, "reply")) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { int reply_id = 0; crm_element_value_int(xml, F_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated reply %d", reply_id); } } free_xml(xml); xml = crm_remote_parse_buffer(native->remote); } if (disconnected) { crm_info("Server disconnected while reading remote server msg."); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: if (native->pending_notify) { return 1; } return crm_remote_ready(native->remote, 0); #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->private; switch (private->type) { case CRM_CLIENT_IPC: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_trace("Sending call options: %.8lx, %d", (long)options, options); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); native->remote->buffer = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type) { int rc = -1; crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); rc = crm_remote_send(session, msg); if (rc < 0) { crm_err("Failed to send remote lrmd tls msg, rc = %d", rc); return rc; } return rc; } static xmlNode * lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected) { lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } while (!xml) { xml = crm_remote_parse_buffer(native->remote); if (!xml) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { crm_err("Never received the expected reply during the timeout period, disconnecting."); *disconnected = TRUE; return NULL; } crm_remote_recv(native->remote, remaining_timeout, disconnected); xml = crm_remote_parse_buffer(native->remote); if (!xml) { crm_err("Unable to receive expected reply, disconnecting."); *disconnected = TRUE; return NULL; } else if (*disconnected) { return NULL; } } CRM_ASSERT(xml != NULL); crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(xml); xml = NULL; } else if (safe_str_eq(msg_type, "notify")) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, xml); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } xml = NULL; } else if (safe_str_neq(msg_type, "reply")) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(xml); xml = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } free_xml(xml); xml = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return xml; } static int lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg) { int rc = 0; lrmd_private_t *native = lrmd->private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request"); if (rc <= 0) { crm_err("Remote lrmd send failed, disconnecting"); lrmd_tls_disconnect(lrmd); return -ENOTCONN; } return pcmk_ok; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; int disconnected = 0; xmlNode *xml = NULL; if (lrmd_tls_connected(lrmd) == FALSE) { return -1; } rc = lrmd_tls_send(lrmd, msg); if (rc < 0) { return rc; } xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected); if (disconnected) { crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ", global_remote_msg_id); lrmd_tls_disconnect(lrmd); rc = -ENOTCONN; } else if (!xml) { crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ", global_remote_msg_id, timeout); rc = -ECOMM; } if (reply) { *reply = xml; } else { free_xml(xml); } return rc; } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send(lrmd, msg); if (rc == pcmk_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_connected(native->ipc); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: return lrmd_tls_connected(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } static int lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout, /* ms. defaults to 1000 if set to 0 */ enum lrmd_call_options options, gboolean expect_reply) { /* TODO we need to reduce usage of this boolean */ int rc = pcmk_ok; int reply_id = -1; lrmd_private_t *native = lrmd->private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("sending %s op to lrmd", op); op_msg = lrmd_create_op(native->token, op, data, options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); rc = -ECOMM; goto done; } else if(op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id); crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("LRMD disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); return rc < 0 ? rc : pcmk_ok; } int remote_proxy_check(lrmd_t * lrmd, GHashTable *hash) { int rc; const char *value; lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); crm_xml_add(data, F_LRMD_WATCHDOG, value); rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); return rc < 0 ? rc : pcmk_ok; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); crm_element_value_int(reply, F_LRMD_RC, &rc); if (rc == -EPROTO) { crm_err("LRMD protocol mismatch client version %s, server version %s", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; static struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to lrmd"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to local resource manager failed"); rc = -ENOTCONN; } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the LRMD API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static int set_key(gnutls_datum_t * key, const char *location) { FILE *stream; int read_len = 256; int cur_len = 0; int buf_len = read_len; static char *key_cache = NULL; static size_t key_cache_len = 0; static time_t key_cache_updated; if (location == NULL) { return -1; } if (key_cache) { time_t now = time(NULL); if ((now - key_cache_updated) < 60) { key->data = gnutls_malloc(key_cache_len + 1); key->size = key_cache_len; memcpy(key->data, key_cache, key_cache_len); crm_debug("using cached LRMD key"); return 0; } else { key_cache_len = 0; key_cache_updated = 0; free(key_cache); key_cache = NULL; crm_debug("clearing lrmd key cache"); } } stream = fopen(location, "r"); if (!stream) { return -1; } key->data = gnutls_malloc(read_len); while (!feof(stream)) { int next; if (cur_len == buf_len) { buf_len = cur_len + read_len; key->data = gnutls_realloc(key->data, buf_len); } next = fgetc(stream); if (next == EOF && feof(stream)) { break; } key->data[cur_len] = next; cur_len++; } fclose(stream); key->size = cur_len; if (!cur_len) { gnutls_free(key->data); key->data = 0; return -1; } if (!key_cache) { key_cache = calloc(1, key->size + 1); memcpy(key_cache, key->data, key->size); key_cache_len = key->size; key_cache_updated = time(NULL); } return 0; } int lrmd_tls_set_key(gnutls_datum_t * key) { int rc = 0; const char *specific_location = getenv("PCMK_authkey_location"); if (set_key(key, specific_location) == 0) { crm_debug("Using custom authkey location %s", specific_location); return 0; } else if (specific_location) { crm_err("No valid lrmd remote key found at %s, trying default location", specific_location); } if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION) != 0) { rc = set_key(key, ALT_REMOTE_KEY_LOCATION); } if (rc) { crm_err("No valid lrmd remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION); return -1; } return rc; } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { crm_gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tcp_connect_cb(void *userdata, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; char name[256] = { 0, }; static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; int rc = sock; gnutls_datum_t psk_key = { NULL, 0 }; native->async_timer = 0; if (rc < 0) { lrmd_tls_connection_destroy(lrmd); crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port); report_async_connection_result(lrmd, rc); return; } /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon * to avoid all blocking code in the client. */ native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -1); return; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); rc = lrmd_handshake(lrmd, name); report_async_connection_result(lrmd, rc); return; } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc = -1; int sock = 0; int timer_id = 0; lrmd_private_t *native = lrmd->private; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect_async(native->server, native->port, timeout, &timer_id, lrmd, lrmd_tcp_connect_cb); if (sock != -1) { native->sock = sock; rc = 0; native->async_timer = timer_id; } return rc; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; lrmd_private_t *native = lrmd->private; int sock; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect(native->server, native->port); if (sock < 0) { crm_warn("Could not establish remote lrmd connection to %s", native->server); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return -1; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -1; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); if (fd) { *fd = sock; } else { char name[256] = { 0, }; snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = 0; lrmd_private_t *native = lrmd->private; if (!native->callback) { crm_err("Async connect not possible, no lrmd client callback set."); return -1; } switch (native->type) { case CRM_CLIENT_IPC: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); native->sock = 0; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; crm_info("Disconnecting from %d lrmd service", native->type); switch (native->type) { case CRM_CLIENT_IPC: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return 0; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) && !provider) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { lrmd_rsc_info_t *copy = NULL; copy = calloc(1, sizeof(lrmd_rsc_info_t)); copy->id = strdup(rsc_info->id); copy->type = strdup(rsc_info->type); copy->class = strdup(rsc_info->class); if (rsc_info->provider) { copy->provider = strdup(rsc_info->provider); } return copy; } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->class); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) && !provider) { free_xml(output); return NULL; } rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = strdup(rsc_id); rsc_info->class = strdup(class); if (provider) { rsc_info->provider = strdup(provider); } rsc_info->type = strdup(type); free_xml(output); return rsc_info; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if(stonith_api) { stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); stonith_api->cmds->free(stonith_api); } if (*output == NULL) { rc = -EIO; } return rc; } #define lsb_metadata_template \ "\n" \ "\n" \ "\n" \ " 1.0\n" \ " \n" \ " %s\n" \ " \n" \ " %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " \n" \ "\n" #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" #define PROVIDES "# Provides:" #define REQ_START "# Required-Start:" #define REQ_STOP "# Required-Stop:" #define SHLD_START "# Should-Start:" #define SHLD_STOP "# Should-Stop:" #define DFLT_START "# Default-Start:" #define DFLT_STOP "# Default-Stop:" #define SHORT_DSCR "# Short-Description:" #define DESCRIPTION "# Description:" #define lsb_meta_helper_free_value(m) \ do { \ if ((m) != NULL) { \ xmlFree(m); \ (m) = NULL; \ } \ } while(0) /*! * \internal * \brief Grab an LSB header value * * \param[in] line Line read from LSB init script * \param[in/out] value If not set, will be set to XML-safe copy of value * \param[in] prefix Set value if line starts with this pattern * * \return TRUE if value was set, FALSE otherwise */ static inline gboolean lsb_meta_helper_get_value(const char *line, char **value, const char *prefix) { if (!*value && !strncasecmp(line, prefix, strlen(prefix))) { *value = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST line+strlen(prefix)); return TRUE; } return FALSE; } static int lsb_get_metadata(const char *type, char **output) { char ra_pathname[PATH_MAX] = { 0, }; FILE *fp; char buffer[1024]; char *provides = NULL; char *req_start = NULL; char *req_stop = NULL; char *shld_start = NULL; char *shld_stop = NULL; char *dflt_start = NULL; char *dflt_stop = NULL; char *s_dscrpt = NULL; char *xml_l_dscrpt = NULL; int offset = 0; int max = 2048; char description[max]; if(type[0] == '/') { snprintf(ra_pathname, sizeof(ra_pathname), "%s", type); } else { snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type); } crm_trace("Looking into %s", ra_pathname); if (!(fp = fopen(ra_pathname, "r"))) { return -errno; } /* Enter into the lsb-compliant comment block */ while (fgets(buffer, sizeof(buffer), fp)) { /* Now suppose each of the following eight arguments contain only one line */ if (lsb_meta_helper_get_value(buffer, &provides, PROVIDES)) { continue; } if (lsb_meta_helper_get_value(buffer, &req_start, REQ_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &req_stop, REQ_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &shld_start, SHLD_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &shld_stop, SHLD_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &dflt_start, DFLT_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &dflt_stop, DFLT_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &s_dscrpt, SHORT_DSCR)) { continue; } /* Long description may cross multiple lines */ if (offset == 0 && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) { /* Between # and keyword, more than one space, or a tab * character, indicates the continuation line. * * Extracted from LSB init script standard */ while (fgets(buffer, sizeof(buffer), fp)) { if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) { buffer[0] = ' '; offset += snprintf(description+offset, max-offset, "%s", buffer); } else { fputs(buffer, fp); break; /* Long description ends */ } } continue; } if (xml_l_dscrpt == NULL && offset > 0) { xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(description)); } if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) { /* Get to the out border of LSB comment block */ break; } if (buffer[0] != '#') { break; /* Out of comment block in the beginning */ } } fclose(fp); *output = crm_strdup_printf(lsb_metadata_template, type, (xml_l_dscrpt == NULL) ? type : xml_l_dscrpt, (s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides, (req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop, (shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop, (dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop); lsb_meta_helper_free_value(xml_l_dscrpt); lsb_meta_helper_free_value(s_dscrpt); lsb_meta_helper_free_value(provides); lsb_meta_helper_free_value(req_start); lsb_meta_helper_free_value(req_stop); lsb_meta_helper_free_value(shld_start); lsb_meta_helper_free_value(shld_stop); lsb_meta_helper_free_value(dflt_start); lsb_meta_helper_free_value(dflt_stop); crm_trace("Created fake metadata: %llu", (unsigned long long) strlen(*output)); return pcmk_ok; } #if SUPPORT_NAGIOS static int nagios_get_metadata(const char *type, char **output) { int rc = pcmk_ok; FILE *file_strm = NULL; int start = 0, length = 0, read_len = 0; char *metadata_file = NULL; int len = 36; len += strlen(NAGIOS_METADATA_DIR); len += strlen(type); metadata_file = calloc(1, len); CRM_CHECK(metadata_file != NULL, return -ENOMEM); sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type); file_strm = fopen(metadata_file, "r"); if (file_strm == NULL) { crm_err("Metadata file %s does not exist", metadata_file); free(metadata_file); return -EIO; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, start); CRM_ASSERT(length >= 0); CRM_ASSERT(start == ftell(file_strm)); if (length <= 0) { crm_info("%s was not valid", metadata_file); free(*output); *output = NULL; rc = -EIO; } else { crm_trace("Reading %d bytes from file", length); *output = calloc(1, (length + 1)); read_len = fread(*output, 1, length, file_strm); if (read_len != length) { crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len); free(*output); *output = NULL; rc = -EIO; } } fclose(file_strm); free(metadata_file); return rc; } #endif #if SUPPORT_HEARTBEAT /* strictly speaking, support for class=heartbeat style scripts * does not require "heartbeat support" to be enabled. * But since those scripts are part of the "heartbeat" package usually, * and are very unlikely to be present in any other deployment, * I leave it inside this ifdef. * * Yes, I know, these are legacy and should die, * or at least be rewritten to be a proper OCF style agent. * But they exist, and custom scripts following these rules do, too. * * Taken from the old "glue" lrmd, see * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l49 * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l393 */ static const char hb_metadata_template[] = "\n" "\n" "\n" "1.0\n" "\n" "%s" "\n" "%s\n" "\n" "\n" "\n" "This argument will be passed as the first argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[1]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the second argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[2]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the third argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[3]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the fourth argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[4]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the fifth argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[5]\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n"; static int heartbeat_get_metadata(const char *type, char **output) { *output = crm_strdup_printf(hb_metadata_template, type, type, type); crm_trace("Created fake metadata: %llu", (unsigned long long) strlen(*output)); return pcmk_ok; } #endif static int generic_get_metadata(const char *standard, const char *provider, const char *type, char **output) { svc_action_t *action; action = resources_action_create(type, standard, provider, type, "meta-data", 0, 30000, NULL, 0); if (action == NULL) { crm_err("Unable to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EINVAL; } if (!(services_action_sync(action))) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_get_metadata(lrmd_t * lrmd, const char *class, const char *provider, const char *type, char **output, enum lrmd_call_options options) { if (!class || !type) { return -EINVAL; } if (safe_str_eq(class, PCMK_RESOURCE_CLASS_SERVICE)) { class = resources_find_service_class(type); } if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { return stonith_get_metadata(provider, type, output); } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) { return lsb_get_metadata(type, output); #if SUPPORT_NAGIOS } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios_get_metadata(type, output); #endif #if SUPPORT_HEARTBEAT } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_HB)) { return heartbeat_get_metadata(type, output); #endif } return generic_get_metadata(class, provider, type, output); } static int lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; -#ifdef ENABLE_VERSIONED_ATTRS - const char *versioned_args_key = "#" XML_TAG_VER_ATTRS; -#endif crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { -#ifdef ENABLE_VERSIONED_ATTRS - if (safe_str_eq(tmp->key, versioned_args_key)) { - xmlNode *versioned_args = string2xml(tmp->value); - - if (versioned_args) { - add_node_nocopy(data, NULL, versioned_args); - } - } else { -#endif - hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); -#ifdef ENABLE_VERSIONED_ATTRS - } -#endif + hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if(stonith_api) { stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); } for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { rc += list_stonith_agents(resources); } else { GListPtr gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { rc += list_stonith_agents(resources); } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static int does_provider_have_agent(const char *agent, const char *provider, const char *class) { int found = 0; GList *agents = NULL; GListPtr gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (safe_str_eq(agent, gIter2->data)) { found = 1; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GListPtr gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GListPtr gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } lrmd_t * lrmd_api_new(void) { lrmd_t *new_lrmd = NULL; lrmd_private_t *pvt = NULL; new_lrmd = calloc(1, sizeof(lrmd_t)); pvt = calloc(1, sizeof(lrmd_private_t)); pvt->remote = calloc(1, sizeof(crm_remote_t)); new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t)); pvt->type = CRM_CLIENT_IPC; new_lrmd->private = pvt; new_lrmd->cmds->connect = lrmd_api_connect; new_lrmd->cmds->connect_async = lrmd_api_connect_async; new_lrmd->cmds->is_connected = lrmd_api_is_connected; new_lrmd->cmds->poke_connection = lrmd_api_poke_connection; new_lrmd->cmds->disconnect = lrmd_api_disconnect; new_lrmd->cmds->register_rsc = lrmd_api_register_rsc; new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc; new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info; new_lrmd->cmds->set_callback = lrmd_api_set_callback; new_lrmd->cmds->get_metadata = lrmd_api_get_metadata; new_lrmd->cmds->exec = lrmd_api_exec; new_lrmd->cmds->cancel = lrmd_api_cancel; new_lrmd->cmds->list_agents = lrmd_api_list_agents; new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; return new_lrmd; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { #ifdef HAVE_GNUTLS_GNUTLS_H lrmd_t *new_lrmd = lrmd_api_new(); lrmd_private_t *native = new_lrmd->private; if (!nodename && !server) { lrmd_api_delete(new_lrmd); return NULL; } native->type = CRM_CLIENT_TLS; native->remote_nodename = nodename ? strdup(nodename) : strdup(server); native->server = server ? strdup(server) : strdup(nodename); native->port = port; if (native->port == 0) { const char *remote_port_str = getenv("PCMK_remote_port"); native->port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; } return new_lrmd; #else crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); return NULL; #endif } void lrmd_api_delete(lrmd_t * lrmd) { if (!lrmd) { return; } lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */ free(lrmd->cmds); if (lrmd->private) { lrmd_private_t *native = lrmd->private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); } free(lrmd->private); free(lrmd); } diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c index be9158b2ca..4561c319d8 100644 --- a/lib/pengine/rules.c +++ b/lib/pengine/rules.c @@ -1,1035 +1,1023 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_rules); crm_time_t *parse_xml_duration(crm_time_t * start, xmlNode * duration_spec); gboolean test_date_expression(xmlNode * time_expr, crm_time_t * now); gboolean cron_range_satisfied(crm_time_t * now, xmlNode * cron_spec); gboolean test_attr_expression(xmlNode * expr, GHashTable * hash, crm_time_t * now); gboolean pe_test_attr_expression_full(xmlNode * expr, GHashTable * hash, crm_time_t * now, pe_match_data_t * match_data); gboolean test_role_expression(xmlNode * expr, enum rsc_role_e role, crm_time_t * now); gboolean test_ruleset(xmlNode * ruleset, GHashTable * node_hash, crm_time_t * now) { gboolean ruleset_default = TRUE; xmlNode *rule = NULL; for (rule = __xml_first_child(ruleset); rule != NULL; rule = __xml_next_element(rule)) { if (crm_str_eq((const char *)rule->name, XML_TAG_RULE, TRUE)) { ruleset_default = FALSE; if (test_rule(rule, node_hash, RSC_ROLE_UNKNOWN, now)) { return TRUE; } } } return ruleset_default; } gboolean test_rule(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now) { return pe_test_rule_full(rule, node_hash, role, now, NULL); } gboolean pe_test_rule_re(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_re_match_data_t * re_match_data) { pe_match_data_t match_data = { .re = re_match_data, .params = NULL, .meta = NULL, }; return pe_test_rule_full(rule, node_hash, role, now, &match_data); } gboolean pe_test_rule_full(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_match_data_t * match_data) { xmlNode *expr = NULL; gboolean test = TRUE; gboolean empty = TRUE; gboolean passed = TRUE; gboolean do_and = TRUE; const char *value = NULL; rule = expand_idref(rule, NULL); value = crm_element_value(rule, XML_RULE_ATTR_BOOLEAN_OP); if (safe_str_eq(value, "or")) { do_and = FALSE; passed = FALSE; } crm_trace("Testing rule %s", ID(rule)); for (expr = __xml_first_child(rule); expr != NULL; expr = __xml_next_element(expr)) { test = pe_test_expression_full(expr, node_hash, role, now, match_data); empty = FALSE; if (test && do_and == FALSE) { crm_trace("Expression %s/%s passed", ID(rule), ID(expr)); return TRUE; } else if (test == FALSE && do_and) { crm_trace("Expression %s/%s failed", ID(rule), ID(expr)); return FALSE; } } if (empty) { crm_err("Invalid Rule %s: rules must contain at least one expression", ID(rule)); } crm_trace("Rule %s %s", ID(rule), passed ? "passed" : "failed"); return passed; } gboolean test_expression(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now) { return pe_test_expression_full(expr, node_hash, role, now, NULL); } gboolean pe_test_expression_re(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_re_match_data_t * re_match_data) { pe_match_data_t match_data = { .re = re_match_data, .params = NULL, .meta = NULL, }; return pe_test_expression_full(expr, node_hash, role, now, &match_data); } gboolean pe_test_expression_full(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_match_data_t * match_data) { gboolean accept = FALSE; const char *uname = NULL; switch (find_expression_type(expr)) { case nested_rule: accept = pe_test_rule_full(expr, node_hash, role, now, match_data); break; case attr_expr: case loc_expr: /* these expressions can never succeed if there is * no node to compare with */ if (node_hash != NULL) { accept = pe_test_attr_expression_full(expr, node_hash, now, match_data); } break; case time_expr: accept = test_date_expression(expr, now); break; case role_expr: accept = test_role_expression(expr, role, now); break; #ifdef ENABLE_VERSIONED_ATTRS case version_expr: if (node_hash && g_hash_table_lookup_extended(node_hash, "#ra-version", NULL, NULL)) { accept = test_attr_expression(expr, node_hash, now); } else { // we are going to test it when we have ra-version accept = TRUE; } break; #endif default: CRM_CHECK(FALSE /* bad type */ , return FALSE); accept = FALSE; } if (node_hash) { uname = g_hash_table_lookup(node_hash, "#uname"); } crm_trace("Expression %s %s on %s", ID(expr), accept ? "passed" : "failed", uname ? uname : "all nodes"); return accept; } enum expression_type find_expression_type(xmlNode * expr) { const char *tag = NULL; const char *attr = NULL; attr = crm_element_value(expr, XML_EXPR_ATTR_ATTRIBUTE); tag = crm_element_name(expr); if (safe_str_eq(tag, "date_expression")) { return time_expr; } else if (safe_str_eq(tag, XML_TAG_RULE)) { return nested_rule; } else if (safe_str_neq(tag, "expression")) { return not_expr; } else if (safe_str_eq(attr, "#uname") || safe_str_eq(attr, "#kind") || safe_str_eq(attr, "#id")) { return loc_expr; } else if (safe_str_eq(attr, "#role")) { return role_expr; #ifdef ENABLE_VERSIONED_ATTRS } else if (safe_str_eq(attr, "#ra-version")) { return version_expr; #endif } return attr_expr; } gboolean test_role_expression(xmlNode * expr, enum rsc_role_e role, crm_time_t * now) { gboolean accept = FALSE; const char *op = NULL; const char *value = NULL; if (role == RSC_ROLE_UNKNOWN) { return accept; } value = crm_element_value(expr, XML_EXPR_ATTR_VALUE); op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION); if (safe_str_eq(op, "defined")) { if (role > RSC_ROLE_STARTED) { accept = TRUE; } } else if (safe_str_eq(op, "not_defined")) { if (role < RSC_ROLE_SLAVE && role > RSC_ROLE_UNKNOWN) { accept = TRUE; } } else if (safe_str_eq(op, "eq")) { if (text2role(value) == role) { accept = TRUE; } } else if (safe_str_eq(op, "ne")) { /* we will only test "ne" wtih master/slave roles style */ if (role < RSC_ROLE_SLAVE && role > RSC_ROLE_UNKNOWN) { accept = FALSE; } else if (text2role(value) != role) { accept = TRUE; } } return accept; } gboolean test_attr_expression(xmlNode * expr, GHashTable * hash, crm_time_t * now) { return pe_test_attr_expression_full(expr, hash, now, NULL); } gboolean pe_test_attr_expression_full(xmlNode * expr, GHashTable * hash, crm_time_t * now, pe_match_data_t * match_data) { gboolean accept = FALSE; gboolean attr_allocated = FALSE; int cmp = 0; const char *h_val = NULL; GHashTable *table = NULL; const char *op = NULL; const char *type = NULL; const char *attr = NULL; const char *value = NULL; const char *value_source = NULL; attr = crm_element_value(expr, XML_EXPR_ATTR_ATTRIBUTE); op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION); value = crm_element_value(expr, XML_EXPR_ATTR_VALUE); type = crm_element_value(expr, XML_EXPR_ATTR_TYPE); value_source = crm_element_value(expr, XML_EXPR_ATTR_VALUE_SOURCE); if (attr == NULL || op == NULL) { pe_err("Invalid attribute or operation in expression" " (\'%s\' \'%s\' \'%s\')", crm_str(attr), crm_str(op), crm_str(value)); return FALSE; } if (match_data) { if (match_data->re) { char *resolved_attr = pe_expand_re_matches(attr, match_data->re); if (resolved_attr) { attr = (const char *) resolved_attr; attr_allocated = TRUE; } } if (safe_str_eq(value_source, "param")) { table = match_data->params; } else if (safe_str_eq(value_source, "meta")) { table = match_data->meta; } } if (table) { const char *param_name = value; const char *param_value = NULL; if (param_name && param_name[0]) { if ((param_value = (const char *)g_hash_table_lookup(table, param_name))) { value = param_value; } } } if (hash != NULL) { h_val = (const char *)g_hash_table_lookup(hash, attr); } if (attr_allocated) { free((char *)attr); attr = NULL; } if (value != NULL && h_val != NULL) { if (type == NULL) { if (safe_str_eq(op, "lt") || safe_str_eq(op, "lte") || safe_str_eq(op, "gt") || safe_str_eq(op, "gte")) { type = "number"; } else { type = "string"; } crm_trace("Defaulting to %s based comparison for '%s' op", type, op); } if (safe_str_eq(type, "string")) { cmp = strcasecmp(h_val, value); } else if (safe_str_eq(type, "number")) { int h_val_f = crm_parse_int(h_val, NULL); int value_f = crm_parse_int(value, NULL); if (h_val_f < value_f) { cmp = -1; } else if (h_val_f > value_f) { cmp = 1; } else { cmp = 0; } } else if (safe_str_eq(type, "version")) { cmp = compare_version(h_val, value); } } else if (value == NULL && h_val == NULL) { cmp = 0; } else if (value == NULL) { cmp = 1; } else { cmp = -1; } if (safe_str_eq(op, "defined")) { if (h_val != NULL) { accept = TRUE; } } else if (safe_str_eq(op, "not_defined")) { if (h_val == NULL) { accept = TRUE; } } else if (safe_str_eq(op, "eq")) { if ((h_val == value) || cmp == 0) { accept = TRUE; } } else if (safe_str_eq(op, "ne")) { if ((h_val == NULL && value != NULL) || (h_val != NULL && value == NULL) || cmp != 0) { accept = TRUE; } } else if (value == NULL || h_val == NULL) { /* the comparision is meaningless from this point on */ accept = FALSE; } else if (safe_str_eq(op, "lt")) { if (cmp < 0) { accept = TRUE; } } else if (safe_str_eq(op, "lte")) { if (cmp <= 0) { accept = TRUE; } } else if (safe_str_eq(op, "gt")) { if (cmp > 0) { accept = TRUE; } } else if (safe_str_eq(op, "gte")) { if (cmp >= 0) { accept = TRUE; } } return accept; } /* As per the nethack rules: * * moon period = 29.53058 days ~= 30, year = 365.2422 days * days moon phase advances on first day of year compared to preceding year * = 365.2422 - 12*29.53058 ~= 11 * years in Metonic cycle (time until same phases fall on the same days of * the month) = 18.6 ~= 19 * moon phase on first day of year (epact) ~= (11*(year%19) + 29) % 30 * (29 as initial condition) * current phase in days = first day phase + days elapsed in year * 6 moons ~= 177 days * 177 ~= 8 reported phases * 22 * + 11/22 for rounding * * 0-7, with 0: new, 4: full */ static int phase_of_the_moon(crm_time_t * now) { uint32_t epact, diy, goldn; uint32_t y; crm_time_get_ordinal(now, &y, &diy); goldn = (y % 19) + 1; epact = (11 * goldn + 18) % 30; if ((epact == 25 && goldn > 11) || epact == 24) epact++; return ((((((diy + epact) * 6) + 11) % 177) / 22) & 7); } static gboolean decodeNVpair(const char *srcstring, char separator, char **name, char **value) { int lpc = 0; int len = 0; const char *temp = NULL; CRM_ASSERT(name != NULL && value != NULL); *name = NULL; *value = NULL; crm_trace("Attempting to decode: [%s]", srcstring); if (srcstring != NULL) { len = strlen(srcstring); while (lpc <= len) { if (srcstring[lpc] == separator) { *name = calloc(1, lpc + 1); if (*name == NULL) { break; /* and return FALSE */ } memcpy(*name, srcstring, lpc); (*name)[lpc] = '\0'; /* this sucks but as the strtok manpage says.. * it *is* a bug */ len = len - lpc; len--; if (len <= 0) { *value = NULL; } else { *value = calloc(1, len + 1); if (*value == NULL) { break; /* and return FALSE */ } temp = srcstring + lpc + 1; memcpy(*value, temp, len); (*value)[len] = '\0'; } return TRUE; } lpc++; } } if (*name != NULL) { free(*name); *name = NULL; } *name = NULL; *value = NULL; return FALSE; } #define cron_check(xml_field, time_field) \ value = crm_element_value(cron_spec, xml_field); \ if(value != NULL) { \ gboolean pass = TRUE; \ decodeNVpair(value, '-', &value_low, &value_high); \ if(value_low == NULL) { \ value_low = strdup(value); \ } \ value_low_i = crm_parse_int(value_low, "0"); \ value_high_i = crm_parse_int(value_high, "-1"); \ if(value_high_i < 0) { \ if(value_low_i != time_field) { \ pass = FALSE; \ } \ } else if(value_low_i > time_field) { \ pass = FALSE; \ } else if(value_high_i < time_field) { \ pass = FALSE; \ } \ free(value_low); \ free(value_high); \ if(pass == FALSE) { \ crm_debug("Condition '%s' in %s: failed", value, xml_field); \ return pass; \ } \ crm_debug("Condition '%s' in %s: passed", value, xml_field); \ } gboolean cron_range_satisfied(crm_time_t * now, xmlNode * cron_spec) { const char *value = NULL; char *value_low = NULL; char *value_high = NULL; int value_low_i = 0; int value_high_i = 0; uint32_t h, m, s, y, d, w; CRM_CHECK(now != NULL, return FALSE); crm_time_get_timeofday(now, &h, &m, &s); cron_check("seconds", s); cron_check("minutes", m); cron_check("hours", h); crm_time_get_gregorian(now, &y, &m, &d); cron_check("monthdays", d); cron_check("months", m); cron_check("years", y); crm_time_get_ordinal(now, &y, &d); cron_check("yeardays", d); crm_time_get_isoweek(now, &y, &w, &d); cron_check("weekyears", y); cron_check("weeks", w); cron_check("weekdays", d); cron_check("moon", phase_of_the_moon(now)); return TRUE; } #define update_field(xml_field, time_fn) \ value = crm_element_value(duration_spec, xml_field); \ if(value != NULL) { \ int value_i = crm_parse_int(value, "0"); \ time_fn(end, value_i); \ } crm_time_t * parse_xml_duration(crm_time_t * start, xmlNode * duration_spec) { crm_time_t *end = NULL; const char *value = NULL; end = crm_time_new(NULL); crm_time_set(end, start); update_field("years", crm_time_add_years); update_field("months", crm_time_add_months); update_field("weeks", crm_time_add_weeks); update_field("days", crm_time_add_days); update_field("hours", crm_time_add_hours); update_field("minutes", crm_time_add_minutes); update_field("seconds", crm_time_add_seconds); return end; } gboolean test_date_expression(xmlNode * time_expr, crm_time_t * now) { crm_time_t *start = NULL; crm_time_t *end = NULL; const char *value = NULL; const char *op = crm_element_value(time_expr, "operation"); xmlNode *duration_spec = NULL; xmlNode *date_spec = NULL; gboolean passed = FALSE; crm_trace("Testing expression: %s", ID(time_expr)); duration_spec = first_named_child(time_expr, "duration"); date_spec = first_named_child(time_expr, "date_spec"); value = crm_element_value(time_expr, "start"); if (value != NULL) { start = crm_time_new(value); } value = crm_element_value(time_expr, "end"); if (value != NULL) { end = crm_time_new(value); } if (start != NULL && end == NULL && duration_spec != NULL) { end = parse_xml_duration(start, duration_spec); } if (op == NULL) { op = "in_range"; } if (safe_str_eq(op, "date_spec") || safe_str_eq(op, "in_range")) { if (start != NULL && crm_time_compare(start, now) > 0) { passed = FALSE; } else if (end != NULL && crm_time_compare(end, now) < 0) { passed = FALSE; } else if (safe_str_eq(op, "in_range")) { passed = TRUE; } else { passed = cron_range_satisfied(now, date_spec); } } else if (safe_str_eq(op, "gt") && crm_time_compare(start, now) < 0) { passed = TRUE; } else if (safe_str_eq(op, "lt") && crm_time_compare(end, now) > 0) { passed = TRUE; } else if (safe_str_eq(op, "eq") && crm_time_compare(start, now) == 0) { passed = TRUE; } else if (safe_str_eq(op, "neq") && crm_time_compare(start, now) != 0) { passed = TRUE; } crm_time_free(start); crm_time_free(end); return passed; } typedef struct sorted_set_s { int score; const char *name; const char *special_name; xmlNode *attr_set; } sorted_set_t; static gint sort_pairs(gconstpointer a, gconstpointer b) { const sorted_set_t *pair_a = a; const sorted_set_t *pair_b = b; if (a == NULL && b == NULL) { return 0; } else if (a == NULL) { return 1; } else if (b == NULL) { return -1; } if (safe_str_eq(pair_a->name, pair_a->special_name)) { return -1; } else if (safe_str_eq(pair_b->name, pair_a->special_name)) { return 1; } if (pair_a->score < pair_b->score) { return 1; } else if (pair_a->score > pair_b->score) { return -1; } return 0; } static void populate_hash(xmlNode * nvpair_list, GHashTable * hash, gboolean overwrite, xmlNode * top) { const char *name = NULL; const char *value = NULL; const char *old_value = NULL; xmlNode *list = nvpair_list; xmlNode *an_attr = NULL; name = crm_element_name(list->children); if (safe_str_eq(XML_TAG_ATTRS, name)) { list = list->children; } for (an_attr = __xml_first_child(list); an_attr != NULL; an_attr = __xml_next_element(an_attr)) { if (crm_str_eq((const char *)an_attr->name, XML_CIB_TAG_NVPAIR, TRUE)) { xmlNode *ref_nvpair = expand_idref(an_attr, top); name = crm_element_value(an_attr, XML_NVPAIR_ATTR_NAME); if (name == NULL) { name = crm_element_value(ref_nvpair, XML_NVPAIR_ATTR_NAME); } crm_trace("Setting attribute: %s", name); value = crm_element_value(an_attr, XML_NVPAIR_ATTR_VALUE); if (value == NULL) { value = crm_element_value(ref_nvpair, XML_NVPAIR_ATTR_VALUE); } if (name == NULL || value == NULL) { continue; } old_value = g_hash_table_lookup(hash, name); if (safe_str_eq(value, "#default")) { if (old_value) { crm_trace("Removing value for %s (%s)", name, value); g_hash_table_remove(hash, name); } continue; } else if (old_value == NULL) { g_hash_table_insert(hash, strdup(name), strdup(value)); } else if (overwrite) { crm_debug("Overwriting value of %s: %s -> %s", name, old_value, value); g_hash_table_replace(hash, strdup(name), strdup(value)); } } } } #ifdef ENABLE_VERSIONED_ATTRS static xmlNode* get_versioned_rule(xmlNode * attr_set) { xmlNode * rule = NULL; xmlNode * expr = NULL; for (rule = __xml_first_child(attr_set); rule != NULL; rule = __xml_next_element(rule)) { if (crm_str_eq((const char *)rule->name, XML_TAG_RULE, TRUE)) { for (expr = __xml_first_child(rule); expr != NULL; expr = __xml_next_element(expr)) { if (find_expression_type(expr) == version_expr) { return rule; } } } } return NULL; } -static gboolean -versioned_attr(xmlNode * versioned_attrs, const char * name) -{ - xmlNode *attrs = NULL; - xmlNode *attr = NULL; - - if (!name) { - return FALSE; - } - - for (attrs = __xml_first_child(versioned_attrs); attrs != NULL; attrs = __xml_next_element(attrs)) { - for (attr = __xml_first_child(attrs); attr != NULL; attr = __xml_next_element(attr)) { - if (safe_str_eq(crm_element_value(attr, XML_NVPAIR_ATTR_NAME), name)) { - return TRUE; - } - } - } - - return FALSE; -} - static void add_versioned_attributes(xmlNode * attr_set, xmlNode * versioned_attrs) { xmlNode *attr_set_copy = NULL; xmlNode *rule = NULL; + xmlNode *expr = NULL; - CRM_CHECK(versioned_attrs != NULL, return); + if (!attr_set || !versioned_attrs) { + return; + } attr_set_copy = copy_xml(attr_set); rule = get_versioned_rule(attr_set_copy); - if (rule) { - xmlNode *expr = __xml_first_child(rule); - - while (expr != NULL) { - if (find_expression_type(expr) != version_expr) { - xmlNode *node = expr; - expr = __xml_next_element(expr); - free_xml(node); - } else { - expr = __xml_next_element(expr); - } - } - } else { - xmlNode *attr = __xml_first_child(attr_set_copy); - - while (attr != NULL) { - if (safe_str_eq((const char*) attr->name, XML_TAG_RULE)) { - free_xml(attr_set_copy); - return; - } else if (!versioned_attr(versioned_attrs, crm_element_value(attr, XML_NVPAIR_ATTR_NAME))) { - xmlNode *node = attr; - attr = __xml_next_element(attr); - free_xml(node); - } else { - attr = __xml_next_element(attr); - } - } + if (!rule) { + free_xml(attr_set_copy); + return; + } + + expr = __xml_first_child(rule); + while (expr != NULL) { + if (find_expression_type(expr) != version_expr) { + xmlNode *node = expr; - if (!xml_has_children(attr_set_copy)) { - free_xml(attr_set_copy); - return; + expr = __xml_next_element(expr); + free_xml(node); + } else { + expr = __xml_next_element(expr); } } add_node_nocopy(versioned_attrs, NULL, attr_set_copy); } #endif typedef struct unpack_data_s { gboolean overwrite; GHashTable *node_hash; void *hash; crm_time_t *now; xmlNode *top; } unpack_data_t; static void unpack_attr_set(gpointer data, gpointer user_data) { sorted_set_t *pair = data; unpack_data_t *unpack_data = user_data; if (test_ruleset(pair->attr_set, unpack_data->node_hash, unpack_data->now) == FALSE) { return; } #ifdef ENABLE_VERSIONED_ATTRS if (get_versioned_rule(pair->attr_set) && !(unpack_data->node_hash && g_hash_table_lookup_extended(unpack_data->node_hash, "#ra-version", NULL, NULL))) { // we haven't actually tested versioned expressions yet return; } #endif crm_trace("Adding attributes from %s", pair->name); populate_hash(pair->attr_set, unpack_data->hash, unpack_data->overwrite, unpack_data->top); } #ifdef ENABLE_VERSIONED_ATTRS static void unpack_versioned_attr_set(gpointer data, gpointer user_data) { sorted_set_t *pair = data; unpack_data_t *unpack_data = user_data; if (test_ruleset(pair->attr_set, unpack_data->node_hash, unpack_data->now) == FALSE) { return; } add_versioned_attributes(pair->attr_set, unpack_data->hash); } #endif static GListPtr make_pairs_and_populate_data(xmlNode * top, xmlNode * xml_obj, const char *set_name, GHashTable * node_hash, void * hash, const char *always_first, gboolean overwrite, crm_time_t * now, unpack_data_t * data) { GListPtr unsorted = NULL; const char *score = NULL; sorted_set_t *pair = NULL; xmlNode *attr_set = NULL; if (xml_obj == NULL) { crm_trace("No instance attributes"); return NULL; } crm_trace("Checking for attributes"); for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { /* Uncertain if set_name == NULL check is strictly necessary here */ if (set_name == NULL || crm_str_eq((const char *)attr_set->name, set_name, TRUE)) { pair = NULL; attr_set = expand_idref(attr_set, top); if (attr_set == NULL) { continue; } pair = calloc(1, sizeof(sorted_set_t)); pair->name = ID(attr_set); pair->special_name = always_first; pair->attr_set = attr_set; score = crm_element_value(attr_set, XML_RULE_ATTR_SCORE); pair->score = char2score(score); unsorted = g_list_prepend(unsorted, pair); } } if (pair != NULL) { data->hash = hash; data->node_hash = node_hash; data->now = now; data->overwrite = overwrite; data->top = top; } if (unsorted) { return g_list_sort(unsorted, sort_pairs); } return NULL; } void unpack_instance_attributes(xmlNode * top, xmlNode * xml_obj, const char *set_name, GHashTable * node_hash, GHashTable * hash, const char *always_first, gboolean overwrite, crm_time_t * now) { unpack_data_t data; GListPtr pairs = make_pairs_and_populate_data(top, xml_obj, set_name, node_hash, hash, always_first, overwrite, now, &data); if (pairs) { g_list_foreach(pairs, unpack_attr_set, &data); g_list_free_full(pairs, free); } } #ifdef ENABLE_VERSIONED_ATTRS void pe_unpack_versioned_attributes(xmlNode * top, xmlNode * xml_obj, const char *set_name, GHashTable * node_hash, xmlNode * hash, crm_time_t * now) { unpack_data_t data; GListPtr pairs = make_pairs_and_populate_data(top, xml_obj, set_name, node_hash, hash, NULL, FALSE, now, &data); if (pairs) { g_list_foreach(pairs, unpack_versioned_attr_set, &data); g_list_free_full(pairs, free); } } #endif char * pe_expand_re_matches(const char *string, pe_re_match_data_t *match_data) { size_t len = 0; int i; const char *p, *last_match_index; char *p_dst, *result = NULL; if (!string || string[0] == '\0' || !match_data) { return NULL; } p = last_match_index = string; while (*p) { if (*p == '%' && *(p + 1) && isdigit(*(p + 1))) { i = *(p + 1) - '0'; if (match_data->nregs >= i && match_data->pmatch[i].rm_so != -1 && match_data->pmatch[i].rm_eo > match_data->pmatch[i].rm_so) { len += p - last_match_index + (match_data->pmatch[i].rm_eo - match_data->pmatch[i].rm_so); last_match_index = p + 2; } p++; } p++; } len += p - last_match_index + 1; /* FIXME: Excessive? */ if (len - 1 <= 0) { return NULL; } p_dst = result = calloc(1, len); p = string; while (*p) { if (*p == '%' && *(p + 1) && isdigit(*(p + 1))) { i = *(p + 1) - '0'; if (match_data->nregs >= i && match_data->pmatch[i].rm_so != -1 && match_data->pmatch[i].rm_eo > match_data->pmatch[i].rm_so) { /* rm_eo can be equal to rm_so, but then there is nothing to do */ int match_len = match_data->pmatch[i].rm_eo - match_data->pmatch[i].rm_so; memcpy(p_dst, match_data->string + match_data->pmatch[i].rm_so, match_len); p_dst += match_len; } p++; } else { *(p_dst) = *(p); p_dst++; } p++; } return result; } + +GHashTable* +pe_unpack_versioned_parameters(xmlNode *versioned_params, const char *ra_version) +{ + GHashTable *hash = g_hash_table_new_full(crm_str_hash, g_str_equal, + g_hash_destroy_str, g_hash_destroy_str); + + if (versioned_params && ra_version) { + GHashTable *node_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, + g_hash_destroy_str, g_hash_destroy_str); + xmlNode *attr_set = __xml_first_child(versioned_params); + + if (attr_set) { + g_hash_table_insert(node_hash, strdup("#" XML_ATTR_RA_VERSION), strdup(ra_version)); + unpack_instance_attributes(NULL, versioned_params, crm_element_name(attr_set), + node_hash, hash, NULL, FALSE, NULL); + } + + g_hash_table_destroy(node_hash); + } + + return hash; +} diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index ff142f078d..81b0291a43 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,1960 +1,1975 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include pe_working_set_t *pe_dataset = NULL; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set); static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return TRUE if node can be fenced, FALSE otherwise * * \note This function should only be called for cluster nodes and baremetal * remote nodes; guest nodes are fenced by stopping their container * resource, so fence execution requirements do not apply to them. */ bool pe_can_fence(pe_working_set_t * data_set, node_t *node) { if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return FALSE; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return FALSE; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return TRUE; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return TRUE; } else if(node == NULL) { return FALSE; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return TRUE; } crm_trace("Cannot fence %s", node->details->uname); return FALSE; } node_t * node_copy(const node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); new_node = calloc(1, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = merge_weights(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } GHashTable * node_hash_from_list(GListPtr list) { GListPtr gIter = list; GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *n = node_copy(node); g_hash_table_insert(result, (gpointer) n->details->id, n); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; GListPtr gIter = list1; for (; gIter != NULL; gIter = gIter->next) { node_t *new_node = NULL; node_t *this_node = (node_t *) gIter->data; if (filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if (reset) { new_node->weight = 0; } if (new_node != NULL) { result = g_list_prepend(result, new_node); } } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { const node_t *node_a = a; const node_t *node_b = b; return strcmp(node_a->details->uname, node_b->details->uname); } void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes) { GHashTable *hash = nodes; GHashTableIter iter; node_t *node = NULL; if (rsc) { hash = rsc->allowed_nodes; } if (rsc && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't show the allocation scores for orphans */ return; } if (level == 0) { char score[128]; int len = sizeof(score); /* For now we want this in sorted order to keep the regression tests happy */ GListPtr gIter = NULL; GListPtr list = g_hash_table_get_values(hash); list = g_list_sort(list, sort_node_uname); gIter = list; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } } g_list_free(list); } else if (hash) { char score[128]; int len = sizeof(score); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { do_crm_log_alias(LOG_TRACE, file, function, line, "%s: %s allocation score on %s: %s", comment, rsc->id, node->details->uname, score); } else { do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment, node->details->uname, score); } } } if (rsc && rsc->children) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; dump_node_scores_worker(level, file, function, line, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; int len = 0; char *new_text = NULL; len = strlen(*dump_text) + strlen(" ") + strlen(key) + strlen("=") + strlen(value) + 1; new_text = calloc(1, len); sprintf(new_text, "%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(node->details->uname) + strlen(" capacity:") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(rsc->id) + strlen(" utilization on ") + strlen(node->details->uname) + strlen(":") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, free(key); return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } else if(save_action) { #if 0 action = g_hash_table_lookup(data_set->singletons, key); #else /* More expensive but takes 'node' into account */ possible_matches = find_actions(data_set->actions, key, on_node); #endif } if(data_set->singletons == NULL) { data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); } if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action (%d) %s for %s on %s", action->id, task, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating%s action %d: %s for %s on %s %d", optional ? "" : " mandatory", data_set->action_id, key, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", optional); } action = calloc(1, sizeof(action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = node_copy(on_node); } action->uuid = strdup(key); pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); } /* Implied by calloc()... action->actions_before = NULL; action->actions_after = NULL; action->pseudo = FALSE; action->dumped = FALSE; action->processed = FALSE; action->seen_count = 0; */ action->extra = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); action->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); if(rsc == NULL) { g_hash_table_insert(data_set->singletons, action->uuid, action); } } if (rsc != NULL) { action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); unpack_operation(action, action->op_entry, rsc->container, data_set); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (optional == FALSE) { pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_set_action_bit(action, pe_action_have_node_attrs); unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid); pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (action->node->details->online == FALSE && (!is_container_remote_node(action->node) || action->node->details->remote_requires_reset)) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { pe_fence_node(data_set, action->node, "because node is unclean"); } } else if (action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid); pe_set_action_bit(action, pe_action_runnable); #if 0 /* * No point checking this * - if we don't have quorum we can't stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { pe_clear_action_bit(action, pe_action_runnable); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static const char * unpack_operation_on_fail(action_t * action) { const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); return NULL; } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval = NULL; const char *enabled = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child(action->rsc->ops_xml); operation && !value; operation = __xml_next_element(operation)) { if (!crm_str_eq((const char *)operation->name, "op", TRUE)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) { continue; } else if (crm_get_interval(interval) <= 0) { continue; } value = on_fail; } } return value; } static xmlNode * find_min_interval_mon(resource_t * rsc, gboolean include_disabled) { int number = 0; int min_interval = -1; const char *name = NULL; const char *value = NULL; const char *interval = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (safe_str_neq(name, RSC_STATUS)) { continue; } number = crm_get_interval(interval); if (number < 0) { continue; } if (min_interval < 0 || number < min_interval) { min_interval = number; op = operation; } } } return op; } void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set) { int value_i = 0; unsigned long long interval = 0; unsigned long long start_delay = 0; char *value_ms = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); if (xml_obj) { xmlAttrPtr xIter = NULL; for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); g_hash_table_remove(action->meta, "id"); field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval = crm_get_interval(value); if (interval > 0) { value_ms = crm_itoa(interval); g_hash_table_replace(action->meta, strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } /* Begin compatibility code ("requires" set on start action not resource) */ value = g_hash_table_lookup(action->meta, "requires"); if (safe_str_neq(action->task, RSC_START) && safe_str_neq(action->task, RSC_PROMOTE)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if (safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if (safe_str_eq(value, "unfencing")) { action->needs = rsc_req_stonith; set_bit(action->rsc->flags, pe_rsc_needs_unfencing); if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires unfencing but fencing is disabled", action->rsc->id); } } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires fencing but fencing is disabled", action->rsc->id); } /* End compatibility code */ } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->task, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (safe_str_eq(value, "standby")) { action->on_fail = action_fail_standby; value = "node standby"; } else if (safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (safe_str_eq(value, "restart-container")) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* for baremetal remote nodes, ensure that any failure that results in * dropping an active connection to a remote node results in fencing of * the remote node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) && (is_rsc_baremetal_remote_node(action->rsc, data_set) && !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && interval == 0) && (safe_str_neq(action->task, CRMD_ACTION_START)))) { if (!is_set(action->rsc->flags, pe_rsc_managed)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop unmanaged baremetal remote node (enforcing default)"; } else { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence baremetal remote node (default)"; } else { value = "recover baremetal remote node connection (default)"; } if (action->rsc->remote_reconnect_interval) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } else if (interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) { crm_time_t *origin = NULL; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); origin = crm_time_new(value); if (origin == NULL) { crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s", ID(xml_obj), value); } else { crm_time_t *delay = NULL; int rc = crm_time_compare(origin, data_set->now); long long delay_s = 0; int interval_s = (interval / 1000); crm_trace("Origin: %s, interval: %d", value, interval_s); /* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */ while(rc > 0) { crm_time_add_seconds(origin, -interval_s); rc = crm_time_compare(origin, data_set->now); } /* Now find the first "multiple" that occurs after 'now' */ while (rc < 0) { crm_time_add_seconds(origin, interval_s); rc = crm_time_compare(origin, data_set->now); } delay = crm_time_calculate_duration(origin, data_set->now); crm_time_log(LOG_TRACE, "origin", origin, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "now", data_set->now, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration); delay_s = crm_time_get_seconds(delay); CRM_CHECK(delay_s >= 0, delay_s = 0); start_delay = delay_s * 1000; crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj)); g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); crm_time_free(origin); crm_time_free(delay); } } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if (value == NULL && xml_obj == NULL && safe_str_eq(action->task, RSC_STATUS) && interval == 0) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); pe_rsc_trace(action->rsc, "\t%s uses the timeout value '%s' from the minimum interval monitor", action->uuid, value); } } if (value == NULL) { value = pe_pref(data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } static xmlNode * find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled) { unsigned long long number = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } number = crm_get_interval(interval); match_key = generate_op_key(rsc->id, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = generate_op_key(rsc->clone_name, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = generate_op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = generate_op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } CRM_ASSERT(node->details); crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details) { long options = pe_print_log | pe_print_pending; if (rsc == NULL) { do_crm_log(log_level - 1, "%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* action_wrapper_t* */ g_list_free_full(action->actions_after, free); /* action_wrapper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->cancel_task); free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if (value == NULL) { /* skip */ } else if (safe_str_eq(value, "0")) { /* skip */ } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; break; default: break; } } return task; } action_t * find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (uuid != NULL && safe_str_neq(uuid, action->uuid)) { continue; } else if (task != NULL && safe_str_neq(task, action->task)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (safe_str_neq(key, action->uuid)) { crm_trace("%s does not match action %s", key, action->uuid); continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, on_node->details->uname); action->node = node_copy(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } else { crm_trace("Action %s on node %s does not match requested node %s", key, action->node->details->uname, on_node->details->uname); } } return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { crm_trace("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if (on_node == NULL || action->node == NULL) { crm_trace("on_node=%p, action->node=%p", on_node, action->node); continue; } else if (safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_prepend(result, action); } crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); } return result; } static void resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag) { node_t *match = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = node_copy(node); match->weight = merge_weights(score, node->weight); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node_iter = (node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); if (safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_const_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_const_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ int last_a = -1; int last_b = -1; crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %d vs %d", last_a, last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; int dummy = -1; const char *a_magic = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic a"); } if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (ie. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (value == NULL || safe_str_eq("started", value) || safe_str_eq("default", value)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (uber_parent(rsc)->variant == pe_master) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { crm_config_err("%s is not part of a master/slave resource, a %s of '%s' makes no sense", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... its happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *after = (action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { action_t *op = NULL; if(data_set->singletons) { op = g_hash_table_lookup(data_set->singletons, name); } if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_pseudo); set_bit(op->flags, pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { ticket_t *ticket = NULL; if (ticket_id == NULL || strlen(ticket_id) == 0) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } static void filter_parameters(xmlNode * param_set, const char *param_string, bool need_present) { int len = 0; char *name = NULL; char *match = NULL; if (param_set == NULL) { return; } if (param_set) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; xIter = xIter->next; name = NULL; len = strlen(prop_name) + 3; name = malloc(len); if(name) { sprintf(name, " %s ", prop_name); name[len - 1] = 0; match = strstr(param_string, name); } if (need_present && match == NULL) { crm_trace("%s not found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } else if (need_present == FALSE && match) { crm_trace("%s found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } free(name); } } } bool fix_remote_addr(resource_t * rsc) { const char *name; const char *value; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; const char *value_list[] = { "remote", "ocf", "pacemaker" }; name = "addr"; value = g_hash_table_lookup(rsc->parameters, name); if (safe_str_eq(value, "#uname") == FALSE) { return FALSE; } for (int lpc = 0; rsc && lpc < DIMOF(attr_list); lpc++) { name = attr_list[lpc]; value = crm_element_value(rsc->xml, attr_list[lpc]); if (safe_str_eq(value, value_list[lpc]) == FALSE) { return FALSE; } } return TRUE; } +static void +append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params) +{ + GHashTable *hash = pe_unpack_versioned_parameters(versioned_params, ra_version); + char *key = NULL; + char *value = NULL; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, hash); + while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { + crm_xml_add(params, key, value); + } + g_hash_table_destroy(hash); +} + op_digest_cache_t * rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; GHashTable *local_rsc_params = NULL; #ifdef ENABLE_VERSIONED_ATTRS xmlNode *local_versioned_params = NULL; #endif action_t *action = NULL; char *key = NULL; int interval = 0; const char *op_id = ID(xml_op); const char *interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_all; const char *digest_restart; const char *secure_list; const char *restart_list; const char *op_version; + const char *ra_version; data = g_hash_table_lookup(node->details->digest_cache, op_id); if (data) { return data; } data = calloc(1, sizeof(op_digest_cache_t)); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); + ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION); /* key is freed in custom_action */ interval = crm_parse_int(interval_s, "0"); key = generate_op_key(rsc->id, task, interval); action = custom_action(rsc, key, task, node, TRUE, FALSE, data_set); key = NULL; local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_rsc_params, rsc, node, data_set); #ifdef ENABLE_VERSIONED_ATTRS local_versioned_params = create_xml_node(NULL, XML_TAG_VER_ATTRS); pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set); #endif data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); if(fix_remote_addr(rsc) && node) { // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside crm_xml_add(data->params_all, "addr", node->details->uname); crm_trace("Fixing addr for %s on %s", rsc->id, node->details->uname); } g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); + append_versioned_params(local_versioned_params, ra_version, data->params_all); + append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all); filter_action_parameters(data->params_all, op_version); -#ifdef ENABLE_VERSIONED_ATTRS - crm_summarize_versioned_params(data->params_all, rsc->versioned_parameters); - crm_summarize_versioned_params(data->params_all, local_versioned_params); -#endif data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (secure_list && is_set(data_set->flags, pe_flag_sanitized)) { data->params_secure = copy_xml(data->params_all); if (secure_list) { filter_parameters(data->params_secure, secure_list, FALSE); } data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version); } if (digest_restart) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_parameters(data->params_restart, restart_list, TRUE); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } data->rc = RSC_DIGEST_MATCH; if (digest_restart && strcmp(data->digest_restart_calc, digest_restart) != 0) { data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { data->rc = RSC_DIGEST_ALL; } g_hash_table_insert(node->details->digest_cache, strdup(op_id), data); g_hash_table_destroy(local_rsc_params); #ifdef ENABLE_VERSIONED_ATTRS free_xml(local_versioned_params); #endif pe_free_action(action); return data; } const char *rsc_printable_id(resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void clear_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void set_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; set_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; set_bit_recursive(child_rsc, flag); } } action_t * pe_fence_op(node_t * node, const char *op, bool optional, pe_working_set_t * data_set) { char *key = NULL; action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); if(data_set->singletons) { stonith_op = g_hash_table_lookup(data_set->singletons, key); } if(stonith_op == NULL) { stonith_op = custom_action(NULL, key, CRM_OP_FENCE, node, optional, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); } else { free(key); } if(optional == FALSE) { crm_trace("%s is no longer optional", stonith_op->uuid); pe_clear_action_bit(stonith_op, pe_action_optional); } return stonith_op; } void trigger_unfencing( resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set) { if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { action_t *unfence = pe_fence_op(node, "on", FALSE, data_set); crm_notice("Unfencing %s: %s", node->details->uname, reason); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { tag_t *tag = NULL; GListPtr gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (crm_str_eq(existing_ref, obj_ref, TRUE)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am index 54e73220ed..1d50e1150b 100644 --- a/lrmd/Makefile.am +++ b/lrmd/Makefile.am @@ -1,76 +1,71 @@ # Copyright (c) 2012 David Vossel # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # include $(top_srcdir)/Makefile.common lrmdlibdir = $(CRM_DAEMON_DIR) lrmdlib_PROGRAMS = lrmd lrmd_internal_ctl # Test components lrmdlib_PROGRAMS += lrmd_test testdir = $(datadir)/$(PACKAGE)/tests/lrmd test_SCRIPTS = regression.py initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote sbin_PROGRAMS = pacemaker_remoted if BUILD_SYSTEMD systemdunit_DATA = pacemaker_remote.service endif lrmd_CFLAGS = $(CFLAGS_HARDENED_EXE) lrmd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -VERSIONED_LDADD = $(top_builddir)/lib/lrmd/liblrmd.la \ - $(top_builddir)/lib/pengine/libpe_rules.la -#VERSIONED_LDADD = - lrmd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ - $(VERSIONED_LDADD) ${COMPAT_LIBS} + $(top_builddir)/lib/fencing/libstonithd.la ${COMPAT_LIBS} lrmd_SOURCES = main.c lrmd.c pacemaker_remoted_CPPFLAGS = -DSUPPORT_REMOTE $(AM_CPPFLAGS) pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_remoted_LDADD = $(lrmd_LDADD) \ $(top_builddir)/lib/lrmd/liblrmd.la pacemaker_remoted_SOURCES = main.c lrmd.c tls_backend.c ipc_proxy.c lrmd_internal_ctl_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la lrmd_internal_ctl_SOURCES = remote_ctl.c lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la lrmd_test_SOURCES = test.c noinst_HEADERS = lrmd_private.h CLEANFILES = $(man8_MANS) diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 1985385392..245149b8f7 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1,1843 +1,1717 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include -#ifdef ENABLE_VERSIONED_ATTRS -#include -#endif #include #ifdef HAVE_SYS_TIMEB_H # include #endif #define EXIT_REASON_MAX_LEN 128 GHashTable *rsc_list = NULL; -#ifdef ENABLE_VERSIONED_ATTRS -regex_t *version_format_regex = NULL; -GHashTable *ra_version_hash = NULL; -#endif typedef struct lrmd_cmd_s { int timeout; int interval; int start_delay; int timeout_orig; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *exit_reason; char *output; char *userdata_str; /* when set, this cmd should go through a container wrapper */ const char *isolation_wrapper; #ifdef HAVE_SYS_TIMEB_H /* recurring and systemd operations may involve more than one lrmd command * per operation, so they need info about original and most recent */ struct timeb t_first_run; /* Timestamp of when op first ran */ struct timeb t_run; /* Timestamp of when op most recently ran */ struct timeb t_first_queue; /* Timestamp of when op first was queued */ struct timeb t_queue; /* Timestamp of when op most recently was queued */ struct timeb t_rcchange; /* Timestamp of last rc change */ #endif int first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; - xmlNode *versioned_attrs; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } #ifdef HAVE_SYS_TIMEB_H do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc, exec_time, queue_time); #else do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (safe_str_eq(action, "monitor") && (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_LSB) || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE) || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SYSTEMD))) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } -#ifdef ENABLE_VERSIONED_ATTRS -static gboolean -valid_version_format(const char *version) -{ - if (version == NULL) { - return FALSE; - } - - if (version_format_regex == NULL) { - const char *regex_string = "^[[:digit:]]+([.][[:digit:]]+)*$"; - version_format_regex = calloc(1, sizeof(regex_t)); - regcomp(version_format_regex, regex_string, REG_EXTENDED); - } - - return regexec(version_format_regex, version, 0, NULL, 0) == 0; -} - -static const char* -get_ra_version(const char *class, const char *provider, const char *type, gboolean update_hash) -{ - int len = 0; - char *key = NULL; - static const char *default_version = "0.1"; - const char *version = NULL; - - CRM_CHECK(type != NULL, return default_version); - CRM_CHECK(class != NULL, return default_version); - - if (!provider) { - provider = "heartbeat"; - } - - len = strlen(class) + strlen(provider) + strlen(type) + 3; - key = calloc(len, sizeof(char)); - - if (!key) { - return default_version; - } - - if (!ra_version_hash) { - ra_version_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); - } - - snprintf(key, len, "%s:%s:%s", class, provider, type); - - version = g_hash_table_lookup(ra_version_hash, key); - - if (!version || update_hash) { - char *metadata = NULL; - lrmd_t *lrmd = lrmd_api_new(); - - lrmd->cmds->get_metadata(lrmd, class, provider, type, &metadata, 0); - lrmd_api_delete(lrmd); - - if (metadata) { - xmlNode *metadata_xml = string2xml(metadata); - xmlNode *version_xml = get_xpath_object("//resource-agent/@version", metadata_xml, LOG_TRACE); - - if (version_xml) { - version = crm_element_value(version_xml, XML_ATTR_VERSION); - - if (valid_version_format(version)) { - crm_info("Resource agent version for %s:%s:%s is %s", class, provider, type, version); - version = strdup(version); - g_hash_table_replace(ra_version_hash, strdup(key), (gpointer) version); - } else { - crm_notice("Resource agent version for %s:%s:%s has unrecognized format: %s", class, provider, type, version); - version = default_version; - } - } else { - crm_trace("Resource agent version for %s:%s:%s does not specified", class, provider, type); - version = default_version; - } - - free_xml(metadata_xml); - free(metadata); - } else { - version = default_version; - } - } - - free(key); - - return version; -} -#endif - static lrmd_cmd_t * create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); - -#ifdef ENABLE_VERSIONED_ATTRS - cmd->versioned_attrs = first_named_child(rsc_xml, XML_TAG_VER_ATTRS); - - if (cmd->versioned_attrs) { - const char *ra_version = get_ra_version(rsc->class, rsc->provider, rsc->type, safe_str_eq(cmd->action, "start")); - GHashTable *node_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); - GHashTable *hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); - - cmd->versioned_attrs = copy_xml(cmd->versioned_attrs); - g_hash_table_insert(node_hash, strdup("#ra-version"), strdup(ra_version)); - unpack_instance_attributes(NULL, cmd->versioned_attrs, XML_TAG_ATTR_SETS, node_hash, hash, NULL, FALSE, NULL); - g_hash_table_foreach(hash, dup_attr, cmd->params); - - g_hash_table_destroy(node_hash); - g_hash_table_destroy(hash); - } -#endif - cmd->isolation_wrapper = g_hash_table_lookup(cmd->params, "CRM_meta_isolation_wrapper"); if (cmd->isolation_wrapper) { if (g_hash_table_lookup(cmd->params, "CRM_meta_isolation_instance") == NULL) { g_hash_table_insert(cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id)); } if (rsc->provider) { g_hash_table_insert(cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider)); } g_hash_table_insert(cmd->params, strdup("CRM_meta_class"), strdup(rsc->class)); g_hash_table_insert(cmd->params, strdup("CRM_meta_type"), strdup(rsc->type)); } if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) { crm_debug("Setting flag to leave pid group on timeout and only kill action pid for %s_%s_%d", cmd->rsc_id, cmd->action, cmd->interval); cmd->service_flags |= SVC_ACTION_LEAVE_GROUP; } return cmd; } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } -#ifdef ENABLE_VERSIONED_ATTRS - if (cmd->versioned_attrs) { - free_xml(cmd->versioned_attrs); - } -#endif free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->exit_reason); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); return FALSE; } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } static gboolean merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { GListPtr gIter = NULL; lrmd_cmd_t * dup = NULL; gboolean dup_pending = FALSE; if (cmd->interval == 0) { return 0; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { dup_pending = TRUE; goto merge_dup; } } /* if dup is in recurring_ops list, that means it has already executed * and is in the interval loop. we can't just remove it in this case. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { goto merge_dup; } } return FALSE; merge_dup: /* This should not occur, if it does we need to investigate in the crmd * how something like this is possible */ crm_warn("Duplicate recurring op entry detected (%s_%s_%d), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); /* merge */ dup->first_notify_sent = 0; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* if we are waiting for the next interval, kick it off now */ if (dup_pending == TRUE) { g_source_remove(cmd->stonith_recurring_id); cmd->stonith_recurring_id = 0; stonith_recurring_op_helper(cmd); } } else if (dup_pending == FALSE) { /* if we've already handed this to the service lib, kick off an early execution */ services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); } free_lrmd_cmd(cmd); return TRUE; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { gboolean dup_processed = FALSE; CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); dup_processed = merge_recurring_duplicate(rsc, cmd); if (dup_processed) { /* duplicate recurring cmd found, cmds merged */ return; } /* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */ if (rsc && safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static void send_reply(crm_client_t * client, int rc, uint32_t id, int call_id) { int send_rc = 0; xmlNode *reply = NULL; reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; if (client == NULL) { crm_err("Asked to send event to NULL client"); return; } else if (client->name == NULL) { crm_trace("Asked to send event to client with no name"); return; } if (lrmd_server_send_notify(client, update_msg) <= 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } #ifdef HAVE_SYS_TIMEB_H /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or has time zero) */ static int time_diff_ms(struct timeb *now, struct timeb *old) { struct timeb local_now = { 0, }; if (now == NULL) { ftime(&local_now); now = &local_now; } if ((old == NULL) || (old->time == 0)) { return 0; } return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * /param[in] cmd LRMD command object to reset * * /note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { int exec_time = 0; int queue_time = 0; xmlNode *notify = NULL; #ifdef HAVE_SYS_TIMEB_H exec_time = time_diff_ms(NULL, &cmd->t_run); queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } cmd->first_notify_sent = 1; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } -#ifdef ENABLE_VERSIONED_ATTRS - - if (cmd->versioned_attrs) { - add_node_copy(notify, cmd->versioned_attrs); - } - -#endif if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { crm_client_t *client = crm_client_get_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else if (client_connections != NULL) { g_hash_table_foreach(client_connections, send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (client_connections != NULL) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_connections, send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->lrmd_op_status = 0; cmd->last_pid = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->exit_reason); cmd->exit_reason = NULL; free(cmd->output); cmd->output = NULL; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } #if SUPPORT_HEARTBEAT static int pattern_matched(const char *pat, const char *str) { if (g_pattern_match_simple(pat, str)) { crm_debug("RA output matched stopped pattern [%s]", pat); return TRUE; } return FALSE; } static int hb2uniform_rc(const char *action, int rc, const char *stdout_data) { const char *stop_pattern[] = { "*stopped*", "*not*running*" }; const char *running_pattern[] = { "*running*", "*OK*" }; char *lower_std_output = NULL; int result; if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } /* Treat class heartbeat the same as class lsb. */ if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) { return services_get_ocf_exitcode(action, rc); } /* for status though, exit code is ignored, * and the stdout is scanned for specific strings */ if (stdout_data == NULL) { crm_warn("No status output from the (hb) resource agent, assuming stopped"); return PCMK_OCF_NOT_RUNNING; } lower_std_output = g_ascii_strdown(stdout_data, -1); if (pattern_matched(stop_pattern[0], lower_std_output) || pattern_matched(stop_pattern[1], lower_std_output)) { result = PCMK_OCF_NOT_RUNNING; } else if (pattern_matched(running_pattern[0], lower_std_output) || pattern_matched(running_pattern[1], stdout_data)) { /* "OK" is matched case sensitive */ result = PCMK_OCF_OK; } else { /* It didn't say it was running - must be stopped */ crm_debug("RA output did not match any pattern, assuming stopped"); result = PCMK_OCF_NOT_RUNNING; } free(lower_std_output); return result; } #endif static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_OCF_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { if (rc == -ENODEV) { if (safe_str_eq(action, "stop")) { rc = PCMK_OCF_OK; } else if (safe_str_eq(action, "start")) { rc = PCMK_OCF_NOT_INSTALLED; } else { rc = PCMK_OCF_NOT_RUNNING; } } else if (rc != 0) { rc = PCMK_OCF_UNKNOWN_ERROR; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) { return ocf2uniform_rc(rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) { return stonith2uniform_rc(action, rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) { return rc; } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) { return rc; #if SUPPORT_NAGIOS } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios2uniform_rc(action, rc); #endif } else { return services_get_ocf_exitcode(action, rc); } } static int action_get_uniform_rc(svc_action_t * action) { lrmd_cmd_t *cmd = action->cb_data; #if SUPPORT_HEARTBEAT if (safe_str_eq(action->standard, PCMK_RESOURCE_CLASS_HB)) { return hb2uniform_rc(cmd->action, action->rc, action->stdout_data); } #endif return get_uniform_rc(action->standard, cmd->action, action->rc); } void notify_of_new_client(crm_client_t *new_client) { crm_client_t *client = NULL; GHashTableIter iter; xmlNode *notify = NULL; char *key = NULL; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) { if (safe_str_eq(client->id, new_client->id)) { continue; } send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify); } free_xml(notify); } static char * parse_exit_reason(const char *output) { const char *cur = NULL; const char *last = NULL; char *reason = NULL; static int cookie_len = 0; char *eol = NULL; if (output == NULL) { return NULL; } if (!cookie_len) { cookie_len = strlen(PCMK_OCF_REASON_PREFIX); } cur = strstr(output, PCMK_OCF_REASON_PREFIX); for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { /* skip over the cookie delimiter string */ cur += cookie_len; last = cur; } if (last == NULL) { return NULL; } /* make our own copy */ reason = calloc(1, (EXIT_REASON_MAX_LEN+1)); CRM_ASSERT(reason); /* limit reason string size */ strncpy(reason, last, EXIT_REASON_MAX_LEN); /* truncate everything after a new line */ eol = strchr(reason, '\n'); if (eol != NULL) { *eol = '\0'; } return reason; } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; const char *rclass = NULL; bool goagain = false; if (!cmd) { crm_err("LRMD action (%s) completed does not match any known operations.", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->last_pid = action->pid; cmd->exec_rc = action_get_uniform_rc(action); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) { rclass = resources_find_service_class(rsc->class); } else if(rsc) { rclass = rsc->class; } if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) { if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) { /* systemd I curse thee! * * systemd returns from start actions after the start _begins_ * not after it completes. * * So we have to jump through a few hoops so that we don't * report 'complete' to the rest of pacemaker until, you know, * it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) { goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->real_action) { /* Ok, so this is the follow up monitor action to check if start actually completed */ if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) { goagain = true; } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) { goagain = true; } else { #ifdef HAVE_SYS_TIMEB_H int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc); cmd_original_times(cmd); #endif if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_NOT_RUNNING && safe_str_eq(cmd->real_action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; } } } } #if SUPPORT_NAGIOS if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) { if (safe_str_eq(cmd->action, "monitor") && cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) { /* Successfully executed --version for the nagios plugin */ cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) { goagain = true; } } #endif /* Wrapping this section in ifdef implies that systemd resources are not * fully supported on platforms without sys/timeb.h. Since timeb is * obsolete, we should eventually prefer a clock_gettime() implementation * (wrapped in its own ifdef) with timeb as a fallback. */ #ifdef HAVE_SYS_TIMEB_H if(goagain) { int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if(cmd->exec_rc == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if(cmd->exec_rc == PCMK_OCF_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left); cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; cmd->exec_rc = PCMK_OCF_TIMEOUT; cmd_original_times(cmd); } } #endif if (action->stderr_data) { cmd->output = strdup(action->stderr_data); cmd->exit_reason = parse_exit_reason(action->stderr_data); } else if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { int recurring = cmd->interval; lrmd_rsc_t *rsc = NULL; cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, rc); rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) { recurring = 0; /* do nothing */ } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) { /* Not registered == inactive */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (rc) { /* Attempt to map return codes to op status if possible */ switch (rc) { case -EPROTONOSUPPORT: cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; break; default: /* TODO: This looks wrong. Status should be _DONE and exec_rc set to an error */ cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } } else { /* command successful */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; if (safe_str_eq(cmd->action, "start") && rsc) { rsc->stonith_started = 1; } } if (recurring && rsc) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("STONITH connection failed, finalizing %d pending operations.", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } static int lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; int do_monitor = 0; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, -ENOTCONN); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; cmd_finalize(cmd, rsc); return -EUNATCH; } if (safe_str_eq(cmd->action, "start")) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* Stonith automatically registers devices from the IPC when changes occur, * but to avoid a possible race condition between stonith receiving the IPC update * and the lrmd requesting that resource, the lrmd still registers the device as well. * Stonith knows how to handle duplicate device registrations correctly. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); if (rc == 0) { do_monitor = 1; } } else if (safe_str_eq(cmd->action, "stop")) { rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); rsc->stonith_started = 0; } else if (safe_str_eq(cmd->action, "monitor")) { if (cmd->interval) { do_monitor = 1; } else { rc = rsc->stonith_started ? 0 : -ENODEV; } } if (!do_monitor) { goto cleanup_stonith_exec; } rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); /* don't cleanup yet, we will find out the result of the monitor later */ if (rc > 0) { rsc->active = cmd; return rc; } else if (rc == 0) { rc = -1; } cleanup_stonith_exec: stonith_action_complete(cmd, rc); return rc; } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS) && safe_str_eq(cmd->action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; goto exec_done; } #endif if (cmd->params) { params_copy = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (params_copy != NULL) { g_hash_table_foreach(cmd->params, dup_attr, params_copy); } } if (cmd->isolation_wrapper) { g_hash_table_remove(params_copy, "CRM_meta_isolation_wrapper"); action = resources_action_create(rsc->rsc_id, PCMK_RESOURCE_CLASS_OCF, LRMD_ISOLATION_PROVIDER, cmd->isolation_wrapper, cmd->action, /*action will be normalized in wrapper*/ cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } else { action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point if * services_action_async() returned TRUE * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary. */ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; if(action->status != PCMK_LRM_OP_DONE) { cmd->lrmd_op_status = action->status; } else { cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H if (cmd->t_first_run.time == 0) { ftime(&cmd->t_first_run); } ftime(&cmd->t_run); #endif } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH); gIter = rsc->pending_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *reply = create_xml_node(NULL, "reply"); const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_PROTOCOL_VERSION, protocol_version); crm_xml_add_int(reply, F_LRMD_RC, -EPROTO); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); } crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); lrmd_server_send_reply(client, id, reply); if (crm_is_true(is_ipc_provider)) { /* this is a remote connection from a cluster nodes crmd */ #ifdef SUPPORT_REMOTE ipc_proxy_add_provider(client); #endif } free_xml(reply); return pcmk_ok; } static int process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static void process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int send_rc = 0; int call_id = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (!rsc_id) { rc = -ENODEV; goto get_rsc_done; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; goto get_rsc_done; } get_rsc_done: reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } send_rc = lrmd_server_send_reply(client, id, reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } free_xml(reply); } static int process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation still in progress: %p", rsc->active); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client, rsc); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, int interval) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval == 0) { continue; } if (client_id && safe_str_neq(cmd->client_id, client_id)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); int interval = 0; crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval); } void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { #ifdef SUPPORT_REMOTE ipc_proxy_forward_client(client, request); #endif do_reply = 1; } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { rc = process_lrmd_signon(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { process_lrmd_get_rsc_info(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) { do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG); CRM_LOG_ASSERT(data != NULL); check_sbd_timeout(timeout); } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { send_reply(client, rc, id, call_id); } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/lrmd/lrmd_private.h b/lrmd/lrmd_private.h index 7f06337910..edb12f3bf4 100644 --- a/lrmd/lrmd_private.h +++ b/lrmd/lrmd_private.h @@ -1,122 +1,115 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef LRMD_PVT__H # define LRMD_PVT__H # include -#ifdef ENABLE_VERSIONED_ATTRS -# include -#endif # include # include # include # ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include # endif #define LRMD_ISOLATION_PROVIDER ".isolation" GHashTable *rsc_list; -#ifdef ENABLE_VERSIONED_ATTRS -extern regex_t *version_format_regex; -extern GHashTable *ra_version_hash; -#endif typedef struct lrmd_rsc_s { char *rsc_id; char *class; char *provider; char *type; int call_opts; /* NEVER dereference this pointer, * It simply exists as a switch to let us know * when the currently active operation has completed */ void *active; /* Operations in this list * have not been executed yet. */ GList *pending_ops; /* Operations in this list are recurring operations * that have been handed off from the pending ops list. */ GList *recurring_ops; int stonith_started; crm_trigger_t *work; } lrmd_rsc_t; # ifdef HAVE_GNUTLS_GNUTLS_H /* in remote_tls.c */ int lrmd_init_remote_tls_server(int port); void lrmd_tls_server_destroy(void); /* Hidden in lrmd client lib */ extern int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); extern int lrmd_tls_set_key(gnutls_datum_t * key); # endif int lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply); int lrmd_server_send_notify(crm_client_t * client, xmlNode * msg); void notify_of_new_client(crm_client_t *new_client); void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request); void free_rsc(gpointer data); void handle_shutdown_ack(void); void handle_shutdown_nack(void); void lrmd_client_destroy(crm_client_t *client); void client_disconnect_cleanup(const char *client_id); /*! * \brief Don't worry about freeing this connection. It is * taken care of after mainloop exits by the main() function. */ stonith_t *get_stonith_connection(void); /*! * \brief This is a callback that tells the lrmd * the current stonith connection has gone away. This allows * us to timeout any pending stonith commands */ void stonith_connection_failed(void); #ifdef SUPPORT_REMOTE void ipc_proxy_init(void); void ipc_proxy_cleanup(void); void ipc_proxy_add_provider(crm_client_t *client); void ipc_proxy_remove_provider(crm_client_t *client); void ipc_proxy_forward_client(crm_client_t *client, xmlNode *xml); crm_client_t *ipc_proxy_get_provider(void); int ipc_proxy_shutdown_req(crm_client_t *ipc_proxy); #endif #endif diff --git a/lrmd/main.c b/lrmd/main.c index 995ff3c4d9..f1dc1f9a90 100644 --- a/lrmd/main.c +++ b/lrmd/main.c @@ -1,642 +1,633 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE) # define ENABLE_PCMK_REMOTE #endif GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; stonith_t *stonith_api = NULL; int lrmd_call_id = 0; #ifdef ENABLE_PCMK_REMOTE /* whether shutdown request has been sent */ static volatile sig_atomic_t shutting_down = FALSE; /* timer for waiting for acknowledgment of shutdown request */ static volatile guint shutdown_ack_timer = 0; static gboolean lrmd_exit(gpointer data); #endif static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; crm_err("LRMD lost STONITH connection"); stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith_api_delete(stonith_api); stonith_api = NULL; } if (!stonith_api) { int rc = 0; int tries = 10; stonith_api = stonith_api_new(); do { rc = stonith_api->cmds->connect(stonith_api, "lrmd", NULL); if (rc == pcmk_ok) { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); break; } sleep(1); tries--; } while (tries); if (rc) { crm_err("Unable to connect to stonith daemon to execute command. error: %s", pcmk_strerror(rc)); stonith_api_delete(stonith_api); stonith_api = NULL; } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { crm_client_t *new_client = crm_client_get(c); crm_trace("Connection %p", c); CRM_ASSERT(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *request = crm_ipcs_recv(client, data, size, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } if (!client->name) { const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); if (value == NULL) { client->name = crm_itoa(crm_ipcs_client_pid(c)); } else { client->name = strdup(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, F_LRMD_CLIENTID, client->id); crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); free_xml(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in] client Client connection to free */ void lrmd_client_destroy(crm_client_t *client) { crm_client_destroy(client); #ifdef ENABLE_PCMK_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { lrmd_exit(NULL); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef ENABLE_PCMK_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; int lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply) { crm_trace("sending reply to client (%s) with msg id %d", client->id, id); switch (client->kind) { case CRM_CLIENT_IPC: return crm_ipcs_send(client, id, reply, FALSE); #ifdef ENABLE_PCMK_REMOTE case CRM_CLIENT_TLS: return lrmd_tls_send_msg(client->remote, reply, id, "reply"); #endif default: crm_err("Unknown lrmd client type %d", client->kind); } return -1; } int lrmd_server_send_notify(crm_client_t * client, xmlNode * msg) { crm_trace("sending notify to client (%s)", client->id); switch (client->kind) { case CRM_CLIENT_IPC: if (client->ipcs == NULL) { crm_trace("Asked to send event to disconnected local client"); return -1; } return crm_ipcs_send(client, 0, msg, crm_ipc_server_event); #ifdef ENABLE_PCMK_REMOTE case CRM_CLIENT_TLS: if (client->remote == NULL) { crm_trace("Asked to send event to disconnected remote client"); return -1; } return lrmd_tls_send_msg(client->remote, msg, 0, "notify"); #endif default: crm_err("Unknown lrmd client type %d", client->kind); } return -1; } /*! * \internal * \brief Clean up and exit immediately * * \param[in] data Ignored * * \return Doesn't return * \note This can be used as a timer callback. */ static gboolean lrmd_exit(gpointer data) { crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (stonith_api) { stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->disconnect(stonith_api); stonith_api_delete(stonith_api); } if (ipcs) { mainloop_del_ipc_server(ipcs); } -#ifdef ENABLE_VERSIONED_ATTRS - if (version_format_regex) { - regfree(version_format_regex); - free(version_format_regex); - } - if (ra_version_hash) { - g_hash_table_destroy(ra_version_hash); - } -#endif #ifdef ENABLE_PCMK_REMOTE lrmd_tls_server_destroy(); ipc_proxy_cleanup(); #endif crm_client_cleanup(); g_hash_table_destroy(rsc_list); crm_exit(pcmk_ok); return FALSE; } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef ENABLE_PCMK_REMOTE crm_client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host (which ensures the proxy host * supports shutdown requests), then wait for all proxy hosts to * disconnect (which ensures that all resources have been stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ lrmd_tls_server_destroy(); /* Older crmd versions will never acknowledge our request, so set a * fairly short timeout to exit quickly in that case. If we get the * ack, we'll defuse this timer. */ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif lrmd_exit(NULL); } /*! * \internal * \brief Defuse short exit timer if shutting down */ void handle_shutdown_ack() { #ifdef ENABLE_PCMK_REMOTE if (shutting_down) { crm_info("Received shutdown ack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = 0; } return; } #endif crm_debug("Ignoring unexpected shutdown ack"); } /*! * \internal * \brief Make short exit timer fire immediately */ void handle_shutdown_nack() { #ifdef ENABLE_PCMK_REMOTE if (shutting_down) { crm_info("Received shutdown nack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL); } return; } #endif crm_debug("Ignoring unexpected shutdown nack"); } static pid_t main_pid = 0; static void sigdone(void) { exit(0); } static void sigreap(void) { pid_t pid = 0; int status; do { /* * Opinions seem to differ as to what to put here: * -1, any child process * 0, any child process whose process group ID is equal to that of the calling process */ pid = waitpid(-1, &status, WNOHANG); if(pid == main_pid) { /* Exit when pacemaker-remote exits and use the same return code */ if (WIFEXITED(status)) { exit(WEXITSTATUS(status)); } exit(1); } } while (pid > 0); } static struct { int sig; void (*handler)(void); } sigmap[] = { { SIGCHLD, sigreap }, { SIGINT, sigdone }, }; static void spawn_pidone(int argc, char **argv, char **envp) { sigset_t set; if (getpid() != 1) { return; } sigfillset(&set); sigprocmask(SIG_BLOCK, &set, 0); main_pid = fork(); switch (main_pid) { case 0: sigprocmask(SIG_UNBLOCK, &set, NULL); setsid(); setpgid(0, 0); /* Child remains as pacemaker_remoted */ return; case -1: perror("fork"); } /* Parent becomes the reaper of zombie processes */ /* Safe to initialize logging now if needed */ #ifdef HAVE___PROGNAME /* Differentiate ourselves in the 'ps' output */ { char *p; int i, maxlen; char *LastArgv = NULL; const char *name = "pcmk-init"; for(i = 0; i < argc; i++) { if(!i || (LastArgv + 1 == argv[i])) LastArgv = argv[i] + strlen(argv[i]); } for(i = 0; envp[i] != NULL; i++) { if((LastArgv + 1) == envp[i]) { LastArgv = envp[i] + strlen(envp[i]); } } maxlen = (LastArgv - argv[0]) - 2; i = strlen(name); /* We can overwrite individual argv[] arguments */ snprintf(argv[0], maxlen, "%s", name); /* Now zero out everything else */ p = &argv[0][i]; while(p < LastArgv) *p++ = '\0'; argv[1] = NULL; } #endif /* HAVE___PROGNAME */ while (1) { int sig; size_t i; sigwait(&set, &sig); for (i = 0; i < DIMOF(sigmap); i++) { if (sigmap[i].sig == sig) { sigmap[i].handler(); break; } } } } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"logfile", 1, 0, 'l', "\tSend logs to the additional named logfile"}, #ifdef ENABLE_PCMK_REMOTE {"port", 1, 0, 'p', "\tPort to listen on"}, #endif /* For compatibility with the original lrmd */ {"dummy", 0, 0, 'r', NULL, 1}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv, char **envp) { int flag = 0; int index = 0; int bump_log_num = 0; const char *option = NULL; /* If necessary, create PID1 now before any FDs are opened */ spawn_pidone(argc, argv, envp); #ifndef ENABLE_PCMK_REMOTE crm_log_preinit("lrmd", argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for controlling services confirming to different standards"); #else crm_log_preinit("pacemaker_remoted", argc, argv); crm_set_options(NULL, "[options]", long_options, "Pacemaker Remote daemon for extending pacemaker functionality to remote nodes."); #endif while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) { break; } switch (flag) { case 'r': break; case 'l': crm_add_logfile(optarg); break; case 'p': setenv("PCMK_remote_port", optarg, 1); break; case 'V': bump_log_num++; break; case '?': case '$': crm_help(flag, EX_OK); break; default: crm_help('?', EX_USAGE); break; } } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); while (bump_log_num > 0) { crm_bump_log_level(argc, argv); bump_log_num--; } option = daemon_option("logfacility"); if(option && safe_str_neq(option, "none")) { setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */ } option = daemon_option("logfile"); if(option && safe_str_neq(option, "none")) { setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */ if (daemon_option_enabled(crm_system_name, "debug")) { setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */ } } /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); /* Used by RAs - Leave owned by root */ crm_build_path(CRM_RSCTMP_DIR, 0755); /* Legacy: Used by RAs - Leave owned by root */ crm_build_path(HA_STATE_DIR"/heartbeat/rsctmp", 0755); rsc_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); crm_exit(DAEMON_RESPAWN_STOP); } #ifdef ENABLE_PCMK_REMOTE { const char *remote_port_str = getenv("PCMK_remote_port"); int remote_port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; if (lrmd_init_remote_tls_server(remote_port) < 0) { crm_err("Failed to create TLS server on port %d: shutting down and inhibiting respawn", remote_port); crm_exit(DAEMON_RESPAWN_STOP); } ipc_proxy_init(); } #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_new(FALSE); crm_info("Starting"); g_main_run(mainloop); /* should never get here */ lrmd_exit(NULL); return pcmk_ok; } diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in index 51f649c0bb..ddc0cb7a34 100755 --- a/lrmd/regression.py.in +++ b/lrmd/regression.py.in @@ -1,1293 +1,1246 @@ #!/usr/bin/python """ Regression tests for Pacemaker's lrmd """ # Pacemaker targets compatibility with Python 2.6+ and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) 2012-2016 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import sys import subprocess import shlex import time # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" TEST_DIR = sys.path[0] def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/regression.py.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) new_path = "%s/lrmd:%s" % (BUILD_DIR, new_path) # For lrmd, lrmd_test and pacemaker_remoted new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource new_path = "%s/fencing:%s" % (BUILD_DIR, new_path) # For stonithd else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) # For stonithd, lrmd, lrmd_test and pacemaker_remoted print(new_path) os.environ['PATH'] = new_path def shlex_split(command): """ Wrapper for shlex.split() that works around Python 2.6 bug """ if sys.version_info < (2, 7,): return shlex.split(command.encode('ascii')) else: return shlex.split(command) def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Run a command, and return its standard output. """ test = subprocess.Popen(shlex_split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") class Test(object): """ Executor for a single lrmd regression test """ def __init__(self, name, description, verbose=0, tls=0): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker_remoted" else: self.daemon_location = "lrmd" self.test_tool_location = "lrmd_test" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = 0 self.lrmd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): """ Prepare the host for running a test """ ### make sure we are in full control here ### cmd = shlex_split("killall -q -9 stonithd lt-stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test pacemaker_remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex_split("stonithd -s")) if self.verbose: additional_args = additional_args + " -V" self.lrmd_process = subprocess.Popen(shlex_split("%s %s -l /tmp/lrmd-regression.log" % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): """ Clean up the host after running a test """ if self.lrmd_process: self.lrmd_process.terminate() self.lrmd_process.wait() if self.verbose: print("Daemon output") logfile = io.open('/tmp/lrmd-regression.log', 'rt') for line in logfile.readlines(): print(line.strip()) os.remove('/tmp/lrmd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.lrmd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, 0, "") def add_sys_cmd_no_wait(self, cmd, args): """ Add a simple command to be executed (without waiting) as part of this test """ self.__new_cmd(cmd, args, 0, "", 1) def add_expected_fail_sys_cmd(self, cmd, args, exitcode): """ Add a command to be executed as part of this test and expected to fail """ self.__new_cmd(cmd, args, exitcode) def add_cmd_check_stdout(self, args, match, no_match=""): """ Add a command with expected output to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, 0, match, 0, no_match) def add_cmd(self, args): """ Add an lrmd_test command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, 0, "") def add_cmd_and_kill(self, kill_proc, args): """ Add an lrmd_test command and system command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, 0, "", kill=kill_proc) def add_expected_fail_cmd(self, args): """ Add an lrmd_test command to be executed as part of this test and expected to fail """ self.__new_cmd(self.test_tool_location, args, 1, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex_split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to the lrmd. time.sleep(2) subprocess.Popen(shlex_split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return 0 output = pipe_output(test) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: test.returncode = -2 print("STDOUT string '%s' was not found in cmd output" % (args['stdout_match'])) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: test.returncode = -2 print("STDOUT string '%s' was found in cmd output" % (args['stdout_negative_match'])) args['cmd_output'] = output return test.returncode def run(self): """ Execute this test. """ res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self.result_txt) return res self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = 0 for cmd in self.cmds: res = self.run_cmd(cmd) if res != cmd['expected_exitcode']: print(cmd['cmd_output']) print("Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode'])) msg = "FAILURE - '%s' failed at step %d. Command: lrmd_test %s" self.result_txt = msg % (self.name, i, cmd['args']) self.result_exitcode = -1 break else: if self.verbose: print(cmd['cmd_output'].strip()) print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all lrmd regression tests """ def __init__(self, verbose=0, tls=0): self.tests = [] self.verbose = verbose self.tls = tls self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.need_authkey = 0 self.action_timeout = " -t 5000 " if self.tls: self.rsc_classes.remove("stonith") if "systemd" in self.rsc_classes: try: # This code doesn't need this import, but lrmd_dummy_daemon does, # so ensure the dependency is available rather than cause all # systemd tests to fail. import systemd.daemon except ImportError: print("Fatal error: python systemd bindings not found. Is package installed?", file=sys.stderr) sys.exit(1) print("Testing "+repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : "-c exec -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : "-c cancel -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc "+self.action_timeout+" -C systemd -T lrmd_dummy_daemon", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : "-c exec -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "systemd_cancel_line" : "-c cancel -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T lrmd_dummy_daemon", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : "-c exec -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "upstart_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "upstart_cancel_line" : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : "-c exec -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : "-c exec -r \"lsb_test_rsc\" -a status -i \"2000\" "+self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : "-c cancel -r \"lsb_test_rsc\" -a \"status\" -i \"2000\" -t \"6000\" ", "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "heartbeat_reg_line" : "-c register_rsc -r hb_test_rsc "+self.action_timeout+" -C heartbeat -T HBDummy", "heartbeat_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:hb_test_rsc action:none rc:ok op_status:complete\" ", "heartbeat_unreg_line" : "-c unregister_rsc -r \"hb_test_rsc\" "+self.action_timeout, "heartbeat_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:hb_test_rsc action:none rc:ok op_status:complete\"", "heartbeat_start_line" : "-c exec -r \"hb_test_rsc\" -a \"start\" -k 1 -v a -k 2 -v b "+self.action_timeout, "heartbeat_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:start rc:ok op_status:complete\" ", "heartbeat_stop_line" : "-c exec -r \"hb_test_rsc\" -a \"stop\" -k 1 -v a -k 2 -v b "+self.action_timeout, "heartbeat_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:stop rc:ok op_status:complete\" ", "heartbeat_monitor_line" : "-c exec -r \"hb_test_rsc\" -a status -k 1 -v a -k 2 -v b -i \"2000\" "+self.action_timeout, "heartbeat_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "heartbeat_cancel_line" : "-c cancel -r \"hb_test_rsc\" -a \"status\" -k 1 -v a -k 2 -v b -i \"2000\" -t \"6000\" ", "heartbeat_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc "+self.action_timeout+" -C stonith -P pacemaker -T fence_dummy_monitor", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : "-c exec -r \"stonith_test_rsc\" -a \"start\" -t 8000 ", "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : "-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : "-c cancel -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): """ Create a named test """ test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): """ Prepare the host before executing any tests """ os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): self.need_authkey = 1 os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") ### Make fake systemd daemon and unit file ### dummy_daemon = """#!/usr/bin/python import time, systemd.daemon time.sleep(3) systemd.daemon.notify("READY=1") while True: time.sleep(5) """ dummy_service_file = """ [Unit] Description=Dummy resource that takes a while to start [Service] Type=notify ExecStart=/usr/sbin/lrmd_dummy_daemon """ dummy_upstart_job = (""" description "Dummy service for regression tests" exec dd if=/dev/random of=/dev/null """) dummy_fence_sleep_agent = ("""#!/usr/bin/python import sys import time def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: time.sleep(30000) sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) dummy_fence_agent = ("""#!/usr/bin/python from __future__ import print_function, unicode_literals, absolute_import, division import sys def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: sys.exit(0) if line.count("metadata") > 0: print('') print(' dummy description.') print(' http://www.example.com') print(' ') print(' ') print(' ') print(' ') print(' Fencing Action') print(' ') print(' ') print(' ') print(' ') print(' Physical plug number or name of virtual machine') print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print('') sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) os.system("cat <<-END >>/etc/init/lrmd_dummy_daemon.conf\n%s\nEND" % (dummy_upstart_job)) os.system("cat <<-END >>/usr/sbin/lrmd_dummy_daemon\n%s\nEND" % (dummy_daemon)) os.system("cat <<-END >>/lib/systemd/system/lrmd_dummy_daemon.service\n%s\nEND" % (dummy_service_file)) os.system("chmod a+x /usr/sbin/lrmd_dummy_daemon") os.system("cat <<-END >>/usr/sbin/fence_dummy_sleep\n%s\nEND" % (dummy_fence_sleep_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_sleep") os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor\n%s\nEND" % (dummy_fence_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_monitor") if os.path.exists("%s/cts/LSBDummy" % BUILD_DIR): print("Using %s/cts/LSBDummy" % BUILD_DIR) os.system("cp %s/cts/LSBDummy /etc/init.d/LSBDummy" % BUILD_DIR) if not os.path.exists("@OCF_RA_DIR@/pacemaker"): os.system("mkdir -p @OCF_RA_DIR@/pacemaker/") # Install helper OCF agents for agent in ["Dummy", "Stateful", "ping"]: os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (BUILD_DIR, agent, agent)) os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (agent)) else: # Assume it's installed print("Using @datadir@/@PACKAGE@/tests/cts/LSBDummy") os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy") os.system("chmod a+x /etc/init.d/LSBDummy") os.system("ls -al /etc/init.d/LSBDummy") os.system("mkdir -p @CRM_CORE_DIR@/root") os.system("mkdir -p /etc/ha.d/resource.d") if os.path.exists("%s/cts/HBDummy" % BUILD_DIR): print("Using %s/cts/HBDummy" % BUILD_DIR) os.system("cp %s/cts/HBDummy /etc/ha.d/resource.d/HBDummy" % BUILD_DIR) else: # Assume it's installed print("Using @datadir@/@PACKAGE@/tests/cts/HBDummy") os.system("cp @datadir@/@PACKAGE@/tests/cts/HBDummy /etc/ha.d/resource.d/HBDummy") os.system("chmod a+x /etc/ha.d/resource.d/HBDummy") os.system("ls -al /etc/ha.d/resource.d/HBDummy") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def cleanup_test_environment(self): """ Clean up the host after executing desired tests """ if self.need_authkey: os.system("rm -f /etc/pacemaker/authkey") os.system("rm -f /etc/init.d/LSBDummy") os.system("rm -f /etc/ha.d/resource.d/HBDummy") os.system("rm -f /lib/systemd/system/lrmd_dummy_daemon.service") os.system("rm -f /usr/sbin/lrmd_dummy_daemon") os.system("rm -f /usr/sbin/fence_dummy_monitor") os.system("rm -f /usr/sbin/fence_dummy_sleep") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def build_generic_tests(self): """ Register tests that apply to all resource classes """ common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_multi_rsc_tests(self): """ Register complex tests that involve managing multiple resouces of different types """ common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: ### If this fails, that means the monitor is not being rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_negative_tests(self): """ Register tests related to how the lrmd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") # -t must be less than self.action_timeout test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 1000 -w") # -t must be less than self.action_timeout test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after lrmd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd('-c exec -r "stonith_test_rsc" -a "monitor" -i "600000" -l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + self.action_timeout) test.add_cmd_and_kill("killall -9 -q stonithd lt-stonithd", '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error" -t 15000') test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r "test_rsc" -a "monitor" -i "100" ' + self.action_timeout + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" -t 6000') test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r "test_rsc" -a "monitor" -i "100" ' + self.action_timeout + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T lrmd_dummy_daemon "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q lrmd_dummy_daemon", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T lrmd_dummy_daemon "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q dd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-c cancel -r test_rsc -a \"monitor\" -i 1234 -t \"6000\" " ### interval is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a stop -i 100 -t \"6000\" " ### action name is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a monitor -i 6000 "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") def build_stress_tests(self): """ Register stress tests """ timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T lrmd_dummy_daemon -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd("-c exec -r test_rsc -a monitor %s -i 100%d -k op_sleep -v 2 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\"" % (timeout, i)) # test.add_sys_cmd("sleep", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) - def build_versioned_tests(self): - """ Register tests for versioned resource parameters """ - - ### Verify that versioned resource parameters are chosen properly ### - - # We'll default to one state file, then check the version to use a different one - state_file_default = "@CRM_RSCTMP_DIR@/versioned_params_default.state" - state_file_expected = "@CRM_RSCTMP_DIR@/versioned_params_expected.state" - - # Versioned attributes are passed as a single key-value pair. - # Here, we define the value to choose the state file; - # ocf:heartbeat:Dummy should always be at least version 0.9. - versioned_key = "#versioned_attributes" - versioned_attrs = """ - - - - - - - - - - """ % (state_file_expected, state_file_default) - cmd_params = "-k '%s' -v '%s' " % (versioned_key, versioned_attrs) - - test = self.new_test("versioned_params", "Verify use of versioned resource parameters") - - # First, remove the possible state files, so we can reliably tell what gets created. - test.add_sys_cmd("rm", "-f %s %s" % (state_file_expected, state_file_default)) - - # Register and start the resource. - test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " - "-l 'NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete' " - "%s" % (self.action_timeout)) - test.add_cmd("-c exec -r test_rsc -a start -t 6000 %s" % cmd_params) - - # Check the created state file. - test.add_expected_fail_sys_cmd("ls", state_file_default, 2) - test.add_sys_cmd("ls", state_file_expected) - - # Stop and unregister the resource. - test.add_cmd("-c exec -r test_rsc -a stop -t 4000 %s" % cmd_params) - test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") - def build_custom_tests(self): """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by heartbeat agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -n " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"", "resource-agent name=\"fence_dummy_monitor\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"lrmd_dummy_daemon\"", "resource-agent name=\"lrmd_dummy_daemon\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"lrmd_dummy_daemon\"", "resource-agent name=\"lrmd_dummy_daemon\"") if "heartbeat" in self.rsc_classes: test = self.new_test("get_heartbeat_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"heartbeat\" -T \"HBDummy\"", "resource-agent name='HBDummy'") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor") if "heartbeat" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C heartbeat", "HBDummy") test.add_cmd_check_stdout("-c list_agents -C heartbeat", "", "LSBDummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "HBDummy") ### should not exist def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: sys.exit(-1) sys.exit(0) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": self.options['pacemaker-remote'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--pacemaker-remote | -R Test pacemaker-remote binary instead of lrmd.") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_top'") print("\t\t python ./regression.py --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t python ./regression.py --run-only-pattern systemd") def main(argv): """ Run lrmd regression tests as specified by arguments """ update_path() opts = TestOptions() opts.build_options(argv) tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() - tests.build_versioned_tests() tests.build_custom_tests() tests.build_stress_tests() tests.setup_test_environment() print("Starting ...") if opts.options['list-tests']: tests.print_list() elif opts.options['show-usage']: opts.show_usage() elif opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__ == "__main__": main(sys.argv)