diff --git a/exec/cmap.c b/exec/cmap.c
index 6da48759..ba4c7987 100644
--- a/exec/cmap.c
+++ b/exec/cmap.c
@@ -1,1029 +1,1029 @@
/*
 * Copyright (c) 2011-2012 Red Hat, Inc.
 *
 * All rights reserved.
 *
 * Author: Jan Friesse (jfriesse@redhat.com)
 *
 * This software licensed under BSD license, the text of which follows:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of the Red Hat, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "service.h" LOGSYS_DECLARE_SUBSYS ("CMAP"); #define MAX_REQ_EXEC_CMAP_MCAST_ITEMS 32 #define ICMAP_VALUETYPE_NOT_EXIST 0 struct cmap_conn_info { struct hdb_handle_database iter_db; struct hdb_handle_database track_db; }; typedef uint64_t cmap_iter_handle_t; typedef uint64_t cmap_track_handle_t; struct cmap_track_user_data { void *conn; cmap_track_handle_t track_handle; uint64_t track_inst_handle; }; enum cmap_message_req_types { MESSAGE_REQ_EXEC_CMAP_MCAST = 0, }; enum cmap_mcast_reason { CMAP_MCAST_REASON_SYNC = 0, CMAP_MCAST_REASON_NEW_CONFIG_VERSION = 1, }; static struct corosync_api_v1 *api; static char *cmap_exec_init_fn (struct corosync_api_v1 *corosync_api); static int cmap_exec_exit_fn(void); static int cmap_lib_init_fn (void *conn); static int cmap_lib_exit_fn (void *conn); static void message_handler_req_lib_cmap_set(void *conn, const void *message); static void message_handler_req_lib_cmap_delete(void *conn, const void *message); static void message_handler_req_lib_cmap_get(void *conn, const void *message); static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message); static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message); static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message); static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message); static void message_handler_req_lib_cmap_track_add(void *conn, const void *message); static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message); static void cmap_notify_fn(int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data); static void message_handler_req_exec_cmap_mcast( const void *message, unsigned int nodeid); static void exec_cmap_mcast_endian_convert(void *message); /* * Reson is subtype of message. argc is number of items in argv array. Argv is array * of strings (key names) which will be send to wire. There can be maximum * MAX_REQ_EXEC_CMAP_MCAST_ITEMS items (for more items, CS_ERR_TOO_MANY_GROUPS * error is returned). If key is not found, item has type ICMAP_VALUETYPE_NOT_EXIST * and length zero. 
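 *
 * On the wire the multicast is a struct req_exec_cmap_mcast header (reason and
 * no_items) followed by no_items variable-length struct req_exec_cmap_mcast_item
 * entries. Each item carries key_name, value_type, value_len and the raw value,
 * and is padded up to an 8-byte boundary with MAR_ALIGN_UP; the whole padded
 * item is zeroed before the key name and value are copied in. Roughly
 * (illustrative sketch only):
 *
 *   +---------------------------+
 *   | req_exec_cmap_mcast       |  reason, no_items
 *   +---------------------------+  <- 8-byte aligned
 *   | item[0] + padding         |  key_name, value_type, value_len, value[]
 *   +---------------------------+  <- 8-byte aligned
 *   | item[1] + padding         |  ...
 *   +---------------------------+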
*/ static cs_error_t cmap_mcast_send(enum cmap_mcast_reason reason, int argc, char *argv[]); static void cmap_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int cmap_sync_process (void); static void cmap_sync_activate (void); static void cmap_sync_abort (void); static void cmap_config_version_track_cb( int32_t event, const char *key_name, struct icmap_notify_value new_value, struct icmap_notify_value old_value, void *user_data); /* * Library Handler Definition */ static struct corosync_lib_handler cmap_lib_engine[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_cmap_set, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_cmap_delete, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_cmap_get, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_cmap_adjust_int, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 */ .lib_handler_fn = message_handler_req_lib_cmap_iter_init, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 */ .lib_handler_fn = message_handler_req_lib_cmap_iter_next, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 */ .lib_handler_fn = message_handler_req_lib_cmap_iter_finalize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 */ .lib_handler_fn = message_handler_req_lib_cmap_track_add, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 */ .lib_handler_fn = message_handler_req_lib_cmap_track_delete, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, }; static struct corosync_exec_handler cmap_exec_engine[] = { { /* 0 - MESSAGE_REQ_EXEC_CMAP_MCAST */ .exec_handler_fn = message_handler_req_exec_cmap_mcast, .exec_endian_convert_fn = exec_cmap_mcast_endian_convert }, }; struct corosync_service_engine cmap_service_engine = { .name = "corosync configuration map access", .id = CMAP_SERVICE, .priority = 1, .private_data_size = sizeof(struct cmap_conn_info), .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = cmap_lib_init_fn, .lib_exit_fn = cmap_lib_exit_fn, .lib_engine = cmap_lib_engine, .lib_engine_count = sizeof (cmap_lib_engine) / sizeof (struct corosync_lib_handler), .exec_init_fn = cmap_exec_init_fn, .exec_exit_fn = cmap_exec_exit_fn, .exec_engine = cmap_exec_engine, .exec_engine_count = sizeof (cmap_exec_engine) / sizeof (struct corosync_exec_handler), .sync_init = cmap_sync_init, .sync_process = cmap_sync_process, .sync_activate = cmap_sync_activate, .sync_abort = cmap_sync_abort }; struct corosync_service_engine *cmap_get_service_engine_ver0 (void) { return (&cmap_service_engine); } struct req_exec_cmap_mcast_item { mar_name_t key_name __attribute__((aligned(8))); mar_uint8_t value_type __attribute__((aligned(8))); mar_size_t value_len __attribute__((aligned(8))); uint8_t value[] __attribute__((aligned(8))); }; struct req_exec_cmap_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_uint8_t reason __attribute__((aligned(8))); mar_uint8_t no_items __attribute__((aligned(8))); mar_uint8_t reserved1 __attribute__((aligned(8))); mar_uint8_t reserver2 __attribute__((aligned(8))); /* * Following are array of req_exec_cmap_mcast_item alligned to 8 bytes */ }; static size_t cmap_sync_trans_list_entries = 0; static size_t cmap_sync_member_list_entries = 0; static 
uint64_t cmap_highest_config_version_received = 0; static uint64_t cmap_my_config_version = 0; static int cmap_first_sync = 1; static icmap_track_t cmap_config_version_track; static void cmap_config_version_track_cb( int32_t event, const char *key_name, struct icmap_notify_value new_value, struct icmap_notify_value old_value, void *user_data) { const char *key = "totem.config_version"; cs_error_t ret; ENTER(); if (icmap_get_uint64("totem.config_version", &cmap_my_config_version) != CS_OK) { cmap_my_config_version = 0; } ret = cmap_mcast_send(CMAP_MCAST_REASON_NEW_CONFIG_VERSION, 1, (char **)&key); if (ret != CS_OK) { log_printf(LOGSYS_LEVEL_ERROR, "Can't inform other nodes about new config version"); } LEAVE(); } static int cmap_exec_exit_fn(void) { if (icmap_track_delete(cmap_config_version_track) != CS_OK) { log_printf(LOGSYS_LEVEL_ERROR, "Can't delete config_version icmap tracker"); } return 0; } static char *cmap_exec_init_fn ( struct corosync_api_v1 *corosync_api) { cs_error_t ret; api = corosync_api; ret = icmap_track_add("totem.config_version", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, cmap_config_version_track_cb, NULL, &cmap_config_version_track); if (ret != CS_OK) { return ((char *)"Can't add config_version icmap tracker"); } return (NULL); } static int cmap_lib_init_fn (void *conn) { struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p", conn); api->ipc_refcnt_inc(conn); memset(conn_info, 0, sizeof(*conn_info)); hdb_create(&conn_info->iter_db); hdb_create(&conn_info->track_db); return (0); } static int cmap_lib_exit_fn (void *conn) { struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); hdb_handle_t iter_handle = 0; icmap_iter_t *iter; hdb_handle_t track_handle = 0; icmap_track_t *track; log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p", conn); hdb_iterator_reset(&conn_info->iter_db); while (hdb_iterator_next(&conn_info->iter_db, (void*)&iter, &iter_handle) == 0) { icmap_iter_finalize(*iter); (void)hdb_handle_put (&conn_info->iter_db, iter_handle); } hdb_destroy(&conn_info->iter_db); hdb_iterator_reset(&conn_info->track_db); while (hdb_iterator_next(&conn_info->track_db, (void*)&track, &track_handle) == 0) { free(icmap_track_get_user_data(*track)); icmap_track_delete(*track); (void)hdb_handle_put (&conn_info->track_db, track_handle); } hdb_destroy(&conn_info->track_db); api->ipc_refcnt_dec(conn); return (0); } static void cmap_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { cmap_sync_trans_list_entries = trans_list_entries; cmap_sync_member_list_entries = member_list_entries; cmap_highest_config_version_received = 0; if (icmap_get_uint64("totem.config_version", &cmap_my_config_version) != CS_OK) { cmap_my_config_version = 0; } } static int cmap_sync_process (void) { const char *key = "totem.config_version"; cs_error_t ret; ret = cmap_mcast_send(CMAP_MCAST_REASON_SYNC, 1, (char **)&key); return (ret == CS_OK ? 
0 : -1); } static void cmap_sync_activate (void) { if (cmap_sync_trans_list_entries == 0) { log_printf(LOGSYS_LEVEL_DEBUG, "Single node sync -> no action"); return ; } if (cmap_first_sync == 1) { cmap_first_sync = 0; } else { log_printf(LOGSYS_LEVEL_DEBUG, "Not first sync -> no action"); return ; } if (cmap_my_config_version == 0) { log_printf(LOGSYS_LEVEL_DEBUG, "My config version is 0 -> no action"); return ; } if (cmap_highest_config_version_received == 0) { log_printf(LOGSYS_LEVEL_DEBUG, "Other nodes version is 0 -> no action"); return ; } if (cmap_highest_config_version_received != cmap_my_config_version) { log_printf(LOGSYS_LEVEL_ERROR, "Received config version (%"PRIu64") is different than my config version (%"PRIu64")! Exiting", cmap_highest_config_version_received, cmap_my_config_version); api->shutdown_request(); return ; } } static void cmap_sync_abort (void) { } static void message_handler_req_lib_cmap_set(void *conn, const void *message) { const struct req_lib_cmap_set *req_lib_cmap_set = message; struct res_lib_cmap_set res_lib_cmap_set; cs_error_t ret; if (icmap_is_key_ro((char *)req_lib_cmap_set->key_name.value)) { ret = CS_ERR_ACCESS; } else { ret = icmap_set((char *)req_lib_cmap_set->key_name.value, &req_lib_cmap_set->value, req_lib_cmap_set->value_len, req_lib_cmap_set->type); } memset(&res_lib_cmap_set, 0, sizeof(res_lib_cmap_set)); res_lib_cmap_set.header.size = sizeof(res_lib_cmap_set); res_lib_cmap_set.header.id = MESSAGE_RES_CMAP_SET; res_lib_cmap_set.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_set, sizeof(res_lib_cmap_set)); } static void message_handler_req_lib_cmap_delete(void *conn, const void *message) { const struct req_lib_cmap_set *req_lib_cmap_set = message; struct res_lib_cmap_delete res_lib_cmap_delete; cs_error_t ret; if (icmap_is_key_ro((char *)req_lib_cmap_set->key_name.value)) { ret = CS_ERR_ACCESS; } else { ret = icmap_delete((char *)req_lib_cmap_set->key_name.value); } memset(&res_lib_cmap_delete, 0, sizeof(res_lib_cmap_delete)); res_lib_cmap_delete.header.size = sizeof(res_lib_cmap_delete); res_lib_cmap_delete.header.id = MESSAGE_RES_CMAP_DELETE; res_lib_cmap_delete.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_delete, sizeof(res_lib_cmap_delete)); } static void message_handler_req_lib_cmap_get(void *conn, const void *message) { const struct req_lib_cmap_get *req_lib_cmap_get = message; struct res_lib_cmap_get *res_lib_cmap_get; struct res_lib_cmap_get error_res_lib_cmap_get; cs_error_t ret; size_t value_len; size_t res_lib_cmap_get_size; icmap_value_types_t type; void *value; value_len = req_lib_cmap_get->value_len; res_lib_cmap_get_size = sizeof(*res_lib_cmap_get) + value_len; res_lib_cmap_get = malloc(res_lib_cmap_get_size); if (res_lib_cmap_get == NULL) { ret = CS_ERR_NO_MEMORY; goto error_exit; } memset(res_lib_cmap_get, 0, res_lib_cmap_get_size); if (value_len > 0) { value = res_lib_cmap_get->value; } else { value = NULL; } ret = icmap_get((char *)req_lib_cmap_get->key_name.value, value, &value_len, &type); if (ret != CS_OK) { free(res_lib_cmap_get); goto error_exit; } res_lib_cmap_get->header.size = res_lib_cmap_get_size; res_lib_cmap_get->header.id = MESSAGE_RES_CMAP_GET; res_lib_cmap_get->header.error = ret; res_lib_cmap_get->type = type; res_lib_cmap_get->value_len = value_len; api->ipc_response_send(conn, res_lib_cmap_get, res_lib_cmap_get_size); free(res_lib_cmap_get); return ; error_exit: memset(&error_res_lib_cmap_get, 0, sizeof(error_res_lib_cmap_get)); error_res_lib_cmap_get.header.size = 
sizeof(error_res_lib_cmap_get); error_res_lib_cmap_get.header.id = MESSAGE_RES_CMAP_GET; error_res_lib_cmap_get.header.error = ret; api->ipc_response_send(conn, &error_res_lib_cmap_get, sizeof(error_res_lib_cmap_get)); } static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message) { const struct req_lib_cmap_adjust_int *req_lib_cmap_adjust_int = message; struct res_lib_cmap_adjust_int res_lib_cmap_adjust_int; cs_error_t ret; if (icmap_is_key_ro((char *)req_lib_cmap_adjust_int->key_name.value)) { ret = CS_ERR_ACCESS; } else { ret = icmap_adjust_int((char *)req_lib_cmap_adjust_int->key_name.value, req_lib_cmap_adjust_int->step); } memset(&res_lib_cmap_adjust_int, 0, sizeof(res_lib_cmap_adjust_int)); res_lib_cmap_adjust_int.header.size = sizeof(res_lib_cmap_adjust_int); res_lib_cmap_adjust_int.header.id = MESSAGE_RES_CMAP_ADJUST_INT; res_lib_cmap_adjust_int.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_adjust_int, sizeof(res_lib_cmap_adjust_int)); } static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message) { const struct req_lib_cmap_iter_init *req_lib_cmap_iter_init = message; struct res_lib_cmap_iter_init res_lib_cmap_iter_init; cs_error_t ret; icmap_iter_t iter; icmap_iter_t *hdb_iter; cmap_iter_handle_t handle = 0ULL; const char *prefix; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); if (req_lib_cmap_iter_init->prefix.length > 0) { prefix = (char *)req_lib_cmap_iter_init->prefix.value; } else { prefix = NULL; } iter = icmap_iter_init(prefix); if (iter == NULL) { ret = CS_ERR_NO_SECTIONS; goto reply_send; } ret = hdb_error_to_cs(hdb_handle_create(&conn_info->iter_db, sizeof(iter), &handle)); if (ret != CS_OK) { goto reply_send; } ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, handle, (void *)&hdb_iter)); if (ret != CS_OK) { goto reply_send; } *hdb_iter = iter; (void)hdb_handle_put (&conn_info->iter_db, handle); reply_send: memset(&res_lib_cmap_iter_init, 0, sizeof(res_lib_cmap_iter_init)); res_lib_cmap_iter_init.header.size = sizeof(res_lib_cmap_iter_init); res_lib_cmap_iter_init.header.id = MESSAGE_RES_CMAP_ITER_INIT; res_lib_cmap_iter_init.header.error = ret; res_lib_cmap_iter_init.iter_handle = handle; api->ipc_response_send(conn, &res_lib_cmap_iter_init, sizeof(res_lib_cmap_iter_init)); } static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message) { const struct req_lib_cmap_iter_next *req_lib_cmap_iter_next = message; struct res_lib_cmap_iter_next res_lib_cmap_iter_next; cs_error_t ret; icmap_iter_t *iter; size_t value_len = 0; icmap_value_types_t type = 0; const char *res = NULL; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, req_lib_cmap_iter_next->iter_handle, (void *)&iter)); if (ret != CS_OK) { goto reply_send; } res = icmap_iter_next(*iter, &value_len, &type); if (res == NULL) { ret = CS_ERR_NO_SECTIONS; } (void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_next->iter_handle); reply_send: memset(&res_lib_cmap_iter_next, 0, sizeof(res_lib_cmap_iter_next)); res_lib_cmap_iter_next.header.size = sizeof(res_lib_cmap_iter_next); res_lib_cmap_iter_next.header.id = MESSAGE_RES_CMAP_ITER_NEXT; res_lib_cmap_iter_next.header.error = ret; if (res != NULL) { res_lib_cmap_iter_next.value_len = value_len; res_lib_cmap_iter_next.type = type; memcpy(res_lib_cmap_iter_next.key_name.value, res, strlen(res)); 
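/* Only the key name, value type and value length are returned here; the value itself is read by the client with a separate get request. */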
res_lib_cmap_iter_next.key_name.length = strlen(res); } api->ipc_response_send(conn, &res_lib_cmap_iter_next, sizeof(res_lib_cmap_iter_next)); } static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message) { const struct req_lib_cmap_iter_finalize *req_lib_cmap_iter_finalize = message; struct res_lib_cmap_iter_finalize res_lib_cmap_iter_finalize; cs_error_t ret; icmap_iter_t *iter; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle, (void *)&iter)); if (ret != CS_OK) { goto reply_send; } icmap_iter_finalize(*iter); (void)hdb_handle_destroy(&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle); (void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle); reply_send: memset(&res_lib_cmap_iter_finalize, 0, sizeof(res_lib_cmap_iter_finalize)); res_lib_cmap_iter_finalize.header.size = sizeof(res_lib_cmap_iter_finalize); res_lib_cmap_iter_finalize.header.id = MESSAGE_RES_CMAP_ITER_FINALIZE; res_lib_cmap_iter_finalize.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_iter_finalize, sizeof(res_lib_cmap_iter_finalize)); } static void cmap_notify_fn(int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { struct cmap_track_user_data *cmap_track_user_data = (struct cmap_track_user_data *)user_data; struct res_lib_cmap_notify_callback res_lib_cmap_notify_callback; struct iovec iov[3]; memset(&res_lib_cmap_notify_callback, 0, sizeof(res_lib_cmap_notify_callback)); res_lib_cmap_notify_callback.header.size = sizeof(res_lib_cmap_notify_callback) + new_val.len + old_val.len; res_lib_cmap_notify_callback.header.id = MESSAGE_RES_CMAP_NOTIFY_CALLBACK; res_lib_cmap_notify_callback.header.error = CS_OK; res_lib_cmap_notify_callback.new_value_type = new_val.type; res_lib_cmap_notify_callback.old_value_type = old_val.type; res_lib_cmap_notify_callback.new_value_len = new_val.len; res_lib_cmap_notify_callback.old_value_len = old_val.len; res_lib_cmap_notify_callback.event = event; res_lib_cmap_notify_callback.key_name.length = strlen(key_name); res_lib_cmap_notify_callback.track_inst_handle = cmap_track_user_data->track_inst_handle; memcpy(res_lib_cmap_notify_callback.key_name.value, key_name, strlen(key_name)); iov[0].iov_base = (char *)&res_lib_cmap_notify_callback; iov[0].iov_len = sizeof(res_lib_cmap_notify_callback); iov[1].iov_base = (char *)new_val.data; iov[1].iov_len = new_val.len; iov[2].iov_base = (char *)old_val.data; iov[2].iov_len = old_val.len; api->ipc_dispatch_iov_send(cmap_track_user_data->conn, iov, 3); } static void message_handler_req_lib_cmap_track_add(void *conn, const void *message) { const struct req_lib_cmap_track_add *req_lib_cmap_track_add = message; struct res_lib_cmap_track_add res_lib_cmap_track_add; cs_error_t ret; cmap_track_handle_t handle = 0; icmap_track_t track = NULL; icmap_track_t *hdb_track; struct cmap_track_user_data *cmap_track_user_data; const char *key_name; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); cmap_track_user_data = malloc(sizeof(*cmap_track_user_data)); if (cmap_track_user_data == NULL) { ret = CS_ERR_NO_MEMORY; goto reply_send; } memset(cmap_track_user_data, 0, sizeof(*cmap_track_user_data)); if (req_lib_cmap_track_add->key_name.length > 0) { key_name = (char *)req_lib_cmap_track_add->key_name.value; } else { key_name = NULL; } ret = 
icmap_track_add(key_name, req_lib_cmap_track_add->track_type, cmap_notify_fn, cmap_track_user_data, &track); if (ret != CS_OK) { free(cmap_track_user_data); goto reply_send; } ret = hdb_error_to_cs(hdb_handle_create(&conn_info->track_db, sizeof(track), &handle)); if (ret != CS_OK) { free(cmap_track_user_data); goto reply_send; } ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db, handle, (void *)&hdb_track)); if (ret != CS_OK) { free(cmap_track_user_data); goto reply_send; } *hdb_track = track; cmap_track_user_data->conn = conn; cmap_track_user_data->track_handle = handle; cmap_track_user_data->track_inst_handle = req_lib_cmap_track_add->track_inst_handle; (void)hdb_handle_put (&conn_info->track_db, handle); reply_send: memset(&res_lib_cmap_track_add, 0, sizeof(res_lib_cmap_track_add)); res_lib_cmap_track_add.header.size = sizeof(res_lib_cmap_track_add); res_lib_cmap_track_add.header.id = MESSAGE_RES_CMAP_TRACK_ADD; res_lib_cmap_track_add.header.error = ret; res_lib_cmap_track_add.track_handle = handle; api->ipc_response_send(conn, &res_lib_cmap_track_add, sizeof(res_lib_cmap_track_add)); } static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message) { const struct req_lib_cmap_track_delete *req_lib_cmap_track_delete = message; struct res_lib_cmap_track_delete res_lib_cmap_track_delete; cs_error_t ret; icmap_track_t *track; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); uint64_t track_inst_handle = 0; ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db, req_lib_cmap_track_delete->track_handle, (void *)&track)); if (ret != CS_OK) { goto reply_send; } track_inst_handle = ((struct cmap_track_user_data *)icmap_track_get_user_data(*track))->track_inst_handle; free(icmap_track_get_user_data(*track)); ret = icmap_track_delete(*track); (void)hdb_handle_put (&conn_info->track_db, req_lib_cmap_track_delete->track_handle); (void)hdb_handle_destroy(&conn_info->track_db, req_lib_cmap_track_delete->track_handle); reply_send: memset(&res_lib_cmap_track_delete, 0, sizeof(res_lib_cmap_track_delete)); res_lib_cmap_track_delete.header.size = sizeof(res_lib_cmap_track_delete); res_lib_cmap_track_delete.header.id = MESSAGE_RES_CMAP_TRACK_DELETE; res_lib_cmap_track_delete.header.error = ret; res_lib_cmap_track_delete.track_inst_handle = track_inst_handle; api->ipc_response_send(conn, &res_lib_cmap_track_delete, sizeof(res_lib_cmap_track_delete)); } static cs_error_t cmap_mcast_send(enum cmap_mcast_reason reason, int argc, char *argv[]) { int i; size_t value_len; icmap_value_types_t value_type; cs_error_t err; size_t item_len; size_t msg_len = 0; struct req_exec_cmap_mcast req_exec_cmap_mcast; struct req_exec_cmap_mcast_item *item = NULL; struct iovec req_exec_cmap_iovec[MAX_REQ_EXEC_CMAP_MCAST_ITEMS + 1]; ENTER(); if (argc > MAX_REQ_EXEC_CMAP_MCAST_ITEMS) { return (CS_ERR_TOO_MANY_GROUPS); } memset(req_exec_cmap_iovec, 0, sizeof(req_exec_cmap_iovec)); for (i = 0; i < argc; i++) { err = icmap_get(argv[i], NULL, &value_len, &value_type); if (err != CS_OK && err != CS_ERR_NOT_EXIST) { goto free_mem; } if (err == CS_ERR_NOT_EXIST) { value_type = ICMAP_VALUETYPE_NOT_EXIST; value_len = 0; } item_len = MAR_ALIGN_UP(sizeof(*item) + value_len, 8); item = malloc(item_len); if (item == NULL) { goto free_mem; } - memset(item, 0, sizeof(item_len)); + memset(item, 0, item_len); item->value_type = value_type; item->value_len = value_len; item->key_name.length = strlen(argv[i]); strcpy((char *)item->key_name.value, argv[i]); if (value_type != 
ICMAP_VALUETYPE_NOT_EXIST) { err = icmap_get(argv[i], item->value, &value_len, &value_type); if (err != CS_OK) { goto free_mem; } } req_exec_cmap_iovec[i + 1].iov_base = item; req_exec_cmap_iovec[i + 1].iov_len = item_len; msg_len += item_len; qb_log(LOG_TRACE, "Item %u - type %u, len %zu", i, item->value_type, item->value_len); item = NULL; } memset(&req_exec_cmap_mcast, 0, sizeof(req_exec_cmap_mcast)); req_exec_cmap_mcast.header.size = sizeof(req_exec_cmap_mcast) + msg_len; req_exec_cmap_mcast.reason = reason; req_exec_cmap_mcast.no_items = argc; req_exec_cmap_iovec[0].iov_base = &req_exec_cmap_mcast; req_exec_cmap_iovec[0].iov_len = sizeof(req_exec_cmap_mcast); qb_log(LOG_TRACE, "Sending %u items (%u iovec) for reason %u", argc, argc + 1, reason); err = (api->totem_mcast(req_exec_cmap_iovec, argc + 1, TOTEM_AGREED) == 0 ? CS_OK : CS_ERR_MESSAGE_ERROR); free_mem: for (i = 0; i < argc; i++) { free(req_exec_cmap_iovec[i + 1].iov_base); } free(item); LEAVE(); return (err); } static struct req_exec_cmap_mcast_item *cmap_mcast_item_find( const void *message, char *key) { const struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; int i; const char *p; struct req_exec_cmap_mcast_item *item; mar_uint16_t key_name_len; p = (const char *)message + sizeof(*req_exec_cmap_mcast); for (i = 0; i < req_exec_cmap_mcast->no_items; i++) { item = (struct req_exec_cmap_mcast_item *)p; key_name_len = item->key_name.length; if (strlen(key) == key_name_len && strcmp((char *)item->key_name.value, key) == 0) { return (item); } p += MAR_ALIGN_UP(sizeof(*item) + item->value_len, 8); } return (NULL); } static void message_handler_req_exec_cmap_mcast_reason_sync_nv( enum cmap_mcast_reason reason, const void *message, unsigned int nodeid) { char member_config_version[ICMAP_KEYNAME_MAXLEN]; uint64_t config_version = 0; struct req_exec_cmap_mcast_item *item; mar_size_t value_len; ENTER(); item = cmap_mcast_item_find(message, (char *)"totem.config_version"); if (item != NULL) { value_len = item->value_len; if (item->value_type == ICMAP_VALUETYPE_NOT_EXIST) { config_version = 0; } if (item->value_type == ICMAP_VALUETYPE_UINT64) { memcpy(&config_version, item->value, value_len); } } qb_log(LOG_TRACE, "Received config version %"PRIu64" from node %x", config_version, nodeid); if (nodeid != api->totem_nodeid_get() && config_version > cmap_highest_config_version_received) { cmap_highest_config_version_received = config_version; } snprintf(member_config_version, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.config_version", nodeid); icmap_set_uint64(member_config_version, config_version); LEAVE(); } static void message_handler_req_exec_cmap_mcast( const void *message, unsigned int nodeid) { const struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; ENTER(); switch (req_exec_cmap_mcast->reason) { case CMAP_MCAST_REASON_SYNC: message_handler_req_exec_cmap_mcast_reason_sync_nv(req_exec_cmap_mcast->reason, message, nodeid); break; case CMAP_MCAST_REASON_NEW_CONFIG_VERSION: message_handler_req_exec_cmap_mcast_reason_sync_nv(req_exec_cmap_mcast->reason, message, nodeid); break; default: qb_log(LOG_TRACE, "Received mcast with unknown reason %u", req_exec_cmap_mcast->reason); }; LEAVE(); } static void exec_cmap_mcast_endian_convert(void *message) { struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; const char *p; int i; struct req_exec_cmap_mcast_item *item; uint16_t u16; uint32_t u32; uint64_t u64; float flt; double dbl; swab_coroipc_request_header_t(&req_exec_cmap_mcast->header); p = (const char 
*)message + sizeof(*req_exec_cmap_mcast); for (i = 0; i < req_exec_cmap_mcast->no_items; i++) { item = (struct req_exec_cmap_mcast_item *)p; swab_mar_uint16_t(&item->key_name.length); swab_mar_size_t(&item->value_len); switch (item->value_type) { case ICMAP_VALUETYPE_INT16: case ICMAP_VALUETYPE_UINT16: memcpy(&u16, item->value, sizeof(u16)); u16 = swab16(u16); memcpy(item->value, &u16, sizeof(u16)); break; case ICMAP_VALUETYPE_INT32: case ICMAP_VALUETYPE_UINT32: memcpy(&u32, item->value, sizeof(u32)); u32 = swab32(u32); memcpy(item->value, &u32, sizeof(u32)); break; case ICMAP_VALUETYPE_INT64: case ICMAP_VALUETYPE_UINT64: memcpy(&u64, item->value, sizeof(u64)); u64 = swab64(u64); memcpy(item->value, &u64, sizeof(u64)); break; case ICMAP_VALUETYPE_FLOAT: memcpy(&flt, item->value, sizeof(flt)); swabflt(&flt); memcpy(item->value, &flt, sizeof(flt)); break; case ICMAP_VALUETYPE_DOUBLE: memcpy(&dbl, item->value, sizeof(dbl)); swabdbl(&dbl); memcpy(item->value, &dbl, sizeof(dbl)); break; } p += MAR_ALIGN_UP(sizeof(*item) + item->value_len, 8); } } diff --git a/exec/cpg.c b/exec/cpg.c index a18b8503..5bd830fd 100644 --- a/exec/cpg.c +++ b/exec/cpg.c @@ -1,2437 +1,2438 @@ /* * Copyright (c) 2006-2015 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfie@redhat.com) * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif #include "service.h" LOGSYS_DECLARE_SUBSYS ("CPG"); #define GROUP_HASH_SIZE 32 enum cpg_message_req_types { MESSAGE_REQ_EXEC_CPG_PROCJOIN = 0, MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1, MESSAGE_REQ_EXEC_CPG_JOINLIST = 2, MESSAGE_REQ_EXEC_CPG_MCAST = 3, MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4, MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5, MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST = 6, }; struct zcb_mapped { struct list_head list; void *addr; size_t size; }; /* * state` exec deliver * match group name, pid -> if matched deliver for YES: * XXX indicates impossible state * * join leave mcast * UNJOINED XXX XXX NO * LEAVE_STARTED XXX YES(unjoined_enter) YES * JOIN_STARTED YES(join_started_enter) XXX NO * JOIN_COMPLETED XXX NO YES * * join_started_enter * set JOIN_COMPLETED * add entry to process_info list * unjoined_enter * set UNJOINED * delete entry from process_info list * * * library accept join error codes * UNJOINED YES(CS_OK) set JOIN_STARTED * LEAVE_STARTED NO(CS_ERR_BUSY) * JOIN_STARTED NO(CS_ERR_EXIST) * JOIN_COMPlETED NO(CS_ERR_EXIST) * * library accept leave error codes * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED NO(CS_ERR_BUSY) * JOIN_COMPLETED YES(CS_OK) set LEAVE_STARTED * * library accept mcast * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED YES(CS_OK) * JOIN_COMPLETED YES(CS_OK) */ enum cpd_state { CPD_STATE_UNJOINED, CPD_STATE_LEAVE_STARTED, CPD_STATE_JOIN_STARTED, CPD_STATE_JOIN_COMPLETED }; enum cpg_sync_state { CPGSYNC_DOWNLIST, CPGSYNC_JOINLIST }; enum cpg_downlist_state_e { CPG_DOWNLIST_NONE, CPG_DOWNLIST_WAITING_FOR_MESSAGES, CPG_DOWNLIST_APPLYING, }; static enum cpg_downlist_state_e downlist_state; static struct list_head downlist_messages_head; static struct list_head joinlist_messages_head; struct cpg_pd { void *conn; mar_cpg_name_t group_name; uint32_t pid; enum cpd_state cpd_state; unsigned int flags; int initial_totem_conf_sent; uint64_t transition_counter; /* These two are used when sending fragmented messages */ uint64_t initial_transition_counter; struct list_head list; struct list_head iteration_instance_list_head; struct list_head zcb_mapped_list_head; }; struct cpg_iteration_instance { hdb_handle_t handle; struct list_head list; struct list_head items_list_head; /* List of process_info */ struct list_head *current_pointer; }; DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL); DECLARE_LIST_INIT(cpg_pd_list_head); static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_member_list_entries; static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_old_member_list_entries = 0; static struct corosync_api_v1 *api = NULL; static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST; static mar_cpg_ring_id_t last_sync_ring_id; struct process_info { unsigned int nodeid; uint32_t pid; mar_cpg_name_t group; struct list_head list; /* on the group_info members list */ }; DECLARE_LIST_INIT(process_info_list_head); struct join_list_entry { uint32_t pid; mar_cpg_name_t group_name; }; /* * Service Interfaces required by service_message_handler struct */ static char *cpg_exec_init_fn (struct corosync_api_v1 *); static int cpg_lib_init_fn 
(void *conn); static int cpg_lib_exit_fn (void *conn); static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_joinlist ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_partial_mcast ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist ( const void *message, unsigned int nodeid); static void exec_cpg_procjoin_endian_convert (void *msg); static void exec_cpg_joinlist_endian_convert (void *msg); static void exec_cpg_mcast_endian_convert (void *msg); static void exec_cpg_partial_mcast_endian_convert (void *msg); static void exec_cpg_downlist_endian_convert_old (void *msg); static void exec_cpg_downlist_endian_convert (void *msg); static void message_handler_req_lib_cpg_join (void *conn, const void *message); static void message_handler_req_lib_cpg_leave (void *conn, const void *message); static void message_handler_req_lib_cpg_finalize (void *conn, const void *message); static void message_handler_req_lib_cpg_mcast (void *conn, const void *message); static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message); static void message_handler_req_lib_cpg_membership (void *conn, const void *message); static void message_handler_req_lib_cpg_local_get (void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_initialize ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_next ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_finalize ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_alloc ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message); static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason); static int cpg_exec_send_downlist(void); static int cpg_exec_send_joinlist(void); static void downlist_messages_delete (void); static void downlist_master_choose_and_send (void); static void joinlist_inform_clients (void); static void joinlist_messages_delete (void); static void cpg_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int cpg_sync_process (void); static void cpg_sync_activate (void); static void cpg_sync_abort (void); static void do_proc_join( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason); static void do_proc_leave( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason); static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list); static inline int zcb_all_free ( struct cpg_pd *cpd); static char *cpg_print_group_name ( const mar_cpg_name_t *group); /* * Library Handler Definition */ static struct corosync_lib_handler cpg_lib_engine[] = { { /* 0 - MESSAGE_REQ_CPG_JOIN */ .lib_handler_fn = message_handler_req_lib_cpg_join, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 1 - 
MESSAGE_REQ_CPG_LEAVE */ .lib_handler_fn = message_handler_req_lib_cpg_leave, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 2 - MESSAGE_REQ_CPG_MCAST */ .lib_handler_fn = message_handler_req_lib_cpg_mcast, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 3 - MESSAGE_REQ_CPG_MEMBERSHIP */ .lib_handler_fn = message_handler_req_lib_cpg_membership, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 - MESSAGE_REQ_CPG_LOCAL_GET */ .lib_handler_fn = message_handler_req_lib_cpg_local_get, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 - MESSAGE_REQ_CPG_ITERATIONINITIALIZE */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_initialize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 - MESSAGE_REQ_CPG_ITERATIONNEXT */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_next, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 - MESSAGE_REQ_CPG_ITERATIONFINALIZE */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_finalize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 - MESSAGE_REQ_CPG_FINALIZE */ .lib_handler_fn = message_handler_req_lib_cpg_finalize, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 9 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_alloc, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 10 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_free, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 11 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_execute, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 12 */ .lib_handler_fn = message_handler_req_lib_cpg_partial_mcast, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, }; static struct corosync_exec_handler cpg_exec_engine[] = { { /* 0 - MESSAGE_REQ_EXEC_CPG_PROCJOIN */ .exec_handler_fn = message_handler_req_exec_cpg_procjoin, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 1 - MESSAGE_REQ_EXEC_CPG_PROCLEAVE */ .exec_handler_fn = message_handler_req_exec_cpg_procleave, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 2 - MESSAGE_REQ_EXEC_CPG_JOINLIST */ .exec_handler_fn = message_handler_req_exec_cpg_joinlist, .exec_endian_convert_fn = exec_cpg_joinlist_endian_convert }, { /* 3 - MESSAGE_REQ_EXEC_CPG_MCAST */ .exec_handler_fn = message_handler_req_exec_cpg_mcast, .exec_endian_convert_fn = exec_cpg_mcast_endian_convert }, { /* 4 - MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD */ .exec_handler_fn = message_handler_req_exec_cpg_downlist_old, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old }, { /* 5 - MESSAGE_REQ_EXEC_CPG_DOWNLIST */ .exec_handler_fn = message_handler_req_exec_cpg_downlist, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert }, { /* 6 - MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST */ .exec_handler_fn = message_handler_req_exec_cpg_partial_mcast, .exec_endian_convert_fn = exec_cpg_partial_mcast_endian_convert }, }; struct corosync_service_engine cpg_service_engine = { .name = "corosync cluster closed process group service v1.01", .id = CPG_SERVICE, .priority = 1, .private_data_size = sizeof (struct cpg_pd), .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = cpg_lib_init_fn, .lib_exit_fn = cpg_lib_exit_fn, .lib_engine = cpg_lib_engine, .lib_engine_count = sizeof (cpg_lib_engine) / sizeof (struct corosync_lib_handler), .exec_init_fn = cpg_exec_init_fn, .exec_dump_fn = NULL, .exec_engine = cpg_exec_engine, .exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler), .sync_init = cpg_sync_init, .sync_process = 
cpg_sync_process, .sync_activate = cpg_sync_activate, .sync_abort = cpg_sync_abort }; struct corosync_service_engine *cpg_get_service_engine_ver0 (void) { return (&cpg_service_engine); } struct req_exec_cpg_procjoin { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_uint32_t reason __attribute__((aligned(8))); }; struct req_exec_cpg_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t msglen __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); mar_uint8_t message[] __attribute__((aligned(8))); }; struct req_exec_cpg_partial_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t msglen __attribute__((aligned(8))); mar_uint32_t fraglen __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_uint32_t type __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); mar_uint8_t message[] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist_old { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist { struct qb_ipc_request_header header __attribute__((aligned(8))); /* merge decisions */ mar_uint32_t old_members __attribute__((aligned(8))); /* downlist below */ mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct downlist_msg { mar_uint32_t sender_nodeid; mar_uint32_t old_members __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); struct list_head list; }; struct joinlist_msg { mar_uint32_t sender_nodeid; uint32_t pid; mar_cpg_name_t group_name; struct list_head list; }; static struct req_exec_cpg_downlist g_req_exec_cpg_downlist; /* * Function print group name. 
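 * Printable characters are copied verbatim, a backslash is doubled and any
 * other byte is emitted as a four character \xNN escape, so the static result
 * buffer needs up to CPG_MAX_NAME_LENGTH * 4 + 1 bytes.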
It's not reentrant */ static char *cpg_print_group_name(const mar_cpg_name_t *group) { static char res[CPG_MAX_NAME_LENGTH * 4 + 1]; int dest_pos = 0; char c; int i; for (i = 0; i < group->length; i++) { c = group->value[i]; if (c >= ' ' && c < 0x7f && c != '\\') { res[dest_pos++] = c; } else { if (c == '\\') { res[dest_pos++] = '\\'; res[dest_pos++] = '\\'; } else { snprintf(res + dest_pos, sizeof(res) - dest_pos, "\\x%02X", c); dest_pos += 4; } } } res[dest_pos] = 0; return (res); } static void cpg_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { int entries; int i, j; int found; my_sync_state = CPGSYNC_DOWNLIST; memcpy (my_member_list, member_list, member_list_entries * sizeof (unsigned int)); my_member_list_entries = member_list_entries; last_sync_ring_id.nodeid = ring_id->rep.nodeid; last_sync_ring_id.seq = ring_id->seq; downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES; entries = 0; /* * Determine list of nodeids for downlist message */ for (i = 0; i < my_old_member_list_entries; i++) { found = 0; for (j = 0; j < trans_list_entries; j++) { if (my_old_member_list[i] == trans_list[j]) { found = 1; break; } } if (found == 0) { g_req_exec_cpg_downlist.nodeids[entries++] = my_old_member_list[i]; } } g_req_exec_cpg_downlist.left_nodes = entries; } static int cpg_sync_process (void) { int res = -1; if (my_sync_state == CPGSYNC_DOWNLIST) { res = cpg_exec_send_downlist(); if (res == -1) { return (-1); } my_sync_state = CPGSYNC_JOINLIST; } if (my_sync_state == CPGSYNC_JOINLIST) { res = cpg_exec_send_joinlist(); } return (res); } static void cpg_sync_activate (void) { memcpy (my_old_member_list, my_member_list, my_member_list_entries * sizeof (unsigned int)); my_old_member_list_entries = my_member_list_entries; if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) { downlist_master_choose_and_send (); } joinlist_inform_clients (); downlist_messages_delete (); downlist_state = CPG_DOWNLIST_NONE; joinlist_messages_delete (); notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); } static void cpg_sync_abort (void) { downlist_state = CPG_DOWNLIST_NONE; downlist_messages_delete (); joinlist_messages_delete (); } static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list) { struct list_head *iter; char *buf; int size; struct res_lib_cpg_totem_confchg_callback *res; size = sizeof(struct res_lib_cpg_totem_confchg_callback) + sizeof(mar_uint32_t) * (member_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_totem_confchg_callback *)buf; res->member_list_entries = member_list_entries; res->header.size = size; res->header.id = MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK; res->header.error = CS_OK; memcpy (&res->ring_id, &last_sync_ring_id, sizeof (mar_cpg_ring_id_t)); memcpy (res->member_list, member_list, res->member_list_entries * sizeof (mar_uint32_t)); if (conn == NULL) { for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpg_pd = list_entry (iter, struct cpg_pd, list); api->ipc_dispatch_send (cpg_pd->conn, buf, size); } } else { api->ipc_dispatch_send (conn, buf, size); } return CS_OK; } static int notify_lib_joinlist( const mar_cpg_name_t *group_name, void *conn, int joined_list_entries, mar_cpg_address_t *joined_list, int left_list_entries, mar_cpg_address_t *left_list, int id) { int size; char *buf; struct list_head 
*iter; int count; struct res_lib_cpg_confchg_callback *res; mar_cpg_address_t *retgi; count = 0; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0; i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { founded++; } } if (!founded) count++; } } size = sizeof(struct res_lib_cpg_confchg_callback) + sizeof(mar_cpg_address_t) * (count + left_list_entries + joined_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_confchg_callback *)buf; res->joined_list_entries = joined_list_entries; res->left_list_entries = left_list_entries; res->member_list_entries = count; retgi = res->member_list; res->header.size = size; res->header.id = id; res->header.error = CS_OK; memcpy(&res->group_name, group_name, sizeof(mar_cpg_name_t)); for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi=list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0;i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { founded++; } } if (!founded) { retgi->nodeid = pi->nodeid; retgi->pid = pi->pid; retgi++; } } } if (left_list_entries) { memcpy (retgi, left_list, left_list_entries * sizeof(mar_cpg_address_t)); retgi += left_list_entries; } if (joined_list_entries) { memcpy (retgi, joined_list, joined_list_entries * sizeof(mar_cpg_address_t)); retgi += joined_list_entries; } if (conn) { api->ipc_dispatch_send (conn, buf, size); } else { for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); if (mar_name_compare (&cpd->group_name, group_name) == 0) { assert (joined_list_entries <= 1); if (joined_list_entries) { if (joined_list[0].pid == cpd->pid && joined_list[0].nodeid == api->totem_nodeid_get()) { cpd->cpd_state = CPD_STATE_JOIN_COMPLETED; } } if (cpd->cpd_state == CPD_STATE_JOIN_COMPLETED || cpd->cpd_state == CPD_STATE_LEAVE_STARTED) { api->ipc_dispatch_send (cpd->conn, buf, size); cpd->transition_counter++; } if (left_list_entries) { if (left_list[0].pid == cpd->pid && left_list[0].nodeid == api->totem_nodeid_get() && left_list[0].reason == CONFCHG_CPG_REASON_LEAVE) { cpd->pid = 0; memset (&cpd->group_name, 0, sizeof(cpd->group_name)); cpd->cpd_state = CPD_STATE_UNJOINED; } } } } } /* * Traverse thru cpds and send totem membership for cpd, where it is not send yet */ for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); if ((cpd->flags & CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF) && (cpd->initial_totem_conf_sent == 0)) { cpd->initial_totem_conf_sent = 1; notify_lib_totem_membership (cpd->conn, my_old_member_list_entries, my_old_member_list); } } return CS_OK; } static void downlist_log(const char *msg, struct downlist_msg* dl) { log_printf (LOG_DEBUG, "%s: sender %s; members(old:%d left:%d)", msg, api->totem_ifaces_print(dl->sender_nodeid), dl->old_members, dl->left_nodes); } static struct downlist_msg* downlist_master_choose (void) { struct downlist_msg *cmp; struct downlist_msg *best = NULL; struct list_head *iter; uint32_t cmp_members; uint32_t best_members; uint32_t i; int 
ignore_msg; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter->next) { cmp = list_entry(iter, struct downlist_msg, list); downlist_log("comparing", cmp); ignore_msg = 0; for (i = 0; i < cmp->left_nodes; i++) { if (cmp->nodeids[i] == api->totem_nodeid_get()) { log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n"); ignore_msg = 1; break; } } if (ignore_msg) { continue ; } if (best == NULL) { best = cmp; continue; } best_members = best->old_members - best->left_nodes; cmp_members = cmp->old_members - cmp->left_nodes; if (cmp_members > best_members) { best = cmp; } else if (cmp_members == best_members) { if (cmp->old_members > best->old_members) { best = cmp; } else if (cmp->old_members == best->old_members) { if (cmp->sender_nodeid < best->sender_nodeid) { best = cmp; } } } } assert (best != NULL); return best; } static void downlist_master_choose_and_send (void) { struct downlist_msg *stored_msg; struct list_head *iter; struct process_info *left_pi; qb_map_t *group_map; struct cpg_name cpg_group; mar_cpg_name_t group; struct confchg_data{ struct cpg_name cpg_group; mar_cpg_address_t left_list[CPG_MEMBERS_MAX]; int left_list_entries; struct list_head list; } *pcd; qb_map_iter_t *miter; int i, size; downlist_state = CPG_DOWNLIST_APPLYING; stored_msg = downlist_master_choose (); if (!stored_msg) { log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist"); return; } downlist_log("chosen downlist", stored_msg); group_map = qb_skiplist_create(); /* * only the cpg groups included in left nodes should receive * confchg event, so we will collect these cpg groups and * relative left_lists here. */ for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { struct process_info *pi = list_entry(iter, struct process_info, list); iter = iter->next; left_pi = NULL; for (i = 0; i < stored_msg->left_nodes; i++) { if (pi->nodeid == stored_msg->nodeids[i]) { left_pi = pi; break; } } if (left_pi) { marshall_from_mar_cpg_name_t(&cpg_group, &left_pi->group); cpg_group.value[cpg_group.length] = 0; pcd = (struct confchg_data *)qb_map_get(group_map, cpg_group.value); if (pcd == NULL) { pcd = (struct confchg_data *)calloc(1, sizeof(struct confchg_data)); memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name)); qb_map_put(group_map, pcd->cpg_group.value, pcd); } size = pcd->left_list_entries; pcd->left_list[size].nodeid = left_pi->nodeid; pcd->left_list[size].pid = left_pi->pid; pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN; pcd->left_list_entries++; list_del (&left_pi->list); free (left_pi); } } /* send only one confchg event per cpg group */ miter = qb_map_iter_create(group_map); while (qb_map_iter_next(miter, (void **)&pcd)) { marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group); log_printf (LOG_DEBUG, "left_list_entries:%d", pcd->left_list_entries); for (i=0; ileft_list_entries; i++) { log_printf (LOG_DEBUG, "left_list[%d] group:%s, ip:%s, pid:%d", i, cpg_print_group_name(&group), (char*)api->totem_ifaces_print(pcd->left_list[i].nodeid), pcd->left_list[i].pid); } /* send confchg event */ notify_lib_joinlist(&group, NULL, 0, NULL, pcd->left_list_entries, pcd->left_list, MESSAGE_RES_CPG_CONFCHG_CALLBACK); free(pcd); } qb_map_iter_free(miter); qb_map_destroy(group_map); } /* * Remove processes that might have left the group while we were suspended. 
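 *
 * Any remote entry in process_info_list_head with no matching (nodeid, pid,
 * group) record among the joinlist messages gathered during sync is assumed
 * to be gone, and a leave with reason CONFCHG_CPG_REASON_PROCDOWN is
 * generated for it; entries belonging to the local node are skipped.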
*/ static void joinlist_remove_zombie_pi_entries (void) { struct list_head *pi_iter; struct list_head *jl_iter; struct process_info *pi; struct joinlist_msg *stored_msg; int found; for (pi_iter = process_info_list_head.next; pi_iter != &process_info_list_head; ) { pi = list_entry (pi_iter, struct process_info, list); pi_iter = pi_iter->next; /* * Ignore local node */ if (pi->nodeid == api->totem_nodeid_get()) { continue ; } /* * Try to find message in joinlist messages */ found = 0; for (jl_iter = joinlist_messages_head.next; jl_iter != &joinlist_messages_head; jl_iter = jl_iter->next) { stored_msg = list_entry(jl_iter, struct joinlist_msg, list); if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { continue ; } if (pi->nodeid == stored_msg->sender_nodeid && pi->pid == stored_msg->pid && mar_name_compare (&pi->group, &stored_msg->group_name) == 0) { found = 1; break ; } } if (!found) { do_proc_leave(&pi->group, pi->pid, pi->nodeid, CONFCHG_CPG_REASON_PROCDOWN); } } } static void joinlist_inform_clients (void) { struct joinlist_msg *stored_msg; struct list_head *iter; unsigned int i; i = 0; for (iter = joinlist_messages_head.next; iter != &joinlist_messages_head; iter = iter->next) { stored_msg = list_entry(iter, struct joinlist_msg, list); log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%s, ip:%s, pid:%d", i++, cpg_print_group_name(&stored_msg->group_name), (char*)api->totem_ifaces_print(stored_msg->sender_nodeid), stored_msg->pid); /* Ignore our own messages */ if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { continue ; } do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid, CONFCHG_CPG_REASON_NODEUP); } joinlist_remove_zombie_pi_entries (); } static void downlist_messages_delete (void) { struct downlist_msg *stored_msg; struct list_head *iter, *iter_next; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter_next) { iter_next = iter->next; stored_msg = list_entry(iter, struct downlist_msg, list); list_del (&stored_msg->list); free (stored_msg); } } static void joinlist_messages_delete (void) { struct joinlist_msg *stored_msg; struct list_head *iter, *iter_next; for (iter = joinlist_messages_head.next; iter != &joinlist_messages_head; iter = iter_next) { iter_next = iter->next; stored_msg = list_entry(iter, struct joinlist_msg, list); list_del (&stored_msg->list); free (stored_msg); } list_init (&joinlist_messages_head); } static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) { list_init (&downlist_messages_head); list_init (&joinlist_messages_head); api = corosync_api; return (NULL); } static void cpg_iteration_instance_finalize (struct cpg_iteration_instance *cpg_iteration_instance) { struct list_head *iter, *iter_next; struct process_info *pi; for (iter = cpg_iteration_instance->items_list_head.next; iter != &cpg_iteration_instance->items_list_head; iter = iter_next) { iter_next = iter->next; pi = list_entry (iter, struct process_info, list); list_del (&pi->list); free (pi); } list_del (&cpg_iteration_instance->list); hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); } static void cpg_pd_finalize (struct cpg_pd *cpd) { struct list_head *iter, *iter_next; struct cpg_iteration_instance *cpii; zcb_all_free(cpd); for (iter = cpd->iteration_instance_list_head.next; iter != &cpd->iteration_instance_list_head; iter = iter_next) { iter_next = iter->next; cpii = list_entry (iter, struct cpg_iteration_instance, list); cpg_iteration_instance_finalize (cpii); } list_del 
(&cpd->list); } static int cpg_lib_exit_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p", conn); if (cpd->group_name.length > 0 && cpd->cpd_state != CPD_STATE_LEAVE_STARTED) { cpg_node_joinleave_send (cpd->pid, &cpd->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN); } cpg_pd_finalize (cpd); api->ipc_refcnt_dec (conn); return (0); } static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason) { struct req_exec_cpg_procjoin req_exec_cpg_procjoin; struct iovec req_exec_cpg_iovec; int result; memcpy(&req_exec_cpg_procjoin.group_name, group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_procjoin.pid = pid; req_exec_cpg_procjoin.reason = reason; req_exec_cpg_procjoin.header.size = sizeof(req_exec_cpg_procjoin); req_exec_cpg_procjoin.header.id = SERVICE_ID_MAKE(CPG_SERVICE, fn); req_exec_cpg_iovec.iov_base = (char *)&req_exec_cpg_procjoin; req_exec_cpg_iovec.iov_len = sizeof(req_exec_cpg_procjoin); result = api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED); return (result); } /* Can byteswap join & leave messages */ static void exec_cpg_procjoin_endian_convert (void *msg) { struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = msg; req_exec_cpg_procjoin->pid = swab32(req_exec_cpg_procjoin->pid); swab_mar_cpg_name_t (&req_exec_cpg_procjoin->group_name); req_exec_cpg_procjoin->reason = swab32(req_exec_cpg_procjoin->reason); } static void exec_cpg_joinlist_endian_convert (void *msg_v) { char *msg = msg_v; struct qb_ipc_response_header *res = (struct qb_ipc_response_header *)msg; struct join_list_entry *jle = (struct join_list_entry *)(msg + sizeof(struct qb_ipc_response_header)); swab_mar_int32_t (&res->size); while ((const char*)jle < msg + res->size) { jle->pid = swab32(jle->pid); swab_mar_cpg_name_t (&jle->group_name); jle++; } } static void exec_cpg_downlist_endian_convert_old (void *msg) { } static void exec_cpg_downlist_endian_convert (void *msg) { struct req_exec_cpg_downlist *req_exec_cpg_downlist = msg; unsigned int i; req_exec_cpg_downlist->left_nodes = swab32(req_exec_cpg_downlist->left_nodes); req_exec_cpg_downlist->old_members = swab32(req_exec_cpg_downlist->old_members); for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) { req_exec_cpg_downlist->nodeids[i] = swab32(req_exec_cpg_downlist->nodeids[i]); } } static void exec_cpg_mcast_endian_convert (void *msg) { struct req_exec_cpg_mcast *req_exec_cpg_mcast = msg; swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); swab_mar_message_source_t (&req_exec_cpg_mcast->source); } static void exec_cpg_partial_mcast_endian_convert (void *msg) { struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = msg; swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); req_exec_cpg_mcast->fraglen = swab32(req_exec_cpg_mcast->fraglen); req_exec_cpg_mcast->type = swab32(req_exec_cpg_mcast->type); swab_mar_message_source_t (&req_exec_cpg_mcast->source); } static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) { struct list_head *iter; for (iter = 
process_info_list_head.next; iter != &process_info_list_head; ) { struct process_info *pi = list_entry (iter, struct process_info, list); iter = iter->next; if (pi->pid == pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, group_name) == 0) { return pi; } } return NULL; } static void do_proc_join( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason) { struct process_info *pi; struct process_info *pi_entry; mar_cpg_address_t notify_info; struct list_head *list; struct list_head *list_to_add = NULL; if (process_info_find (name, pid, nodeid) != NULL) { return ; } pi = malloc (sizeof (struct process_info)); if (!pi) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); return; } pi->nodeid = nodeid; pi->pid = pid; memcpy(&pi->group, name, sizeof(*name)); list_init(&pi->list); /* * Insert new process in sorted order so synchronization works properly */ list_to_add = &process_info_list_head; for (list = process_info_list_head.next; list != &process_info_list_head; list = list->next) { pi_entry = list_entry(list, struct process_info, list); if (pi_entry->nodeid > pi->nodeid || (pi_entry->nodeid == pi->nodeid && pi_entry->pid > pi->pid)) { break; } list_to_add = list; } list_add (&pi->list, list_to_add); notify_info.pid = pi->pid; notify_info.nodeid = nodeid; notify_info.reason = reason; notify_lib_joinlist(&pi->group, NULL, 1, ¬ify_info, 0, NULL, MESSAGE_RES_CPG_CONFCHG_CALLBACK); } static void do_proc_leave( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason) { struct process_info *pi; struct list_head *iter; mar_cpg_address_t notify_info; notify_info.pid = pid; notify_info.nodeid = nodeid; notify_info.reason = reason; notify_lib_joinlist(name, NULL, 0, NULL, 1, ¬ify_info, MESSAGE_RES_CPG_CONFCHG_CALLBACK); for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { pi = list_entry(iter, struct process_info, list); iter = iter->next; if (pi->pid == pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, name)==0) { list_del (&pi->list); free (pi); } } } static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid) { log_printf (LOGSYS_LEVEL_WARNING, "downlist OLD from node 0x%x", nodeid); } static void message_handler_req_exec_cpg_downlist( const void *message, unsigned int nodeid) { const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message; int i; struct list_head *iter; struct downlist_msg *stored_msg; int found; if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) { log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d", req_exec_cpg_downlist->left_nodes, downlist_state); return; } stored_msg = malloc (sizeof (struct downlist_msg)); stored_msg->sender_nodeid = nodeid; stored_msg->old_members = req_exec_cpg_downlist->old_members; stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes; memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids, req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t)); list_init (&stored_msg->list); list_add (&stored_msg->list, &downlist_messages_head); for (i = 0; i < my_member_list_entries; i++) { found = 0; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter->next) { stored_msg = list_entry(iter, struct downlist_msg, list); if (my_member_list[i] == stored_msg->sender_nodeid) { found = 1; } } if (!found) { return; } } downlist_master_choose_and_send (); } static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int 
nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; log_printf(LOGSYS_LEVEL_DEBUG, "got procjoin message from cluster node 0x%x (%s) for pid %u", nodeid, api->totem_ifaces_print(nodeid), (unsigned int)req_exec_cpg_procjoin->pid); do_proc_join (&req_exec_cpg_procjoin->group_name, req_exec_cpg_procjoin->pid, nodeid, CONFCHG_CPG_REASON_JOIN); } static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; log_printf(LOGSYS_LEVEL_DEBUG, "got procleave message from cluster node 0x%x (%s) for pid %u", nodeid, api->totem_ifaces_print(nodeid), (unsigned int)req_exec_cpg_procjoin->pid); do_proc_leave (&req_exec_cpg_procjoin->group_name, req_exec_cpg_procjoin->pid, nodeid, req_exec_cpg_procjoin->reason); } /* Got a proclist from another node */ static void message_handler_req_exec_cpg_joinlist ( const void *message_v, unsigned int nodeid) { const char *message = message_v; const struct qb_ipc_response_header *res = (const struct qb_ipc_response_header *)message; const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header)); struct joinlist_msg *stored_msg; log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node 0x%x", nodeid); while ((const char*)jle < message + res->size) { stored_msg = malloc (sizeof (struct joinlist_msg)); memset(stored_msg, 0, sizeof (struct joinlist_msg)); stored_msg->sender_nodeid = nodeid; stored_msg->pid = jle->pid; memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t)); list_init (&stored_msg->list); list_add (&stored_msg->list, &joinlist_messages_head); jle++; } } static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_mcast *req_exec_cpg_mcast = message; struct res_lib_cpg_deliver_callback res_lib_cpg_mcast; int msglen = req_exec_cpg_mcast->msglen; struct list_head *iter, *pi_iter; struct cpg_pd *cpd; struct iovec iovec[2]; int known_node = 0; res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_DELIVER_CALLBACK; res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; res_lib_cpg_mcast.msglen = msglen; res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; res_lib_cpg_mcast.nodeid = nodeid; memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, sizeof(mar_cpg_name_t)); iovec[0].iov_base = (void *)&res_lib_cpg_mcast; iovec[0].iov_len = sizeof (res_lib_cpg_mcast); iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); iovec[1].iov_len = msglen; for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) { cpd = list_entry(iter, struct cpg_pd, list); iter = iter->next; if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { if (!known_node) { /* Try to find, if we know the node */ for (pi_iter = process_info_list_head.next; pi_iter != &process_info_list_head; pi_iter = pi_iter->next) { struct process_info *pi = list_entry (pi_iter, struct process_info, list); if (pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { known_node = 1; break; } } } if (!known_node) { log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); return ; } api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); } } } static void message_handler_req_exec_cpg_partial_mcast ( const void *message, unsigned int nodeid) { const 
struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = message; struct res_lib_cpg_partial_deliver_callback res_lib_cpg_mcast; int msglen = req_exec_cpg_mcast->fraglen; struct list_head *iter, *pi_iter; struct cpg_pd *cpd; struct iovec iovec[2]; int known_node = 0; log_printf(LOGSYS_LEVEL_DEBUG, "Got fragmented message from node %d, size = %d bytes\n", nodeid, msglen); res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK; res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; res_lib_cpg_mcast.fraglen = msglen; res_lib_cpg_mcast.msglen = req_exec_cpg_mcast->msglen; res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; res_lib_cpg_mcast.type = req_exec_cpg_mcast->type; res_lib_cpg_mcast.nodeid = nodeid; memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, sizeof(mar_cpg_name_t)); iovec[0].iov_base = (void *)&res_lib_cpg_mcast; iovec[0].iov_len = sizeof (res_lib_cpg_mcast); iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); iovec[1].iov_len = msglen; for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) { cpd = list_entry(iter, struct cpg_pd, list); iter = iter->next; if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { if (!known_node) { /* Try to find, if we know the node */ for (pi_iter = process_info_list_head.next; pi_iter != &process_info_list_head; pi_iter = pi_iter->next) { struct process_info *pi = list_entry (pi_iter, struct process_info, list); if (pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { known_node = 1; break; } } } if (!known_node) { log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); return ; } api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); } } } static int cpg_exec_send_downlist(void) { struct iovec iov; g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST); g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist); g_req_exec_cpg_downlist.old_members = my_old_member_list_entries; iov.iov_base = (void *)&g_req_exec_cpg_downlist; iov.iov_len = g_req_exec_cpg_downlist.header.size; return (api->totem_mcast (&iov, 1, TOTEM_AGREED)); } static int cpg_exec_send_joinlist(void) { int count = 0; struct list_head *iter; struct qb_ipc_response_header *res; char *buf; struct join_list_entry *jle; struct iovec req_exec_cpg_iovec; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { count++; } } /* Nothing to send */ if (!count) return 0; buf = alloca(sizeof(struct qb_ipc_response_header) + sizeof(struct join_list_entry) * count); if (!buf) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate joinlist buffer"); return -1; } jle = (struct join_list_entry *)(buf + sizeof(struct qb_ipc_response_header)); res = (struct qb_ipc_response_header *)buf; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t)); jle->pid = pi->pid; jle++; } } res->id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_JOINLIST); res->size = sizeof(struct qb_ipc_response_header)+sizeof(struct join_list_entry) * 
count; req_exec_cpg_iovec.iov_base = buf; req_exec_cpg_iovec.iov_len = res->size; return (api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED)); } static int cpg_lib_init_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); memset (cpd, 0, sizeof(struct cpg_pd)); cpd->conn = conn; list_add (&cpd->list, &cpg_pd_list_head); list_init (&cpd->iteration_instance_list_head); list_init (&cpd->zcb_mapped_list_head); api->ipc_refcnt_inc (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p, cpd=%p", conn, cpd); return (0); } /* Join message from the library */ static void message_handler_req_lib_cpg_join (void *conn, const void *message) { const struct req_lib_cpg_join *req_lib_cpg_join = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_join res_lib_cpg_join; cs_error_t error = CS_OK; struct list_head *iter; /* Test, if we don't have same pid and group name joined */ for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd_item = list_entry (iter, struct cpg_pd, list); if (cpd_item->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &cpd_item->group_name) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_EXIST; goto response_send; } } /* * Same check must be done in process info list, because there may be not yet delivered * leave of client. */ for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get () && pi->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &pi->group) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_TRY_AGAIN; goto response_send; } } if (req_lib_cpg_join->group_name.length > CPG_MAX_NAME_LENGTH) { error = CS_ERR_NAME_TOO_LONG; goto response_send; } switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_OK; cpd->cpd_state = CPD_STATE_JOIN_STARTED; cpd->pid = req_lib_cpg_join->pid; cpd->flags = req_lib_cpg_join->flags; memcpy (&cpd->group_name, &req_lib_cpg_join->group_name, sizeof (cpd->group_name)); cpg_node_joinleave_send (req_lib_cpg_join->pid, &req_lib_cpg_join->group_name, MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN); break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_STARTED: error = CS_ERR_EXIST; break; case CPD_STATE_JOIN_COMPLETED: error = CS_ERR_EXIST; break; } response_send: res_lib_cpg_join.header.size = sizeof(res_lib_cpg_join); res_lib_cpg_join.header.id = MESSAGE_RES_CPG_JOIN; res_lib_cpg_join.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_join, sizeof(res_lib_cpg_join)); } /* Leave message from the library */ static void message_handler_req_lib_cpg_leave (void *conn, const void *message) { struct res_lib_cpg_leave res_lib_cpg_leave; cs_error_t error = CS_OK; struct req_lib_cpg_leave *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; cpd->cpd_state = CPD_STATE_LEAVE_STARTED; 
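/*
 * Leaving is two-step: the connection is marked LEAVE_STARTED here and a
 * MESSAGE_REQ_EXEC_CPG_PROCLEAVE is multicast just below via
 * cpg_node_joinleave_send(). The process_info entry is only dropped, and
 * the remaining group members notified, once that message is delivered back
 * through message_handler_req_exec_cpg_procleave() and do_proc_leave().
 */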
cpg_node_joinleave_send (req_lib_cpg_leave->pid, &req_lib_cpg_leave->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_LEAVE); break; } /* send return */ res_lib_cpg_leave.header.size = sizeof(res_lib_cpg_leave); res_lib_cpg_leave.header.id = MESSAGE_RES_CPG_LEAVE; res_lib_cpg_leave.header.error = error; api->ipc_response_send(conn, &res_lib_cpg_leave, sizeof(res_lib_cpg_leave)); } /* Finalize message from library */ static void message_handler_req_lib_cpg_finalize ( void *conn, const void *message) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_finalize res_lib_cpg_finalize; cs_error_t error = CS_OK; log_printf (LOGSYS_LEVEL_DEBUG, "cpg finalize for conn=%p", conn); /* * We will just remove cpd from list. After this call, connection will be * closed on lib side, and cpg_lib_exit_fn will be called */ list_del (&cpd->list); list_init (&cpd->list); res_lib_cpg_finalize.header.size = sizeof (res_lib_cpg_finalize); res_lib_cpg_finalize.header.id = MESSAGE_RES_CPG_FINALIZE; res_lib_cpg_finalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_finalize, sizeof (res_lib_cpg_finalize)); } static int memory_map ( const char *path, size_t bytes, void **buf) { int32_t fd; void *addr; int32_t res; fd = open (path, O_RDWR, 0600); unlink (path); if (fd == -1) { return (-1); } res = ftruncate (fd, bytes); if (res == -1) { goto error_close_unlink; } addr = mmap (NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { goto error_close_unlink; } #ifdef MADV_NOSYNC madvise(addr, bytes, MADV_NOSYNC); #endif res = close (fd); if (res) { + munmap (addr, bytes); return (-1); } *buf = addr; return (0); error_close_unlink: close (fd); unlink(path); return -1; } static inline int zcb_alloc ( struct cpg_pd *cpd, const char *path_to_file, size_t size, void **addr) { struct zcb_mapped *zcb_mapped; unsigned int res; zcb_mapped = malloc (sizeof (struct zcb_mapped)); if (zcb_mapped == NULL) { return (-1); } res = memory_map ( path_to_file, size, addr); if (res == -1) { free (zcb_mapped); return (-1); } list_init (&zcb_mapped->list); zcb_mapped->addr = *addr; zcb_mapped->size = size; list_add_tail (&zcb_mapped->list, &cpd->zcb_mapped_list_head); return (0); } static inline int zcb_free (struct zcb_mapped *zcb_mapped) { unsigned int res; res = munmap (zcb_mapped->addr, zcb_mapped->size); list_del (&zcb_mapped->list); free (zcb_mapped); return (res); } static inline int zcb_by_addr_free (struct cpg_pd *cpd, void *addr) { struct list_head *list; struct zcb_mapped *zcb_mapped; unsigned int res = 0; for (list = cpd->zcb_mapped_list_head.next; list != &cpd->zcb_mapped_list_head; list = list->next) { zcb_mapped = list_entry (list, struct zcb_mapped, list); if (zcb_mapped->addr == addr) { res = zcb_free (zcb_mapped); break; } } return (res); } static inline int zcb_all_free ( struct cpg_pd *cpd) { struct list_head *list; struct zcb_mapped *zcb_mapped; for (list = cpd->zcb_mapped_list_head.next; list != &cpd->zcb_mapped_list_head;) { zcb_mapped = list_entry (list, struct zcb_mapped, list); list = list->next; zcb_free (zcb_mapped); } return (0); } union u { uint64_t server_addr; void *server_ptr; }; static uint64_t void2serveraddr (void *server_ptr) { union u u; u.server_ptr = server_ptr; return (u.server_addr); } static void *serveraddr2void (uint64_t server_addr) { union u u; u.server_addr = server_addr; return (u.server_ptr); }; static void message_handler_req_lib_cpg_zc_alloc ( void *conn, const void *message) { 
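/*
 * Zero-copy allocation: the library passes the path of a shared file, which
 * zcb_alloc()/memory_map() above mmap into this process and record on
 * cpd->zcb_mapped_list_head. The server-side address of the mapping is
 * written into the coroipcs_zc_header at the start of the mapped area as a
 * uint64_t (via void2serveraddr()); later zc_free and zc_execute requests
 * quote that value back in hdr->server_address, and serveraddr2void()
 * recovers the pointer.
 */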
mar_req_coroipcc_zc_alloc_t *hdr = (mar_req_coroipcc_zc_alloc_t *)message; struct qb_ipc_response_header res_header; void *addr = NULL; struct coroipcs_zc_header *zc_header; unsigned int res; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "path: %s", hdr->path_to_file); res = zcb_alloc (cpd, hdr->path_to_file, hdr->map_size, &addr); assert(res == 0); zc_header = (struct coroipcs_zc_header *)addr; zc_header->server_address = void2serveraddr(addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send (conn, &res_header, res_header.size); } static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message) { mar_req_coroipcc_zc_free_t *hdr = (mar_req_coroipcc_zc_free_t *)message; struct qb_ipc_response_header res_header; void *addr = NULL; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, " free'ing"); addr = serveraddr2void (hdr->server_address); zcb_by_addr_free (cpd, addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send ( conn, &res_header, res_header.size); } /* Fragmented mcast message from the library */ static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message) { const struct req_lib_cpg_partial_mcast *req_lib_cpg_mcast = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); mar_cpg_name_t group_name = cpd->group_name; struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_partial_mcast req_exec_cpg_mcast; struct res_lib_cpg_partial_send res_lib_cpg_partial_send; int msglen = req_lib_cpg_mcast->fraglen; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_TRACE, "got fragmented mcast request on %p", conn); log_printf(LOGSYS_LEVEL_DEBUG, "Sending fragmented message size = %d bytes\n", msglen); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } res_lib_cpg_partial_send.header.size = sizeof(res_lib_cpg_partial_send); res_lib_cpg_partial_send.header.id = MESSAGE_RES_CPG_PARTIAL_SEND; if (req_lib_cpg_mcast->type == LIBCPG_PARTIAL_FIRST) { cpd->initial_transition_counter = cpd->transition_counter; } if (cpd->transition_counter != cpd->initial_transition_counter) { error = CS_ERR_INTERRUPT; } if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; req_exec_cpg_mcast.type = req_lib_cpg_mcast->type; req_exec_cpg_mcast.fraglen = req_lib_cpg_mcast->fraglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; req_exec_cpg_iovec[1].iov_len = msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); assert(result == 0); } else { log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d", conn, group_name.value, cpd->cpd_state, error); } res_lib_cpg_partial_send.header.error = 
error; api->ipc_response_send (conn, &res_lib_cpg_partial_send, sizeof (res_lib_cpg_partial_send)); } /* Mcast message from the library */ static void message_handler_req_lib_cpg_mcast (void *conn, const void *message) { const struct req_lib_cpg_mcast *req_lib_cpg_mcast = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); mar_cpg_name_t group_name = cpd->group_name; struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; int msglen = req_lib_cpg_mcast->msglen; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_TRACE, "got mcast request on %p", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; req_exec_cpg_iovec[1].iov_len = msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); assert(result == 0); } else { log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d", conn, group_name.value, cpd->cpd_state, error); } } static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message) { mar_req_coroipcc_zc_execute_t *hdr = (mar_req_coroipcc_zc_execute_t *)message; struct qb_ipc_request_header *header; struct res_lib_cpg_mcast res_lib_cpg_mcast; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; struct req_lib_cpg_mcast *req_lib_cpg_mcast; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_TRACE, "got ZC mcast request on %p", conn); header = (struct qb_ipc_request_header *)(((char *)serveraddr2void(hdr->server_address) + sizeof (struct coroipcs_zc_header))); req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)header; switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast); res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST; if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + req_lib_cpg_mcast->msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &cpd->group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast); req_exec_cpg_iovec[1].iov_len = req_exec_cpg_mcast.msglen; result = 
api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); if (result == 0) { res_lib_cpg_mcast.header.error = CS_OK; } else { res_lib_cpg_mcast.header.error = CS_ERR_TRY_AGAIN; } } else { res_lib_cpg_mcast.header.error = error; } api->ipc_response_send (conn, &res_lib_cpg_mcast, sizeof (res_lib_cpg_mcast)); } static void message_handler_req_lib_cpg_membership (void *conn, const void *message) { struct req_lib_cpg_membership_get *req_lib_cpg_membership_get = (struct req_lib_cpg_membership_get *)message; struct res_lib_cpg_membership_get res_lib_cpg_membership_get; struct list_head *iter; int member_count = 0; res_lib_cpg_membership_get.header.id = MESSAGE_RES_CPG_MEMBERSHIP; res_lib_cpg_membership_get.header.error = CS_OK; res_lib_cpg_membership_get.header.size = sizeof (struct res_lib_cpg_membership_get); for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, &req_lib_cpg_membership_get->group_name) == 0) { res_lib_cpg_membership_get.member_list[member_count].nodeid = pi->nodeid; res_lib_cpg_membership_get.member_list[member_count].pid = pi->pid; member_count += 1; } } res_lib_cpg_membership_get.member_count = member_count; api->ipc_response_send (conn, &res_lib_cpg_membership_get, sizeof (res_lib_cpg_membership_get)); } static void message_handler_req_lib_cpg_local_get (void *conn, const void *message) { struct res_lib_cpg_local_get res_lib_cpg_local_get; res_lib_cpg_local_get.header.size = sizeof (res_lib_cpg_local_get); res_lib_cpg_local_get.header.id = MESSAGE_RES_CPG_LOCAL_GET; res_lib_cpg_local_get.header.error = CS_OK; res_lib_cpg_local_get.local_nodeid = api->totem_nodeid_get (); api->ipc_response_send (conn, &res_lib_cpg_local_get, sizeof (res_lib_cpg_local_get)); } static void message_handler_req_lib_cpg_iteration_initialize ( void *conn, const void *message) { const struct req_lib_cpg_iterationinitialize *req_lib_cpg_iterationinitialize = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); hdb_handle_t cpg_iteration_handle = 0; struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize; struct list_head *iter, *iter2; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration initialize"); /* Because between calling this function and *next can be some operations which will * change list, we must do full copy. 
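 * The copy held by the iteration instance is also kept "grouped" by group
 * name (see the insertion loop below), so iteration returns all members of
 * one group before moving on to the next.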
*/ /* * Create new iteration instance */ res = hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance), &cpg_iteration_handle); if (res != 0) { error = CS_ERR_NO_MEMORY; goto response_send; } res = hdb_handle_get (&cpg_iteration_handle_t_db, cpg_iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_BAD_HANDLE; goto error_destroy; } list_init (&cpg_iteration_instance->items_list_head); cpg_iteration_instance->handle = cpg_iteration_handle; /* * Create copy of process_info list "grouped by" group name */ for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); struct process_info *new_pi; if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { /* * Try to find processed group name in our list new list */ int found = 0; for (iter2 = cpg_iteration_instance->items_list_head.next; iter2 != &cpg_iteration_instance->items_list_head; iter2 = iter2->next) { struct process_info *pi2 = list_entry (iter2, struct process_info, list); if (mar_name_compare (&pi2->group, &pi->group) == 0) { found = 1; break; } } if (found) { /* * We have this name in list -> don't add */ continue ; } } else if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_ONE_GROUP) { /* * Test pi group name with request */ if (mar_name_compare (&pi->group, &req_lib_cpg_iterationinitialize->group_name) != 0) /* * Not same -> don't add */ continue ; } new_pi = malloc (sizeof (struct process_info)); if (!new_pi) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); error = CS_ERR_NO_MEMORY; goto error_put_destroy; } memcpy (new_pi, pi, sizeof (struct process_info)); list_init (&new_pi->list); if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { /* * pid and nodeid -> undefined */ new_pi->pid = new_pi->nodeid = 0; } /* * We will return list "grouped" by "group name", so try to find right place to add */ for (iter2 = cpg_iteration_instance->items_list_head.next; iter2 != &cpg_iteration_instance->items_list_head; iter2 = iter2->next) { struct process_info *pi2 = list_entry (iter2, struct process_info, list); if (mar_name_compare (&pi2->group, &pi->group) == 0) { break; } } list_add (&new_pi->list, iter2); } /* * Now we have a full "grouped by" copy of process_info list */ /* * Add instance to current cpd list */ list_init (&cpg_iteration_instance->list); list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head); cpg_iteration_instance->current_pointer = &cpg_iteration_instance->items_list_head; error_put_destroy: hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_handle); error_destroy: if (error != CS_OK) { hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_handle); } response_send: res_lib_cpg_iterationinitialize.header.size = sizeof (res_lib_cpg_iterationinitialize); res_lib_cpg_iterationinitialize.header.id = MESSAGE_RES_CPG_ITERATIONINITIALIZE; res_lib_cpg_iterationinitialize.header.error = error; res_lib_cpg_iterationinitialize.iteration_handle = cpg_iteration_handle; api->ipc_response_send (conn, &res_lib_cpg_iterationinitialize, sizeof (res_lib_cpg_iterationinitialize)); } static void message_handler_req_lib_cpg_iteration_next ( void *conn, const void *message) { const struct req_lib_cpg_iterationnext *req_lib_cpg_iterationnext = message; struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext; struct cpg_iteration_instance 
*cpg_iteration_instance; cs_error_t error = CS_OK; int res; struct process_info *pi; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration next"); res = hdb_handle_get (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_LIBRARY; goto error_exit; } assert (cpg_iteration_instance); cpg_iteration_instance->current_pointer = cpg_iteration_instance->current_pointer->next; if (cpg_iteration_instance->current_pointer == &cpg_iteration_instance->items_list_head) { error = CS_ERR_NO_SECTIONS; goto error_put; } pi = list_entry (cpg_iteration_instance->current_pointer, struct process_info, list); /* * Copy iteration data */ res_lib_cpg_iterationnext.description.nodeid = pi->nodeid; res_lib_cpg_iterationnext.description.pid = pi->pid; memcpy (&res_lib_cpg_iterationnext.description.group, &pi->group, sizeof (mar_cpg_name_t)); error_put: hdb_handle_put (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle); error_exit: res_lib_cpg_iterationnext.header.size = sizeof (res_lib_cpg_iterationnext); res_lib_cpg_iterationnext.header.id = MESSAGE_RES_CPG_ITERATIONNEXT; res_lib_cpg_iterationnext.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_iterationnext, sizeof (res_lib_cpg_iterationnext)); } static void message_handler_req_lib_cpg_iteration_finalize ( void *conn, const void *message) { const struct req_lib_cpg_iterationfinalize *req_lib_cpg_iterationfinalize = message; struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration finalize"); res = hdb_handle_get (&cpg_iteration_handle_t_db, req_lib_cpg_iterationfinalize->iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_LIBRARY; goto error_exit; } assert (cpg_iteration_instance); cpg_iteration_instance_finalize (cpg_iteration_instance); hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); error_exit: res_lib_cpg_iterationfinalize.header.size = sizeof (res_lib_cpg_iterationfinalize); res_lib_cpg_iterationfinalize.header.id = MESSAGE_RES_CPG_ITERATIONFINALIZE; res_lib_cpg_iterationfinalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_iterationfinalize, sizeof (res_lib_cpg_iterationfinalize)); } diff --git a/exec/totemconfig.c b/exec/totemconfig.c index 070edbe4..84a828db 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -1,1653 +1,1660 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2013 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "totemconfig.h" #define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4 #define TOKEN_TIMEOUT 1000 #define TOKEN_COEFFICIENT 650 #define JOIN_TIMEOUT 50 #define MERGE_TIMEOUT 200 #define DOWNCHECK_TIMEOUT 1000 #define FAIL_TO_RECV_CONST 2500 #define SEQNO_UNCHANGED_CONST 30 #define MINIMUM_TIMEOUT (int)(1000/HZ)*3 #define MAX_NETWORK_DELAY 50 #define WINDOW_SIZE 50 #define MAX_MESSAGES 17 #define MISS_COUNT_CONST 5 #define RRP_PROBLEM_COUNT_TIMEOUT 2000 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN 2 #define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000 #define DEFAULT_PORT 5405 static char error_string_response[512]; static void add_totem_config_notification(struct totem_config *totem_config); /* All the volatile parameters are uint32s, luckily */ static uint32_t *totem_get_param_by_name(struct totem_config *totem_config, const char *param_name) { if (strcmp(param_name, "totem.token") == 0) return &totem_config->token_timeout; if (strcmp(param_name, "totem.token_retransmit") == 0) return &totem_config->token_retransmit_timeout; if (strcmp(param_name, "totem.hold") == 0) return &totem_config->token_hold_timeout; if (strcmp(param_name, "totem.token_retransmits_before_loss_const") == 0) return &totem_config->token_retransmits_before_loss_const; if (strcmp(param_name, "totem.join") == 0) return &totem_config->join_timeout; if (strcmp(param_name, "totem.send_join") == 0) return &totem_config->send_join_timeout; if (strcmp(param_name, "totem.consensus") == 0) return &totem_config->consensus_timeout; if (strcmp(param_name, "totem.merge") == 0) return &totem_config->merge_timeout; if (strcmp(param_name, "totem.downcheck") == 0) return &totem_config->downcheck_timeout; if (strcmp(param_name, "totem.fail_recv_const") == 0) return &totem_config->fail_to_recv_const; if (strcmp(param_name, "totem.seqno_unchanged_const") == 0) return &totem_config->seqno_unchanged_const; if (strcmp(param_name, "totem.rrp_token_expired_timeout") == 0) return &totem_config->rrp_token_expired_timeout; if (strcmp(param_name, "totem.rrp_problem_count_timeout") == 0) return &totem_config->rrp_problem_count_timeout; if (strcmp(param_name, "totem.rrp_problem_count_threshold") == 0) return &totem_config->rrp_problem_count_threshold; if (strcmp(param_name, "totem.rrp_problem_count_mcast_threshold") == 0) return &totem_config->rrp_problem_count_mcast_threshold; if (strcmp(param_name, "totem.rrp_autorecovery_check_timeout") == 0) return &totem_config->rrp_autorecovery_check_timeout; if (strcmp(param_name, "totem.heartbeat_failures_allowed") 
== 0) return &totem_config->heartbeat_failures_allowed; if (strcmp(param_name, "totem.max_network_delay") == 0) return &totem_config->max_network_delay; if (strcmp(param_name, "totem.window_size") == 0) return &totem_config->window_size; if (strcmp(param_name, "totem.max_messages") == 0) return &totem_config->max_messages; if (strcmp(param_name, "totem.miss_count_const") == 0) return &totem_config->miss_count_const; return NULL; }
/*
 * Read key_name from icmap. If the key is not found, if key_name == deleted_key, or if allow_zero_value
 * is false and the value read is zero, the default value is used and stored into totem_config.
 */
static void totem_volatile_config_set_value (struct totem_config *totem_config, const char *key_name, const char *deleted_key, unsigned int default_value, int allow_zero_value) { char runtime_key_name[ICMAP_KEYNAME_MAXLEN]; if (icmap_get_uint32(key_name, totem_get_param_by_name(totem_config, key_name)) != CS_OK || (deleted_key != NULL && strcmp(deleted_key, key_name) == 0) || (!allow_zero_value && *totem_get_param_by_name(totem_config, key_name) == 0)) { *totem_get_param_by_name(totem_config, key_name) = default_value; }
/*
 * Store totem_config value to cmap runtime section
 */
+ if (strlen("runtime.config.") + strlen(key_name) >= ICMAP_KEYNAME_MAXLEN) {
+ /*
+ * This shouldn't happen
+ */
+ return ;
+ }
+
strcpy(runtime_key_name, "runtime.config."); strcat(runtime_key_name, key_name); icmap_set_uint32(runtime_key_name, *totem_get_param_by_name(totem_config, key_name)); }
/*
 * Read and validate config values from cmap and store them into totem_config. If a key doesn't exist,
 * the default value is stored. deleted_key is the name of the key being processed by a delete operation
 * from cmap. It is considered non-existent even if it can still be read. Can be NULL.
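 *
 * Each key is handled through totem_volatile_config_set_value(); for example,
 * the token timeout is read as
 *
 *   totem_volatile_config_set_value(totem_config, "totem.token", deleted_key, TOKEN_TIMEOUT, 0);
 *
 * which fills totem_config->token_timeout (defaulting to TOKEN_TIMEOUT) and
 * mirrors the resulting value under "runtime.config.totem.token" in icmap.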
*/ static void totem_volatile_config_read (struct totem_config *totem_config, const char *deleted_key) { uint32_t u32; totem_volatile_config_set_value(totem_config, "totem.token_retransmits_before_loss_const", deleted_key, TOKEN_RETRANSMITS_BEFORE_LOSS_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.token", deleted_key, TOKEN_TIMEOUT, 0); if (totem_config->interface_count > 0 && totem_config->interfaces[0].member_count > 2) { u32 = TOKEN_COEFFICIENT; icmap_get_uint32("totem.token_coefficient", &u32); totem_config->token_timeout += (totem_config->interfaces[0].member_count - 2) * u32; /* * Store totem_config value to cmap runtime section */ icmap_set_uint32("runtime.config.totem.token", totem_config->token_timeout); } totem_volatile_config_set_value(totem_config, "totem.max_network_delay", deleted_key, MAX_NETWORK_DELAY, 0); totem_volatile_config_set_value(totem_config, "totem.window_size", deleted_key, WINDOW_SIZE, 0); totem_volatile_config_set_value(totem_config, "totem.max_messages", deleted_key, MAX_MESSAGES, 0); totem_volatile_config_set_value(totem_config, "totem.miss_count_const", deleted_key, MISS_COUNT_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.token_retransmit", deleted_key, (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)), 0); totem_volatile_config_set_value(totem_config, "totem.hold", deleted_key, (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)), 0); totem_volatile_config_set_value(totem_config, "totem.join", deleted_key, JOIN_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.consensus", deleted_key, (int)(float)(1.2 * totem_config->token_timeout), 0); totem_volatile_config_set_value(totem_config, "totem.merge", deleted_key, MERGE_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.downcheck", deleted_key, DOWNCHECK_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.fail_recv_const", deleted_key, FAIL_TO_RECV_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.seqno_unchanged_const", deleted_key, SEQNO_UNCHANGED_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.send_join", deleted_key, 0, 1); totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_timeout", deleted_key, RRP_PROBLEM_COUNT_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_threshold", deleted_key, RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT, 0); totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_mcast_threshold", deleted_key, totem_config->rrp_problem_count_threshold * 10, 0); totem_volatile_config_set_value(totem_config, "totem.rrp_token_expired_timeout", deleted_key, totem_config->token_retransmit_timeout, 0); totem_volatile_config_set_value(totem_config, "totem.rrp_autorecovery_check_timeout", deleted_key, RRP_AUTORECOVERY_CHECK_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.heartbeat_failures_allowed", deleted_key, 0, 1); } static int totem_volatile_config_validate ( struct totem_config *totem_config, const char **error_string) { static char local_error_reason[512]; const char *error_reason = local_error_reason; if (totem_config->max_network_delay < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_network_delay parameter (%d ms) may not be less than (%d ms).", totem_config->max_network_delay, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, 
sizeof(local_error_reason), "The token timeout parameter (%d ms) may not be less than (%d ms).", totem_config->token_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token retransmit timeout parameter (%d ms) may not be less than (%d ms).", totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token hold timeout parameter (%d ms) may not be less than (%d ms).", totem_config->token_hold_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->join_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The join timeout parameter (%d ms) may not be less than (%d ms).", totem_config->join_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The consensus timeout parameter (%d ms) may not be less than (%d ms).", totem_config->consensus_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->consensus_timeout < totem_config->join_timeout) { snprintf (local_error_reason, sizeof(local_error_reason), "The consensus timeout parameter (%d ms) may not be less than join timeout (%d ms).", totem_config->consensus_timeout, totem_config->join_timeout); goto parse_error; } if (totem_config->merge_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The merge timeout parameter (%d ms) may not be less than (%d ms).", totem_config->merge_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The downcheck timeout parameter (%d ms) may not be less than (%d ms).", totem_config->downcheck_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_problem_count_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count timeout parameter (%d ms) may not be less than (%d ms).", totem_config->rrp_problem_count_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count threshold (%d problem count) may not be less than (%d problem count).", totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); goto parse_error; } if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP multicast problem count threshold (%d problem count) may not be less than (%d problem count).", totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); goto parse_error; } if (totem_config->rrp_token_expired_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP token expired timeout parameter (%d ms) may not be less than (%d ms).", totem_config->rrp_token_expired_timeout, MINIMUM_TIMEOUT); goto parse_error; } return 0; parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s\n", error_reason); *error_string = error_string_response; return (-1); } static int totem_get_crypto(struct totem_config *totem_config) { char *str; const char *tmp_cipher; 
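/*
 * Crypto selection: the defaults below are aes256 + sha1. "totem.secauth: off"
 * downgrades both to "none", and explicit totem.crypto_cipher /
 * totem.crypto_hash keys then override that. Requesting a cipher while
 * leaving the hash as "none" is rejected (the function returns -1).
 */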
const char *tmp_hash; tmp_hash = "sha1"; tmp_cipher = "aes256"; if (icmap_get_string("totem.secauth", &str) == CS_OK) { if (strcmp (str, "off") == 0) { tmp_hash = "none"; tmp_cipher = "none"; } free(str); } if (icmap_get_string("totem.crypto_cipher", &str) == CS_OK) { if (strcmp(str, "none") == 0) { tmp_cipher = "none"; } if (strcmp(str, "aes256") == 0) { tmp_cipher = "aes256"; } if (strcmp(str, "aes192") == 0) { tmp_cipher = "aes192"; } if (strcmp(str, "aes128") == 0) { tmp_cipher = "aes128"; } if (strcmp(str, "3des") == 0) { tmp_cipher = "3des"; } free(str); } if (icmap_get_string("totem.crypto_hash", &str) == CS_OK) { if (strcmp(str, "none") == 0) { tmp_hash = "none"; } if (strcmp(str, "md5") == 0) { tmp_hash = "md5"; } if (strcmp(str, "sha1") == 0) { tmp_hash = "sha1"; } if (strcmp(str, "sha256") == 0) { tmp_hash = "sha256"; } if (strcmp(str, "sha384") == 0) { tmp_hash = "sha384"; } if (strcmp(str, "sha512") == 0) { tmp_hash = "sha512"; } free(str); } if ((strcmp(tmp_cipher, "none") != 0) && (strcmp(tmp_hash, "none") == 0)) { return -1; } free(totem_config->crypto_cipher_type); free(totem_config->crypto_hash_type); totem_config->crypto_cipher_type = strdup(tmp_cipher); totem_config->crypto_hash_type = strdup(tmp_hash); return 0; } static int totem_config_get_ip_version(void) { int res; char *str; res = AF_INET; if (icmap_get_string("totem.ip_version", &str) == CS_OK) { if (strcmp(str, "ipv4") == 0) { res = AF_INET; } if (strcmp(str, "ipv6") == 0) { res = AF_INET6; } free(str); } return (res); } static uint16_t generate_cluster_id (const char *cluster_name) { int i; int value = 0; for (i = 0; i < strlen(cluster_name); i++) { value <<= 1; value += cluster_name[i]; } return (value & 0xFFFF); } static int get_cluster_mcast_addr ( const char *cluster_name, unsigned int ringnumber, int ip_version, struct totem_ip_address *res) { uint16_t clusterid; char addr[INET6_ADDRSTRLEN + 1]; int err; if (cluster_name == NULL) { return (-1); } clusterid = generate_cluster_id(cluster_name) + ringnumber; memset (res, 0, sizeof(*res)); switch (ip_version) { case AF_INET: snprintf(addr, sizeof(addr), "239.192.%d.%d", clusterid >> 8, clusterid % 0xFF); break; case AF_INET6: snprintf(addr, sizeof(addr), "ff15::%x", clusterid); break; default: /* * Unknown family */ return (-1); } err = totemip_parse (res, addr, ip_version); return (err); } static unsigned int generate_nodeid_for_duplicate_test( struct totem_config *totem_config, char *addr) { unsigned int nodeid; struct totem_ip_address totemip; /* AF_INET hard-coded here because auto-generated nodeids are only for IPv4 */ if (totemip_parse(&totemip, addr, AF_INET) != 0) return -1; memcpy (&nodeid, &totemip.addr, sizeof (unsigned int)); #if __BYTE_ORDER == __LITTLE_ENDIAN nodeid = swab32 (nodeid); #endif if (totem_config->clear_node_high_bit) { nodeid &= 0x7FFFFFFF; } return nodeid; } static int check_for_duplicate_nodeids( struct totem_config *totem_config, const char **error_string) { icmap_iter_t iter; icmap_iter_t subiter; const char *iter_key; int res = 0; int retval = 0; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char *ring0_addr=NULL; char *ring0_addr1=NULL; unsigned int node_pos; unsigned int node_pos1; unsigned int nodeid; unsigned int nodeid1; int autogenerated; iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, 
"nodelist.node.%u.nodeid", node_pos); autogenerated = 0; if (icmap_get_uint32(tmp_key, &nodeid) != CS_OK) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); if (icmap_get_string(tmp_key, &ring0_addr) != CS_OK) { continue; } /* Generate nodeid so we can check that auto-generated nodeids don't clash either */ nodeid = generate_nodeid_for_duplicate_test(totem_config, ring0_addr); if (nodeid == -1) { continue; } autogenerated = 1; } node_pos1 = 0; subiter = icmap_iter_init("nodelist.node."); while (((iter_key = icmap_iter_next(subiter, NULL, NULL)) != NULL) && (node_pos1 < node_pos)) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos1, tmp_key); if ((res != 2) || (node_pos1 >= node_pos)) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos1); if (icmap_get_uint32(tmp_key, &nodeid1) != CS_OK) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos1); if (icmap_get_string(tmp_key, &ring0_addr1) != CS_OK) { continue; } nodeid1 = generate_nodeid_for_duplicate_test(totem_config, ring0_addr1); if (nodeid1 == -1) { continue; } } if (nodeid == nodeid1) { retval = -1; snprintf (error_string_response, sizeof(error_string_response), "Nodeid %u%s%s%s appears twice in corosync.conf", nodeid, autogenerated?"(autogenerated from ":"", autogenerated?ring0_addr:"", autogenerated?")":""); log_printf (LOGSYS_LEVEL_ERROR, error_string_response); *error_string = error_string_response; break; } } icmap_iter_finalize(subiter); } icmap_iter_finalize(iter); return retval; } static int find_local_node_in_nodelist(struct totem_config *totem_config) { icmap_iter_t iter; const char *iter_key; int res = 0; unsigned int node_pos; int local_node_pos = -1; struct totem_ip_address bind_addr; int interface_up, interface_num; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; struct totem_ip_address node_addr; res = totemip_iface_check(&totem_config->interfaces[0].bindnet, &bind_addr, &interface_up, &interface_num, totem_config->clear_node_high_bit); if (res == -1) { return (-1); } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); if (icmap_get_string(tmp_key, &node_addr_str) != CS_OK) { continue; } res = totemip_parse (&node_addr, node_addr_str, totem_config->ip_version); free(node_addr_str); if (res == -1) { continue ; } if (totemip_equal(&bind_addr, &node_addr)) { local_node_pos = node_pos; } } icmap_iter_finalize(iter); return (local_node_pos); } /* * Compute difference between two set of totem interface arrays. set1 and set2 * are changed so for same ring, ip existing in both set1 and set2 are cleared * (set to 0), and ips which are only in set1 or set2 remains untouched. * totempg_node_add/remove is called. 
 */
static void compute_interfaces_diff(int interface_count,
	struct totem_interface *set1,
	struct totem_interface *set2)
{
	int ring_no, set1_pos, set2_pos;
	struct totem_ip_address empty_ip_address;

	memset(&empty_ip_address, 0, sizeof(empty_ip_address));

	for (ring_no = 0; ring_no < interface_count; ring_no++) {
		for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) {
			for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) {
				/*
				 * For the current ring_no, clear all set1 items that also
				 * exist in set2
				 */
				if (memcmp(&set1[ring_no].member_list[set1_pos],
				    &set2[ring_no].member_list[set2_pos],
				    sizeof(struct totem_ip_address)) == 0) {
					memset(&set1[ring_no].member_list[set1_pos], 0, sizeof(struct totem_ip_address));
					memset(&set2[ring_no].member_list[set2_pos], 0, sizeof(struct totem_ip_address));
				}
			}
		}
	}

	for (ring_no = 0; ring_no < interface_count; ring_no++) {
		for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) {
			/*
			 * All items remaining in set1 no longer exist in set2, so those
			 * nodes have to be removed.
			 */
			if (memcmp(&set1[ring_no].member_list[set1_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) {
				log_printf(LOGSYS_LEVEL_DEBUG,
					"removing dynamic member %s for ring %u",
					totemip_print(&set1[ring_no].member_list[set1_pos]),
					ring_no);
				totempg_member_remove(&set1[ring_no].member_list[set1_pos], ring_no);
			}
		}
		for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) {
			/*
			 * All items remaining in set2 did not exist in set1, so these are
			 * new nodes and have to be added.
			 */
			if (memcmp(&set2[ring_no].member_list[set2_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) {
				log_printf(LOGSYS_LEVEL_DEBUG,
					"adding dynamic member %s for ring %u",
					totemip_print(&set2[ring_no].member_list[set2_pos]),
					ring_no);
				totempg_member_add(&set2[ring_no].member_list[set2_pos], ring_no);
			}
		}
	}
}

static void put_nodelist_members_to_config(struct totem_config *totem_config, int reload)
{
	icmap_iter_t iter, iter2;
	const char *iter_key, *iter_key2;
	int res = 0;
	unsigned int node_pos;
	char tmp_key[ICMAP_KEYNAME_MAXLEN];
	char tmp_key2[ICMAP_KEYNAME_MAXLEN];
	char *node_addr_str;
	int member_count;
	unsigned int ringnumber = 0;
	int i, j;
	struct totem_interface *orig_interfaces = NULL;
	struct totem_interface *new_interfaces = NULL;

	if (reload) {
		/*
		 * We only need to compute the diff on reload.
		 * For the initial configuration not all totem structures are
		 * initialized yet, so corosync would crash during member_add/remove.
		 */
		orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
		assert(orig_interfaces != NULL);
		new_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
		assert(new_interfaces != NULL);

		memcpy(orig_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX);
	}

	/* Clear out nodelist so we can put the new one in if needed */
	for (i = 0; i < totem_config->interface_count; i++) {
		for (j = 0; j < PROCESSOR_COUNT_MAX; j++) {
			memset(&totem_config->interfaces[i].member_list[j], 0, sizeof(struct totem_ip_address));
		}
		totem_config->interfaces[i].member_count = 0;
	}

	iter = icmap_iter_init("nodelist.node.");
	while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
		res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
		if (res != 2) {
			continue;
		}

		if (strcmp(tmp_key, "ring0_addr") != 0) {
			continue;
		}

		snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos);
		iter2 = icmap_iter_init(tmp_key);
		while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) {
			res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2);
			if (res != 3 || strcmp(tmp_key2, "_addr") != 0) {
				continue;
			}

			if (icmap_get_string(iter_key2, &node_addr_str) != CS_OK) {
				continue;
			}

			member_count = totem_config->interfaces[ringnumber].member_count;
			res = totemip_parse(&totem_config->interfaces[ringnumber].member_list[member_count],
						node_addr_str, totem_config->ip_version);
			if (res != -1) {
				totem_config->interfaces[ringnumber].member_count++;
			}
			free(node_addr_str);
		}

		icmap_iter_finalize(iter2);
	}

	icmap_iter_finalize(iter);

	if (reload) {
		memcpy(new_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX);

		compute_interfaces_diff(totem_config->interface_count, orig_interfaces, new_interfaces);

		free(new_interfaces);
		free(orig_interfaces);
	}
}

static void nodelist_dynamic_notify(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	int res;
	unsigned int ring_no;
	unsigned int member_no;
	char tmp_str[ICMAP_KEYNAME_MAXLEN];
	uint8_t reloading;
	struct totem_config *totem_config = (struct totem_config *)user_data;

	/*
	 * If a full reload is in progress then don't do anything until it's done and
	 * we can reconfigure it all atomically
	 */
	if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
		return;
	}

	res = sscanf(key_name, "nodelist.node.%u.ring%u%s", &member_no, &ring_no, tmp_str);
	if (res != 3)
		return;

	if (strcmp(tmp_str, "_addr") != 0) {
		return;
	}

	put_nodelist_members_to_config(totem_config, 1);
}

/*
 * Tries to find the node (node_pos) in the config nodelist whose address matches any
 * local interface. The address can be stored in ring0_addr or, if ipaddr_key_prefix is
 * not NULL, in keys with that prefix (there can be multiple of them). This function
 * differs from find_local_node_in_nodelist because it doesn't need bindnetaddr,
 * but it doesn't work when the bind address is a network address (so the IP must be
 * an exact match).
 *
 * Returns 1 on success (address was found, node_pos is then correctly set) or 0 on failure.
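 *
 * A minimal usage sketch (hypothetical caller): to match the local node by its
 * ring0_addr only, pass a NULL prefix:
 *
 *   unsigned int pos;
 *   if (totem_config_find_local_addr_in_nodelist(NULL, &pos)) {
 *           ... keys under "nodelist.node.<pos>." describe the local node ...
 *   }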
*/ int totem_config_find_local_addr_in_nodelist(const char *ipaddr_key_prefix, unsigned int *node_pos) { struct list_head addrs; struct totem_ip_if_address *if_addr; icmap_iter_t iter, iter2; const char *iter_key, *iter_key2; struct list_head *list; const char *ipaddr_key; int ip_version; struct totem_ip_address node_addr; char *node_addr_str; int node_found = 0; int res = 0; char tmp_key[ICMAP_KEYNAME_MAXLEN]; if (totemip_getifaddrs(&addrs) == -1) { return 0; } ip_version = totem_config_get_ip_version(); iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue ; } free(node_addr_str); /* * ring0_addr found -> let's iterate thru ipaddr_key_prefix */ snprintf(tmp_key, sizeof(tmp_key), "nodelist.node.%u.%s", *node_pos, (ipaddr_key_prefix != NULL ? ipaddr_key_prefix : "ring0_addr")); iter2 = icmap_iter_init(tmp_key); while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) { /* * ring0_addr must be exact match, not prefix */ ipaddr_key = (ipaddr_key_prefix != NULL ? iter_key2 : tmp_key); if (icmap_get_string(ipaddr_key, &node_addr_str) != CS_OK) { continue ; } if (totemip_parse(&node_addr, node_addr_str, ip_version) == -1) { free(node_addr_str); continue ; } free(node_addr_str); /* * Try to match ip with if_addrs */ node_found = 0; for (list = addrs.next; list != &addrs; list = list->next) { if_addr = list_entry(list, struct totem_ip_if_address, list); if (totemip_equal(&node_addr, &if_addr->ip_addr)) { node_found = 1; break; } } if (node_found) { break ; } } icmap_iter_finalize(iter2); if (node_found) { break ; } } icmap_iter_finalize(iter); totemip_freeifaddrs(&addrs); return (node_found); } static void config_convert_nodelist_to_interface(struct totem_config *totem_config) { int res = 0; unsigned int node_pos; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key2[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; unsigned int ringnumber = 0; icmap_iter_t iter; const char *iter_key; if (totem_config_find_local_addr_in_nodelist(NULL, &node_pos)) { /* * We found node, so create interface section */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); iter = icmap_iter_init(tmp_key); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2); if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { continue ; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue; } snprintf(tmp_key2, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", ringnumber); icmap_set_string(tmp_key2, node_addr_str); free(node_addr_str); } icmap_iter_finalize(iter); } } extern int totem_config_read ( struct totem_config *totem_config, const char **error_string, uint64_t *warnings) { int res = 0; char *str; unsigned int ringnumber = 0; int member_count = 0; icmap_iter_t iter, member_iter; const char *iter_key; const char *member_iter_key; char ringnumber_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key[ICMAP_KEYNAME_MAXLEN]; uint8_t u8; uint16_t u16; char *cluster_name = NULL; int i; int local_node_pos; int nodeid_set; *warnings = 0; memset (totem_config, 0, sizeof (struct totem_config)); totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); if (totem_config->interfaces == 0) { *error_string = "Out of memory trying to 
allocate ethernet interface storage area"; return -1; } memset (totem_config->interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX); strcpy (totem_config->rrp_mode, "none"); icmap_get_uint32("totem.version", (uint32_t *)&totem_config->version); if (totem_get_crypto(totem_config) != 0) { *error_string = "crypto_cipher requires crypto_hash with value other than none"; return -1; } if (icmap_get_string("totem.rrp_mode", &str) == CS_OK) { if (strlen(str) >= TOTEM_RRP_MODE_BYTES) { *error_string = "totem.rrp_mode is too long"; free(str); return -1; } strcpy (totem_config->rrp_mode, str); free(str); } icmap_get_uint32("totem.nodeid", &totem_config->node_id); totem_config->clear_node_high_bit = 0; if (icmap_get_string("totem.clear_node_high_bit", &str) == CS_OK) { if (strcmp (str, "yes") == 0) { totem_config->clear_node_high_bit = 1; } free(str); } icmap_get_uint32("totem.threads", &totem_config->threads); icmap_get_uint32("totem.netmtu", &totem_config->net_mtu); if (icmap_get_string("totem.cluster_name", &cluster_name) != CS_OK) { cluster_name = NULL; } totem_config->ip_version = totem_config_get_ip_version(); if (icmap_get_string("totem.interface.0.bindnetaddr", &str) != CS_OK) { /* * We were not able to find ring 0 bindnet addr. Try to use nodelist informations */ config_convert_nodelist_to_interface(totem_config); } else { free(str); } /* * Broadcast option is global but set in interface section, * so reset before processing interfaces. */ totem_config->broadcast_use = 0; iter = icmap_iter_init("totem.interface."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "totem.interface.%[^.].%s", ringnumber_key, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "bindnetaddr") != 0) { continue; } member_count = 0; ringnumber = atoi(ringnumber_key); if (ringnumber >= INTERFACE_MAX) { free(cluster_name); snprintf (error_string_response, sizeof(error_string_response), "parse error in config: interface ring number %u is bigger than allowed maximum %u\n", ringnumber, INTERFACE_MAX - 1); *error_string = error_string_response; return -1; } /* * Get the bind net address */ if (icmap_get_string(iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str, totem_config->ip_version); free(str); } /* * Get interface multicast address */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", ringnumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, str, totem_config->ip_version); free(str); } else { /* * User not specified address -> autogenerate one from cluster_name key * (if available). Return code is intentionally ignored, because * udpu doesn't need mcastaddr and validity of mcastaddr for udp is * checked later anyway. 
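 * Illustration (following generate_cluster_id() and get_cluster_mcast_addr()
 * above): for cluster_name "abc" and ring 0 the 16-bit cluster id works out to
 * 683, so the autogenerated address would be 239.192.2.173 for IPv4 or
 * ff15::2ab for IPv6; the actual value depends on the configured cluster name
 * and ring number.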
*/ (void)get_cluster_mcast_addr (cluster_name, ringnumber, totem_config->ip_version, &totem_config->interfaces[ringnumber].mcast_addr); } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", ringnumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { if (strcmp (str, "yes") == 0) { totem_config->broadcast_use = 1; } free(str); } /* * Get mcast port */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", ringnumber); if (icmap_get_uint16(tmp_key, &totem_config->interfaces[ringnumber].ip_port) != CS_OK) { if (totem_config->broadcast_use) { totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT + (2 * ringnumber); } else { totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT; } } /* * Get the TTL */ totem_config->interfaces[ringnumber].ttl = 1; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", ringnumber); if (icmap_get_uint8(tmp_key, &u8) == CS_OK) { totem_config->interfaces[ringnumber].ttl = u8; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", ringnumber); member_iter = icmap_iter_init(tmp_key); while ((member_iter_key = icmap_iter_next(member_iter, NULL, NULL)) != NULL) { if (member_count == 0) { if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_IGNORED; break; } else { *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED; } } if (icmap_get_string(member_iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++], str, totem_config->ip_version); } } icmap_iter_finalize(member_iter); totem_config->interfaces[ringnumber].member_count = member_count; totem_config->interface_count++; } icmap_iter_finalize(iter); /* * Use broadcast is global, so if set, make sure to fill mcast addr correctly */ if (totem_config->broadcast_use) { for (ringnumber = 0; ringnumber < totem_config->interface_count; ringnumber++) { totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, "255.255.255.255", 0); } } /* * Store automatically generated items back to icmap */ for (i = 0; i < totem_config->interface_count; i++) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", i); if (icmap_get_string(tmp_key, &str) == CS_OK) { free(str); } else { str = (char *)totemip_print(&totem_config->interfaces[i].mcast_addr); icmap_set_string(tmp_key, str); } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", i); if (icmap_get_uint16(tmp_key, &u16) != CS_OK) { icmap_set_uint16(tmp_key, totem_config->interfaces[i].ip_port); } } totem_config->transport_number = TOTEM_TRANSPORT_UDP; if (icmap_get_string("totem.transport", &str) == CS_OK) { if (strcmp (str, "udpu") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_UDPU; } if (strcmp (str, "iba") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_RDMA; } free(str); } free(cluster_name); /* * Check existence of nodelist */ if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); /* * find local node */ local_node_pos = find_local_node_in_nodelist(totem_config); if (local_node_pos != -1) { icmap_set_uint32("nodelist.local_node_pos", local_node_pos); snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", local_node_pos); nodeid_set = (totem_config->node_id != 0); if (icmap_get_uint32(tmp_key, &totem_config->node_id) == CS_OK && nodeid_set) { *warnings |= TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED; } /* * Make localnode ring0_addr read only, so we can be sure that local * node 
never changes. If rebinding to other IP would be in future * supported, this must be changed and handled properly! */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", local_node_pos); icmap_set_ro_access(tmp_key, 0, 1); icmap_set_ro_access("nodelist.local_node_pos", 0, 1); } put_nodelist_members_to_config(totem_config, 0); } /* * Get things that might change in the future (and can depend on totem_config->interfaces); */ totem_volatile_config_read(totem_config, NULL); icmap_set_uint8("config.totemconfig_reload_in_progress", 0); add_totem_config_notification(totem_config); return 0; } int totem_config_validate ( struct totem_config *totem_config, const char **error_string) { static char local_error_reason[512]; char parse_error[512]; const char *error_reason = local_error_reason; int i, j; unsigned int interface_max = INTERFACE_MAX; unsigned int port1, port2; if (totem_config->interface_count == 0) { error_reason = "No interfaces defined"; goto parse_error; } for (i = 0; i < totem_config->interface_count; i++) { /* * Some error checking of parsed data to make sure its valid */ struct totem_ip_address null_addr; memset (&null_addr, 0, sizeof (struct totem_ip_address)); if ((totem_config->transport_number == 0) && memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, sizeof (struct totem_ip_address)) == 0) { error_reason = "No multicast address specified"; goto parse_error; } if (totem_config->interfaces[i].ip_port == 0) { error_reason = "No multicast port specified"; goto parse_error; } if (totem_config->interfaces[i].ttl > 255) { error_reason = "Invalid TTL (should be 0..255)"; goto parse_error; } if (totem_config->transport_number != TOTEM_TRANSPORT_UDP && totem_config->interfaces[i].ttl != 1) { error_reason = "Can only set ttl on multicast transport types"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 && totem_config->node_id == 0) { error_reason = "An IPV6 network requires that a node ID be specified."; goto parse_error; } if (totem_config->broadcast_use == 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) { if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Multicast address family does not match bind address family"; goto parse_error; } if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) { error_reason = "mcastaddr is not a correct multicast address."; goto parse_error; } } if (totem_config->interfaces[0].bindnet.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Not all bind address belong to the same IP family"; goto parse_error; } /* * Ensure mcast address/port differs */ if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) { for (j = i + 1; j < totem_config->interface_count; j++) { port1 = totem_config->interfaces[i].ip_port; port2 = totem_config->interfaces[j].ip_port; if (totemip_equal(&totem_config->interfaces[i].mcast_addr, &totem_config->interfaces[j].mcast_addr) && (((port1 > port2 ? port1 : port2) - (port1 < port2 ? 
port1 : port2)) <= 1)) { error_reason = "Interfaces multicast address/port pair must differ"; goto parse_error; } } } } if (totem_config->version != 2) { error_reason = "This totem parser can only parse version 2 configurations."; goto parse_error; } if (totem_volatile_config_validate(totem_config, error_string) == -1) { return (-1); } if (check_for_duplicate_nodeids(totem_config, error_string) == -1) { return (-1); } /* * RRP values validation */ if (strcmp (totem_config->rrp_mode, "none") && strcmp (totem_config->rrp_mode, "active") && strcmp (totem_config->rrp_mode, "passive")) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP mode \"%s\" specified is invalid. It must be none, active, or passive.\n", totem_config->rrp_mode); goto parse_error; } if (strcmp (totem_config->rrp_mode, "none") == 0) { interface_max = 1; } if (interface_max < totem_config->interface_count) { snprintf (parse_error, sizeof(parse_error), "%d is too many configured interfaces for the rrp_mode setting %s.", totem_config->interface_count, totem_config->rrp_mode); error_reason = parse_error; goto parse_error; } if (totem_config->net_mtu == 0) { totem_config->net_mtu = 1500; } return 0; parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s\n", error_reason); *error_string = error_string_response; return (-1); } static int read_keyfile ( const char *key_location, struct totem_config *totem_config, const char **error_string) { int fd; int res; ssize_t expected_key_len = sizeof (totem_config->private_key); int saved_errno; char error_str[100]; const char *error_ptr; fd = open (key_location, O_RDONLY); if (fd == -1) { error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not open %s: %s\n", key_location, error_ptr); goto parse_error; } res = read (fd, totem_config->private_key, expected_key_len); saved_errno = errno; close (fd); if (res == -1) { error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not read %s: %s\n", key_location, error_ptr); goto parse_error; } totem_config->private_key_len = expected_key_len; if (res != expected_key_len) { snprintf (error_string_response, sizeof(error_string_response), "Could only read %d bits of 1024 bits from %s.\n", res * 8, key_location); goto parse_error; } return 0; parse_error: *error_string = error_string_response; return (-1); } int totem_config_keyread ( struct totem_config *totem_config, const char **error_string) { int got_key = 0; char *key_location = NULL; int res; size_t key_len; memset (totem_config->private_key, 0, 128); totem_config->private_key_len = 128; if (strcmp(totem_config->crypto_cipher_type, "none") == 0 && strcmp(totem_config->crypto_hash_type, "none") == 0) { return (0); } /* cmap may store the location of the key file */ if (icmap_get_string("totem.keyfile", &key_location) == CS_OK) { res = read_keyfile(key_location, totem_config, error_string); free(key_location); if (res) { goto key_error; } got_key = 1; } else { /* Or the key itself may be in the cmap */ if (icmap_get("totem.key", NULL, &key_len, NULL) == CS_OK) { if (key_len > sizeof (totem_config->private_key)) { sprintf(error_string_response, "key is too long"); goto key_error; } if (icmap_get("totem.key", totem_config->private_key, &key_len, NULL) == CS_OK) { totem_config->private_key_len = key_len; got_key = 1; } else { sprintf(error_string_response, "can't store 
private key"); goto key_error; } } } /* In desperation we read the default filename */ if (!got_key) { const char *filename = getenv("COROSYNC_TOTEM_AUTHKEY_FILE"); if (!filename) filename = COROSYSCONFDIR "/authkey"; res = read_keyfile(filename, totem_config, error_string); if (res) goto key_error; } return (0); key_error: *error_string = error_string_response; return (-1); } static void debug_dump_totem_config(const struct totem_config *totem_config) { log_printf(LOGSYS_LEVEL_DEBUG, "Token Timeout (%d ms) retransmit timeout (%d ms)", totem_config->token_timeout, totem_config->token_retransmit_timeout); log_printf(LOGSYS_LEVEL_DEBUG, "token hold (%d ms) retransmits before loss (%d retrans)", totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const); log_printf(LOGSYS_LEVEL_DEBUG, "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)", totem_config->join_timeout, totem_config->send_join_timeout, totem_config->consensus_timeout, totem_config->merge_timeout); log_printf(LOGSYS_LEVEL_DEBUG, "downcheck (%d ms) fail to recv const (%d msgs)", totem_config->downcheck_timeout, totem_config->fail_to_recv_const); log_printf(LOGSYS_LEVEL_DEBUG, "seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu); log_printf(LOGSYS_LEVEL_DEBUG, "window size per rotation (%d messages) maximum messages per rotation (%d messages)", totem_config->window_size, totem_config->max_messages); log_printf(LOGSYS_LEVEL_DEBUG, "missed count const (%d messages)", totem_config->miss_count_const); log_printf(LOGSYS_LEVEL_DEBUG, "RRP token expired timeout (%d ms)", totem_config->rrp_token_expired_timeout); log_printf(LOGSYS_LEVEL_DEBUG, "RRP token problem counter (%d ms)", totem_config->rrp_problem_count_timeout); log_printf(LOGSYS_LEVEL_DEBUG, "RRP threshold (%d problem count)", totem_config->rrp_problem_count_threshold); log_printf(LOGSYS_LEVEL_DEBUG, "RRP multicast threshold (%d problem count)", totem_config->rrp_problem_count_mcast_threshold); log_printf(LOGSYS_LEVEL_DEBUG, "RRP automatic recovery check timeout (%d ms)", totem_config->rrp_autorecovery_check_timeout); log_printf(LOGSYS_LEVEL_DEBUG, "RRP mode set to %s.", totem_config->rrp_mode); log_printf(LOGSYS_LEVEL_DEBUG, "heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed); log_printf(LOGSYS_LEVEL_DEBUG, "max_network_delay (%d ms)", totem_config->max_network_delay); } static void totem_change_notify( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { struct totem_config *totem_config = (struct totem_config *)user_data; uint32_t *param; uint8_t reloading; const char *deleted_key = NULL; const char *error_string; /* * If a full reload is in progress then don't do anything until it's done and * can reconfigure it all atomically */ if (icmap_get_uint8("config.reload_in_progress", &reloading) == CS_OK && reloading) return; param = totem_get_param_by_name((struct totem_config *)user_data, key_name); /* * Process change only if changed key is found in totem_config (-> param is not NULL) * or for special key token_coefficient. token_coefficient key is not stored in * totem_config, but it is used for computation of token timeout. 
*/ if (!param && strcmp(key_name, "totem.token_coefficient") != 0) return; /* * Values other than UINT32 are not supported, or needed (yet) */ switch (event) { case ICMAP_TRACK_DELETE: deleted_key = key_name; break; case ICMAP_TRACK_ADD: case ICMAP_TRACK_MODIFY: deleted_key = NULL; break; default: break; } totem_volatile_config_read (totem_config, deleted_key); log_printf(LOGSYS_LEVEL_DEBUG, "Totem related config key changed. Dumping actual totem config."); debug_dump_totem_config(totem_config); if (totem_volatile_config_validate(totem_config, &error_string) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); /* * TODO: Consider corosync exit and/or load defaults for volatile * values. For now, log error seems to be enough */ } } static void totem_reload_notify( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { struct totem_config *totem_config = (struct totem_config *)user_data; uint32_t local_node_pos; const char *error_string; /* Reload has completed */ if (*(uint8_t *)new_val.data == 0) { put_nodelist_members_to_config (totem_config, 1); totem_volatile_config_read (totem_config, NULL); log_printf(LOGSYS_LEVEL_DEBUG, "Configuration reloaded. Dumping actual totem config."); debug_dump_totem_config(totem_config); if (totem_volatile_config_validate(totem_config, &error_string) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); /* * TODO: Consider corosync exit and/or load defaults for volatile * values. For now, log error seems to be enough */ } /* Reinstate the local_node_pos */ local_node_pos = find_local_node_in_nodelist(totem_config); if (local_node_pos != -1) { icmap_set_uint32("nodelist.local_node_pos", local_node_pos); } icmap_set_uint8("config.totemconfig_reload_in_progress", 0); } else { icmap_set_uint8("config.totemconfig_reload_in_progress", 1); } } static void add_totem_config_notification(struct totem_config *totem_config) { icmap_track_t icmap_track; icmap_track_add("totem.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, totem_change_notify, totem_config, &icmap_track); icmap_track_add("config.reload_in_progress", ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY, totem_reload_notify, totem_config, &icmap_track); icmap_track_add("nodelist.node.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, nodelist_dynamic_notify, (void *)totem_config, &icmap_track); } diff --git a/exec/votequorum.c b/exec/votequorum.c index 4f90749b..459388f4 100644 --- a/exec/votequorum.c +++ b/exec/votequorum.c @@ -1,3019 +1,3021 @@ /* * Copyright (c) 2009-2015 Red Hat, Inc. * * All rights reserved. * * Authors: Christine Caulfield (ccaulfie@redhat.com) * Fabio M. Di Nitto (fdinitto@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "quorum.h" #include #include #include #include #include #include #include #include "service.h" #include "util.h" LOGSYS_DECLARE_SUBSYS ("VOTEQ"); /* * interface with corosync */ static struct corosync_api_v1 *corosync_api; /* * votequorum global config vars */ static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; static struct cluster_node *qdevice = NULL; static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT; static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT; static uint8_t qdevice_can_operate = 1; static void *qdevice_reg_conn = NULL; static uint8_t qdevice_master_wins = 0; static uint8_t two_node = 0; static uint8_t wait_for_all = 0; static uint8_t wait_for_all_status = 0; static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE; static int lowest_node_id = -1; static int highest_node_id = -1; #define DEFAULT_LMS_WIN 10000 static uint8_t last_man_standing = 0; static uint32_t last_man_standing_window = DEFAULT_LMS_WIN; static uint8_t allow_downscale = 0; static uint32_t ev_barrier = 0; static uint8_t ev_tracking = 0; static uint32_t ev_tracking_barrier = 0; static int ev_tracking_fd = -1; /* * votequorum_exec defines/structs/forward definitions */ struct req_exec_quorum_nodeinfo { struct qb_ipc_request_header header __attribute__((aligned(8))); uint32_t nodeid; uint32_t votes; uint32_t expected_votes; uint32_t flags; } __attribute__((packed)); struct req_exec_quorum_reconfigure { struct qb_ipc_request_header header __attribute__((aligned(8))); uint32_t nodeid; uint32_t value; uint8_t param; uint8_t _pad0; uint8_t _pad1; uint8_t _pad2; } __attribute__((packed)); struct req_exec_quorum_qdevice_reg { struct qb_ipc_request_header header __attribute__((aligned(8))); uint32_t operation; char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; } __attribute__((packed)); struct req_exec_quorum_qdevice_reconfigure { struct qb_ipc_request_header header __attribute__((aligned(8))); char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; } __attribute__((packed)); /* * votequorum_exec onwire version (via totem) */ #include "votequorum.h" /* * votequorum_exec onwire messages (via totem) */ #define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0 #define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3 static void votequorum_exec_send_expectedvotes_notification(void); static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context); static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context); #define 
VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1 #define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2 #define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value); /* * used by req_exec_quorum_qdevice_reg */ #define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0 #define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1 /* * votequorum internal node status/view */ #define NODE_FLAGS_QUORATE 1 #define NODE_FLAGS_LEAVING 2 #define NODE_FLAGS_WFASTATUS 4 #define NODE_FLAGS_FIRST 8 #define NODE_FLAGS_QDEVICE_REGISTERED 16 #define NODE_FLAGS_QDEVICE_ALIVE 32 #define NODE_FLAGS_QDEVICE_CAST_VOTE 64 #define NODE_FLAGS_QDEVICE_MASTER_WINS 128 typedef enum { NODESTATE_MEMBER=1, NODESTATE_DEAD, NODESTATE_LEAVING } nodestate_t; struct cluster_node { int node_id; nodestate_t state; uint32_t votes; uint32_t expected_votes; uint32_t flags; struct list_head list; }; /* * votequorum internal quorum status */ static uint8_t quorum; static uint8_t cluster_is_quorate; /* * votequorum membership data */ static struct cluster_node *us; static struct list_head cluster_members_list; static unsigned int quorum_members[PROCESSOR_COUNT_MAX]; static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX]; static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX]; static int quorum_members_entries = 0; static int previous_quorum_members_entries = 0; static int atb_nodelist_entries = 0; static struct memb_ring_id quorum_ringid; /* * pre allocate all cluster_nodes + one for qdevice */ static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2]; static int cluster_nodes_entries = 0; /* * votequorum tracking */ struct quorum_pd { unsigned char track_flags; int tracking_enabled; uint64_t tracking_context; struct list_head list; void *conn; }; static struct list_head trackers_list; /* * votequorum timers */ static corosync_timer_handle_t qdevice_timer; static int qdevice_timer_set = 0; static corosync_timer_handle_t last_man_standing_timer; static int last_man_standing_timer_set = 0; static int sync_nodeinfo_sent = 0; static int sync_wait_for_poll_or_timeout = 0; /* * Service Interfaces required by service_message_handler struct */ static int sync_in_progress = 0; static void votequorum_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int votequorum_sync_process (void); static void votequorum_sync_activate (void); static void votequorum_sync_abort (void); static quorum_set_quorate_fn_t quorum_callback; /* * votequorum_exec handler and definitions */ static char *votequorum_exec_init_fn (struct corosync_api_v1 *api); static int votequorum_exec_exit_fn (void); static int votequorum_exec_send_nodeinfo(uint32_t nodeid); static void message_handler_req_exec_votequorum_nodeinfo ( const void *message, unsigned int nodeid); static void exec_votequorum_nodeinfo_endian_convert (void *message); static void message_handler_req_exec_votequorum_reconfigure ( const void *message, unsigned int nodeid); static void exec_votequorum_reconfigure_endian_convert (void *message); static void message_handler_req_exec_votequorum_qdevice_reg ( const void *message, unsigned int nodeid); static void exec_votequorum_qdevice_reg_endian_convert (void *message); static void message_handler_req_exec_votequorum_qdevice_reconfigure ( const void *message, unsigned int nodeid); static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message); static struct 
corosync_exec_handler votequorum_exec_engine[] = { { /* 0 */ .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo, .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert }, { /* 1 */ .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure, .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert }, { /* 2 */ .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg, .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert }, { /* 3 */ .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure, .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert }, }; /* * Library Handler and Functions Definitions */ static int quorum_lib_init_fn (void *conn); static int quorum_lib_exit_fn (void *conn); static void qdevice_timer_fn(void *arg); static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message); static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message); static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message); static void message_handler_req_lib_votequorum_trackstart (void *conn, const void *message); static void message_handler_req_lib_votequorum_trackstop (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_register (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_update (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_poll (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn, const void *message); static struct corosync_lib_handler quorum_lib_service[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_votequorum_getinfo, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_votequorum_setexpected, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_votequorum_setvotes, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_votequorum_trackstart, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 */ .lib_handler_fn = message_handler_req_lib_votequorum_trackstop, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 9 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED } }; static struct corosync_service_engine votequorum_service_engine = { .name = "corosync vote quorum service v1.0", .id = VOTEQUORUM_SERVICE, .priority = 2, .private_data_size = sizeof (struct quorum_pd), .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED, .lib_init_fn = quorum_lib_init_fn, 
.lib_exit_fn = quorum_lib_exit_fn, .lib_engine = quorum_lib_service, .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler), .exec_init_fn = votequorum_exec_init_fn, .exec_exit_fn = votequorum_exec_exit_fn, .exec_engine = votequorum_exec_engine, .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler), .sync_init = votequorum_sync_init, .sync_process = votequorum_sync_process, .sync_activate = votequorum_sync_activate, .sync_abort = votequorum_sync_abort }; struct corosync_service_engine *votequorum_get_service_engine_ver0 (void) { return (&votequorum_service_engine); } static struct default_service votequorum_service[] = { { .name = "corosync_votequorum", .ver = 0, .loader = votequorum_get_service_engine_ver0 }, }; /* * common/utility macros/functions */ #define max(a,b) (((a) > (b)) ? (a) : (b)) #define list_iterate(v, head) \ for (v = (head)->next; v != head; v = v->next) static void node_add_ordered(struct cluster_node *newnode) { struct cluster_node *node = NULL; struct list_head *tmp; struct list_head *newlist = &newnode->list; ENTER(); list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); if (newnode->node_id < node->node_id) { break; } } if (!node) { list_add(&newnode->list, &cluster_members_list); } else { newlist->prev = tmp->prev; newlist->next = tmp; tmp->prev->next = newlist; tmp->prev = newlist; } LEAVE(); } static struct cluster_node *allocate_node(unsigned int nodeid) { struct cluster_node *cl = NULL; struct list_head *tmp; ENTER(); if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) { cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries]; cluster_nodes_entries++; } else { list_iterate(tmp, &cluster_members_list) { cl = list_entry(tmp, struct cluster_node, list); if (cl->state == NODESTATE_DEAD) { break; } } /* * this should never happen */ if (!cl) { log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node %u data!!", nodeid); goto out; } list_del(tmp); } memset(cl, 0, sizeof(struct cluster_node)); cl->node_id = nodeid; if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { node_add_ordered(cl); } out: LEAVE(); return cl; } static struct cluster_node *find_node_by_nodeid(unsigned int nodeid) { struct cluster_node *node; struct list_head *tmp; ENTER(); if (nodeid == us->node_id) { LEAVE(); return us; } if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { LEAVE(); return qdevice; } list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); if (node->node_id == nodeid) { LEAVE(); return node; } } LEAVE(); return NULL; } static void get_lowest_node_id(void) { struct cluster_node *node = NULL; struct list_head *tmp; ENTER(); lowest_node_id = us->node_id; list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id < lowest_node_id)) { lowest_node_id = node->node_id; } } log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: %d us: %d", lowest_node_id, us->node_id); icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id); LEAVE(); } static void get_highest_node_id(void) { struct cluster_node *node = NULL; struct list_head *tmp; ENTER(); highest_node_id = us->node_id; list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id > highest_node_id)) { highest_node_id = node->node_id; } } log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: %d us: %d", 
highest_node_id, us->node_id); icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id); LEAVE(); } static int check_low_node_id_partition(void) { struct cluster_node *node = NULL; struct list_head *tmp; int found = 0; ENTER(); list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id == lowest_node_id)) { found = 1; } } LEAVE(); return found; } static int check_high_node_id_partition(void) { struct cluster_node *node = NULL; struct list_head *tmp; int found = 0; ENTER(); list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id == highest_node_id)) { found = 1; } } LEAVE(); return found; } static int is_in_nodelist(int nodeid, unsigned int *members, int entries) { int i; ENTER(); for (i=0; istate == NODESTATE_MEMBER) && (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) && (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE)) { found = 1; } } LEAVE(); return found; } static void decode_flags(uint32_t flags) { ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s", (flags & NODE_FLAGS_QUORATE)?"Yes":"No", (flags & NODE_FLAGS_LEAVING)?"Yes":"No", (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No", (flags & NODE_FLAGS_FIRST)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No"); LEAVE(); } /* * load/save are copied almost pristine from totemsrp,c */ static int load_ev_tracking_barrier(void) { int res = 0; char filename[PATH_MAX]; ENTER(); snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_run_dir()); ev_tracking_fd = open(filename, O_RDWR, 0700); if (ev_tracking_fd != -1) { res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t)); + close(ev_tracking_fd); if (res == sizeof (uint32_t)) { LEAVE(); return 0; } } ev_tracking_barrier = 0; umask(0); ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700); if (ev_tracking_fd != -1) { res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t)); if ((res == -1) || (res != sizeof (uint32_t))) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to write to %s", filename); } + close(ev_tracking_fd); LEAVE(); return 0; } log_printf(LOGSYS_LEVEL_WARNING, "Unable to create %s file", filename); LEAVE(); return -1; } static void update_wait_for_all_status(uint8_t wfa_status) { ENTER(); wait_for_all_status = wfa_status; if (wait_for_all_status) { us->flags |= NODE_FLAGS_WFASTATUS; } else { us->flags &= ~NODE_FLAGS_WFASTATUS; } icmap_set_uint8("runtime.votequorum.wait_for_all_status", wait_for_all_status); LEAVE(); } static void update_two_node(void) { ENTER(); icmap_set_uint8("runtime.votequorum.two_node", two_node); LEAVE(); } static void update_ev_barrier(uint32_t expected_votes) { ENTER(); ev_barrier = expected_votes; icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier); LEAVE(); } static void update_qdevice_can_operate(uint8_t status) { ENTER(); qdevice_can_operate = status; icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate); LEAVE(); } static void update_qdevice_master_wins(uint8_t allow) { ENTER(); qdevice_master_wins = allow; icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins); LEAVE(); } static void 
update_ev_tracking_barrier(uint32_t ev_t_barrier) { int res; ENTER(); ev_tracking_barrier = ev_t_barrier; icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier); if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to update ev_tracking_barrier on disk data!!!"); LEAVE(); return; } res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t)); if (res != sizeof (uint32_t)) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to update ev_tracking_barrier on disk data!!!"); } #ifdef HAVE_FDATASYNC fdatasync(ev_tracking_fd); #else fsync(ev_tracking_fd); #endif LEAVE(); } /* * quorum calculation core bits */ static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes) { struct list_head *nodelist; struct cluster_node *node; unsigned int total_votes = 0; unsigned int highest_expected = 0; unsigned int newquorum, q1, q2; unsigned int total_nodes = 0; ENTER(); if ((allow_downscale) && (allow_decrease) && (max_expected)) { max_expected = max(ev_barrier, max_expected); } list_iterate(nodelist, &cluster_members_list) { node = list_entry(nodelist, struct cluster_node, list); log_printf(LOGSYS_LEVEL_DEBUG, "node %u state=%d, votes=%u, expected=%u", node->node_id, node->state, node->votes, node->expected_votes); if (node->state == NODESTATE_MEMBER) { if (max_expected) { node->expected_votes = max_expected; } else { highest_expected = max(highest_expected, node->expected_votes); } total_votes += node->votes; total_nodes++; } } if (us->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes); total_votes += qdevice->votes; total_nodes++; } if (max_expected > 0) { highest_expected = max_expected; } /* * This quorum calculation is taken from the OpenVMS Cluster Systems * manual, but, then, you guessed that didn't you */ q1 = (highest_expected + 2) / 2; q2 = (total_votes + 2) / 2; newquorum = max(q1, q2); /* * Normally quorum never decreases but the system administrator can * force it down by setting expected votes to a maximum value */ if (!allow_decrease) { newquorum = max(quorum, newquorum); } /* * The special two_node mode allows each of the two nodes to retain * quorum if the other fails. Only one of the two should live past * fencing (as both nodes try to fence each other in split-brain.) * Also: if there are more than two nodes, force us inquorate to avoid * any damage or confusion. */ if (two_node && total_nodes <= 2) { newquorum = 1; } if (ret_total_votes) { *ret_total_votes = total_votes; } LEAVE(); return newquorum; } static void are_we_quorate(unsigned int total_votes) { int quorate; int quorum_change = 0; ENTER(); /* * wait for all nodes to show up before granting quorum */ if ((wait_for_all) && (wait_for_all_status)) { if (total_votes != us->expected_votes) { log_printf(LOGSYS_LEVEL_NOTICE, "Waiting for all cluster members. 
" "Current votes: %d expected_votes: %d", total_votes, us->expected_votes); cluster_is_quorate = 0; return; } update_wait_for_all_status(0); } if (quorum > total_votes) { quorate = 0; } else { quorate = 1; get_lowest_node_id(); get_highest_node_id(); } if ((auto_tie_breaker != ATB_NONE) && /* Must be a half (or half-1) split */ (total_votes == (us->expected_votes / 2)) && /* If the 'other' partition in a split might have quorum then we can't run ATB */ (previous_quorum_members_entries - quorum_members_entries < quorum) && (check_auto_tie_breaker() == 1)) { quorate = 1; } if ((qdevice_master_wins) && (!quorate) && (check_qdevice_master() == 1)) { log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition"); quorate = 1; } if (cluster_is_quorate && !quorate) { quorum_change = 1; log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity"); } if (!cluster_is_quorate && quorate) { quorum_change = 1; log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity"); } cluster_is_quorate = quorate; if (cluster_is_quorate) { us->flags |= NODE_FLAGS_QUORATE; } else { us->flags &= ~NODE_FLAGS_QUORATE; } if (wait_for_all) { if (quorate) { update_wait_for_all_status(0); } else { update_wait_for_all_status(1); } } if ((quorum_change) && (sync_in_progress == 0)) { quorum_callback(quorum_members, quorum_members_entries, cluster_is_quorate, &quorum_ringid); votequorum_exec_send_quorum_notification(NULL, 0L); } LEAVE(); } static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members) { unsigned int total_votes = 0; unsigned int cluster_members = 0; struct list_head *nodelist; struct cluster_node *node; ENTER(); list_iterate(nodelist, &cluster_members_list) { node = list_entry(nodelist, struct cluster_node, list); if (node->state == NODESTATE_MEMBER) { cluster_members++; total_votes += node->votes; } } if (qdevice->votes) { total_votes += qdevice->votes; cluster_members++; } *totalvotes = total_votes; *current_members = cluster_members; LEAVE(); } /* * Recalculate cluster quorum, set quorate and notify changes */ static void recalculate_quorum(int allow_decrease, int by_current_nodes) { unsigned int total_votes = 0; unsigned int cluster_members = 0; ENTER(); get_total_votes(&total_votes, &cluster_members); if (!by_current_nodes) { cluster_members = 0; } /* * Keep expected_votes at the highest number of votes in the cluster */ log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes); if (total_votes > us->expected_votes) { us->expected_votes = total_votes; votequorum_exec_send_expectedvotes_notification(); } if ((ev_tracking) && (us->expected_votes > ev_tracking_barrier)) { update_ev_tracking_barrier(us->expected_votes); } quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes); are_we_quorate(total_votes); LEAVE(); } /* * configuration bits and pieces */ static int votequorum_read_nodelist_configuration(uint32_t *votes, uint32_t *nodes, uint32_t *expected_votes) { icmap_iter_t iter; const char *iter_key; char tmp_key[ICMAP_KEYNAME_MAXLEN]; uint32_t our_pos, node_pos; uint32_t nodecount = 0; uint32_t nodelist_expected_votes = 0; uint32_t node_votes = 0; int res = 0; ENTER(); if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) { log_printf(LOGSYS_LEVEL_DEBUG, "No nodelist defined or our node is not in the nodelist"); return 0; } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", 
&node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } nodecount++; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos); if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) { node_votes = 1; } nodelist_expected_votes = nodelist_expected_votes + node_votes; if (node_pos == our_pos) { *votes = node_votes; } } *expected_votes = nodelist_expected_votes; *nodes = nodecount; icmap_iter_finalize(iter); LEAVE(); return 1; } static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes) { char *qdevice_model = NULL; int ret = 0; ENTER(); if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) { if (strlen(qdevice_model)) { if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) { *qdevice_votes = -1; } if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) { qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT; } if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) { qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT; } update_qdevice_can_operate(1); ret = 1; } free(qdevice_model); } LEAVE(); return ret; } #define VOTEQUORUM_READCONFIG_STARTUP 0 #define VOTEQUORUM_READCONFIG_RUNTIME 1 static char *votequorum_readconfig(int runtime) { uint32_t node_votes = 0, qdevice_votes = 0; uint32_t node_expected_votes = 0, expected_votes = 0; uint32_t node_count = 0; uint8_t atb = 0; int have_nodelist, have_qdevice; char *atb_string = NULL; char *error = NULL; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime); /* * Set the few things we re-read at runtime back to their defaults */ if (runtime) { two_node = 0; expected_votes = 0; } /* * gather basic data here */ icmap_get_uint32("quorum.expected_votes", &expected_votes); have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes); have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes); icmap_get_uint8("quorum.two_node", &two_node); /* * do config verification and enablement */ if ((!have_nodelist) && (!expected_votes)) { if (!runtime) { error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!"; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!"); log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data"); } goto out; } /* * two_node and qdevice are not compatible in the same config. 
* try to make an educated guess of what to do */ if ((two_node) && (have_qdevice)) { if (!runtime) { error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!"); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node"); two_node = 0; } else { log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node"); update_qdevice_can_operate(0); } } } /* * Enable special features */ if (!runtime) { if (two_node) { wait_for_all = 1; } icmap_get_uint8("quorum.allow_downscale", &allow_downscale); icmap_get_uint8("quorum.wait_for_all", &wait_for_all); icmap_get_uint8("quorum.last_man_standing", &last_man_standing); icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window); icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking); icmap_get_uint8("quorum.auto_tie_breaker", &atb); icmap_get_string("quorum.auto_tie_breaker_node", &atb_string); /* auto_tie_breaker defaults to LOWEST */ if (atb) { auto_tie_breaker = ATB_LOWEST; icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); } else { auto_tie_breaker = ATB_NONE; if (atb_string) { log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0"); } } if (atb && atb_string) { parse_atb_string(atb_string); } free(atb_string); /* allow_downscale requires ev_tracking */ if (allow_downscale) { ev_tracking = 1; } if (ev_tracking) { if (load_ev_tracking_barrier() < 0) { LEAVE(); return ((char *)"Unable to load ev_tracking file!"); } update_ev_tracking_barrier(ev_tracking_barrier); } } /* two_node and auto_tie_breaker are not compatible as two_node uses * a fence race to decide quorum whereas ATB decides based on node id */ if (two_node && auto_tie_breaker != ATB_NONE) { log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible."); log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf"); two_node = 0; } /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs * to be set so that an isolated half+1 without the tie breaker node * does not have quorum on reboot. */ if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) && (!wait_for_all)) { if (last_man_standing) { /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what * they might want so we'll just quit. */ log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n"); log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n"); log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n"); log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n"); log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. 
Please fix your corosync.conf\n"); error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n"); log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n"); log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n"); auto_tie_breaker = ATB_NONE; icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); } } /* * quorum device is not compatible with last_man_standing and auto_tie_breaker * neither lms or atb can be set at runtime, so there is no need to check for * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have * been enabled at startup. */ if ((have_qdevice) && (last_man_standing)) { if (!runtime) { error = (char *)"configuration error: quorum.device is not compatible with last_man_standing"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) { if (!runtime) { error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } if ((have_qdevice) && (allow_downscale)) { if (!runtime) { error = (char *)"configuration error: quorum.device is not compatible with allow_downscale"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } /* * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes * we don't know what the quorum device should vote. 
*/ if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) { if (!runtime) { error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } /* * if user specifies a node list with uneven votes and no device.votes * we cannot autocalculate the votes */ if ((have_qdevice) && (qdevice_votes == -1) && (have_nodelist) && (node_count != node_expected_votes)) { if (!runtime) { error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } /* * validate quorum device votes vs expected_votes */ if ((qdevice_votes > 0) && (expected_votes)) { int delta = expected_votes - qdevice_votes; if (delta < 2) { if (!runtime) { error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } } /* * automatically calculate device votes and adjust expected_votes from nodelist */ if ((have_qdevice) && (qdevice_votes == -1) && (!expected_votes) && (have_nodelist) && (node_count == node_expected_votes)) { qdevice_votes = node_expected_votes - 1; node_expected_votes = node_expected_votes + qdevice_votes; } /* * set this node votes and expected_votes */ log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes); if (ev_tracking) { expected_votes = ev_tracking_barrier; } if (have_nodelist) { us->votes = node_votes; us->expected_votes = node_expected_votes; } else { us->votes = 1; icmap_get_uint32("quorum.votes", &us->votes); } if (expected_votes) { us->expected_votes = expected_votes; } /* * set qdevice votes */ if (!have_qdevice) { qdevice->votes = 0; } if (qdevice_votes != -1) { qdevice->votes = qdevice_votes; } update_ev_barrier(us->expected_votes); update_two_node(); if (wait_for_all) { update_wait_for_all_status(1); } out: LEAVE(); return error; } static void votequorum_refresh_config( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { int old_votes, old_expected_votes; uint8_t reloading; uint8_t cancel_wfa; ENTER(); /* * If a full reload is in progress then don't do anything until it's done and * can reconfigure it all atomically */ if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) { return ; } icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa); if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 && cancel_wfa >= 1) { icmap_set_uint8("quorum.cancel_wait_for_all", 0); votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA, us->node_id, 0); return; } old_votes = us->votes; old_expected_votes = us->expected_votes; /* * Reload the configuration */ votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME); /* * activate new config */ 
votequorum_exec_send_nodeinfo(us->node_id); votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); if (us->votes != old_votes) { votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, us->node_id, us->votes); } if (us->expected_votes != old_expected_votes) { votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id, us->expected_votes); } LEAVE(); } static void votequorum_exec_add_config_notification(void) { icmap_track_t icmap_track_nodelist = NULL; icmap_track_t icmap_track_quorum = NULL; icmap_track_t icmap_track_reload = NULL; ENTER(); icmap_track_add("nodelist.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, votequorum_refresh_config, NULL, &icmap_track_nodelist); icmap_track_add("quorum.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, votequorum_refresh_config, NULL, &icmap_track_quorum); icmap_track_add("config.totemconfig_reload_in_progress", ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY, votequorum_refresh_config, NULL, &icmap_track_reload); LEAVE(); } /* * votequorum_exec core */ static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value) { struct req_exec_quorum_reconfigure req_exec_quorum_reconfigure; struct iovec iov[1]; int ret; ENTER(); req_exec_quorum_reconfigure.nodeid = nodeid; req_exec_quorum_reconfigure.value = value; req_exec_quorum_reconfigure.param = param; req_exec_quorum_reconfigure._pad0 = 0; req_exec_quorum_reconfigure._pad1 = 0; req_exec_quorum_reconfigure._pad2 = 0; req_exec_quorum_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE); req_exec_quorum_reconfigure.header.size = sizeof(req_exec_quorum_reconfigure); iov[0].iov_base = (void *)&req_exec_quorum_reconfigure; iov[0].iov_len = sizeof(req_exec_quorum_reconfigure); ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_nodeinfo(uint32_t nodeid) { struct req_exec_quorum_nodeinfo req_exec_quorum_nodeinfo; struct iovec iov[1]; struct cluster_node *node; int ret; ENTER(); node = find_node_by_nodeid(nodeid); if (!node) { return -1; } req_exec_quorum_nodeinfo.nodeid = nodeid; req_exec_quorum_nodeinfo.votes = node->votes; req_exec_quorum_nodeinfo.expected_votes = node->expected_votes; req_exec_quorum_nodeinfo.flags = node->flags; if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { decode_flags(node->flags); } req_exec_quorum_nodeinfo.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO); req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo); iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo; iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo); ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname) { struct req_exec_quorum_qdevice_reconfigure req_exec_quorum_qdevice_reconfigure; struct iovec iov[1]; int ret; ENTER(); req_exec_quorum_qdevice_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE); req_exec_quorum_qdevice_reconfigure.header.size = sizeof(req_exec_quorum_qdevice_reconfigure); strcpy(req_exec_quorum_qdevice_reconfigure.oldname, oldname); strcpy(req_exec_quorum_qdevice_reconfigure.newname, newname); iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure; iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure); ret = 
corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req) { struct req_exec_quorum_qdevice_reg req_exec_quorum_qdevice_reg; struct iovec iov[1]; int ret; ENTER(); req_exec_quorum_qdevice_reg.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG); req_exec_quorum_qdevice_reg.header.size = sizeof(req_exec_quorum_qdevice_reg); req_exec_quorum_qdevice_reg.operation = operation; strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req); iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg; iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg); ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context) { struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification; struct list_head *tmp; struct cluster_node *node; int i = 0; int cluster_members = 0; int size; char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)]; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate); list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); cluster_members++; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { cluster_members++; } size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members; res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf; res_lib_votequorum_notification->quorate = cluster_is_quorate; res_lib_votequorum_notification->context = context; res_lib_votequorum_notification->node_list_entries = cluster_members; res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION; res_lib_votequorum_notification->header.size = size; res_lib_votequorum_notification->header.error = CS_OK; /* Send all known nodes and their states */ list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); res_lib_votequorum_notification->node_list[i].nodeid = node->node_id; res_lib_votequorum_notification->node_list[i++].state = node->state; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID; res_lib_votequorum_notification->node_list[i++].state = qdevice->state; } /* Send it to all interested parties */ if (conn) { int ret = corosync_api->ipc_dispatch_send(conn, &buf, size); LEAVE(); return ret; } else { struct quorum_pd *qpd; list_iterate(tmp, &trackers_list) { qpd = list_entry(tmp, struct quorum_pd, list); res_lib_votequorum_notification->context = qpd->tracking_context; corosync_api->ipc_dispatch_send(qpd->conn, &buf, size); } } LEAVE(); return 0; } static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context) { struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification; int i = 0; int size; struct list_head *tmp; char buf[sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries]; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. 
ring_id = %d/%lld", quorum_ringid.rep.nodeid, quorum_ringid.seq); size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries; res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf; res_lib_votequorum_notification->node_list_entries = quorum_members_entries; res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.rep.nodeid; res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq; res_lib_votequorum_notification->context = context; for (i=0; i<quorum_members_entries; i++) { res_lib_votequorum_notification->node_list[i] = quorum_members[i]; } res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION; res_lib_votequorum_notification->header.size = size; res_lib_votequorum_notification->header.error = CS_OK; /* Send it to all interested parties */ if (conn) { int ret = corosync_api->ipc_dispatch_send(conn, &buf, size); LEAVE(); return ret; } else { struct quorum_pd *qpd; list_iterate(tmp, &trackers_list) { qpd = list_entry(tmp, struct quorum_pd, list); res_lib_votequorum_notification->context = qpd->tracking_context; corosync_api->ipc_dispatch_send(qpd->conn, &buf, size); } } LEAVE(); return 0; } static void votequorum_exec_send_expectedvotes_notification(void) { struct res_lib_votequorum_expectedvotes_notification res_lib_votequorum_expectedvotes_notification; struct quorum_pd *qpd; struct list_head *tmp; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback"); res_lib_votequorum_expectedvotes_notification.header.id = MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION; res_lib_votequorum_expectedvotes_notification.header.size = sizeof(res_lib_votequorum_expectedvotes_notification); res_lib_votequorum_expectedvotes_notification.header.error = CS_OK; res_lib_votequorum_expectedvotes_notification.expected_votes = us->expected_votes; list_iterate(tmp, &trackers_list) { qpd = list_entry(tmp, struct quorum_pd, list); res_lib_votequorum_expectedvotes_notification.context = qpd->tracking_context; corosync_api->ipc_dispatch_send(qpd->conn, &res_lib_votequorum_expectedvotes_notification, sizeof(struct res_lib_votequorum_expectedvotes_notification)); } LEAVE(); } static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message) { ENTER(); LEAVE(); } static void message_handler_req_exec_votequorum_qdevice_reconfigure ( const void *message, unsigned int nodeid) { const struct req_exec_quorum_qdevice_reconfigure *req_exec_quorum_qdevice_reconfigure = message; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node %u [from: %s to: %s]", nodeid, req_exec_quorum_qdevice_reconfigure->oldname, req_exec_quorum_qdevice_reconfigure->newname); if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) { log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename"); memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname); /* * TODO: notify qdevices about name change?
* this is not relevant for now and can wait later on since * qdevices are local only and libvotequorum is not final */ } LEAVE(); } static void exec_votequorum_qdevice_reg_endian_convert (void *message) { struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message; ENTER(); req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation); LEAVE(); } static void message_handler_req_exec_votequorum_qdevice_reg ( const void *message, unsigned int nodeid) { const struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message; struct res_lib_votequorum_status res_lib_votequorum_status; int wipe_qdevice_name = 1; struct cluster_node *node = NULL; struct list_head *tmp; cs_error_t error = CS_OK; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node %u [%s]", req_exec_quorum_qdevice_reg->operation, nodeid, req_exec_quorum_qdevice_reg->qdevice_name); switch(req_exec_quorum_qdevice_reg->operation) { case VOTEQUORUM_QDEVICE_OPERATION_REGISTER: if (nodeid != us->node_id) { if (!strlen(qdevice_name)) { log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded"); strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name); } LEAVE(); return; } /* * protect against the case where we broadcast qdevice registration * to new memebers, we receive the message back, but there is no registration * connection in progress */ if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { LEAVE(); return; } /* * this should NEVER happen */ if (!qdevice_reg_conn) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!"); LEAVE(); return; } /* * registering our own device in this case */ if (!strlen(qdevice_name)) { strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name); } /* * check if it is our device or something else */ if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) { us->flags |= NODE_FLAGS_QDEVICE_REGISTERED; votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); votequorum_exec_send_nodeinfo(us->node_id); } else { log_printf(LOGSYS_LEVEL_WARNING, "A new qdevice with different name (new: %s old: %s) is trying to register!", req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name); error = CS_ERR_EXIST; } res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); qdevice_reg_conn = NULL; break; case VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER: list_iterate(tmp, &cluster_members_list) { node = list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->flags & NODE_FLAGS_QDEVICE_REGISTERED)) { wipe_qdevice_name = 0; } } if (wipe_qdevice_name) { memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); } break; } LEAVE(); } static void exec_votequorum_nodeinfo_endian_convert (void *message) { struct req_exec_quorum_nodeinfo *nodeinfo = message; ENTER(); nodeinfo->nodeid = swab32(nodeinfo->nodeid); nodeinfo->votes = swab32(nodeinfo->votes); nodeinfo->expected_votes = swab32(nodeinfo->expected_votes); nodeinfo->flags = swab32(nodeinfo->flags); LEAVE(); } static void message_handler_req_exec_votequorum_nodeinfo ( const void *message, unsigned int sender_nodeid) { const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message; struct cluster_node *node = NULL; int 
old_votes; int old_expected; uint32_t old_flags; nodestate_t old_state; int new_node = 0; int allow_downgrade = 0; int by_node = 0; unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", sender_nodeid); log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d flags: %d", nodeid, req_exec_quorum_nodeinfo->votes, req_exec_quorum_nodeinfo->expected_votes, req_exec_quorum_nodeinfo->flags); if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { decode_flags(req_exec_quorum_nodeinfo->flags); } node = find_node_by_nodeid(nodeid); if (!node) { node = allocate_node(nodeid); new_node = 1; } if (!node) { corosync_api->error_memory_failure(); LEAVE(); return; } if (new_node) { old_votes = 0; old_expected = 0; old_state = NODESTATE_DEAD; old_flags = 0; } else { old_votes = node->votes; old_expected = node->expected_votes; old_state = node->state; old_flags = node->flags; } if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid); assert(sender_node != NULL); if ((!cluster_is_quorate) && (sender_node->flags & NODE_FLAGS_QUORATE)) { node->votes = req_exec_quorum_nodeinfo->votes; } else { node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes); } goto recalculate; } /* Update node state */ node->flags = req_exec_quorum_nodeinfo->flags; node->votes = req_exec_quorum_nodeinfo->votes; node->state = NODESTATE_MEMBER; if (node->flags & NODE_FLAGS_LEAVING) { node->state = NODESTATE_LEAVING; allow_downgrade = 1; by_node = 1; } if ((!cluster_is_quorate) && (node->flags & NODE_FLAGS_QUORATE)) { allow_downgrade = 1; us->expected_votes = req_exec_quorum_nodeinfo->expected_votes; } if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) { node->expected_votes = req_exec_quorum_nodeinfo->expected_votes; } else { node->expected_votes = us->expected_votes; } if ((last_man_standing) && (node->votes > 1)) { log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all" "cluster nodes votes are set to 1. 
Disabling LMS."); last_man_standing = 0; if (last_man_standing_timer_set) { corosync_api->timer_delete(last_man_standing_timer); last_man_standing_timer_set = 0; } } recalculate: if ((new_node) || (nodeid == us->node_id) || (node->flags & NODE_FLAGS_FIRST) || (old_votes != node->votes) || (old_expected != node->expected_votes) || (old_flags != node->flags) || (old_state != node->state)) { recalculate_quorum(allow_downgrade, by_node); } if ((wait_for_all) && (!(node->flags & NODE_FLAGS_WFASTATUS)) && (node->flags & NODE_FLAGS_QUORATE)) { update_wait_for_all_status(0); } LEAVE(); } static void exec_votequorum_reconfigure_endian_convert (void *message) { struct req_exec_quorum_reconfigure *reconfigure = message; ENTER(); reconfigure->nodeid = swab32(reconfigure->nodeid); reconfigure->value = swab32(reconfigure->value); LEAVE(); } static void message_handler_req_exec_votequorum_reconfigure ( const void *message, unsigned int nodeid) { const struct req_exec_quorum_reconfigure *req_exec_quorum_reconfigure = message; struct cluster_node *node; struct list_head *nodelist; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node %u for %u", nodeid, req_exec_quorum_reconfigure->nodeid); switch(req_exec_quorum_reconfigure->param) { case VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES: list_iterate(nodelist, &cluster_members_list) { node = list_entry(nodelist, struct cluster_node, list); if (node->state == NODESTATE_MEMBER) { node->expected_votes = req_exec_quorum_reconfigure->value; } } votequorum_exec_send_expectedvotes_notification(); update_ev_barrier(req_exec_quorum_reconfigure->value); if (ev_tracking) { us->expected_votes = max(us->expected_votes, ev_tracking_barrier); } recalculate_quorum(1, 0); /* Allow decrease */ break; case VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES: node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid); if (!node) { LEAVE(); return; } node->votes = req_exec_quorum_reconfigure->value; recalculate_quorum(1, 0); /* Allow decrease */ break; case VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA: update_wait_for_all_status(0); log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node %d.", req_exec_quorum_reconfigure->nodeid); recalculate_quorum(0, 0); break; } LEAVE(); } static int votequorum_exec_exit_fn (void) { int ret = 0; ENTER(); /* * tell the other nodes we are leaving */ if (allow_downscale) { us->flags |= NODE_FLAGS_LEAVING; ret = votequorum_exec_send_nodeinfo(us->node_id); } if ((ev_tracking) && (ev_tracking_fd != -1)) { close(ev_tracking_fd); } LEAVE(); return ret; } static void votequorum_set_icmap_ro_keys(void) { icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE); } static char *votequorum_exec_init_fn (struct corosync_api_v1 *api) { char *error = NULL; ENTER(); /* * make sure we start clean */ list_init(&cluster_members_list); list_init(&trackers_list); qdevice = NULL; us = NULL; memset(cluster_nodes, 0, sizeof(cluster_nodes)); /* * Allocate a cluster_node for qdevice */ qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID); if (!qdevice) { LEAVE(); return ((char *)"Could not allocate node."); } qdevice->votes = 0; 
memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); /* * Allocate a cluster_node for us */ us = allocate_node(corosync_api->totem_nodeid_get()); if (!us) { LEAVE(); return ((char *)"Could not allocate node."); } icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id); us->state = NODESTATE_MEMBER; us->votes = 1; us->flags |= NODE_FLAGS_FIRST; error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP); if (error) { return error; } recalculate_quorum(0, 0); /* * Set RO keys in icmap */ votequorum_set_icmap_ro_keys(); /* * Listen for changes */ votequorum_exec_add_config_notification(); /* * Start us off with one node */ votequorum_exec_send_nodeinfo(us->node_id); LEAVE(); return (NULL); } /* * votequorum service core */ static void votequorum_last_man_standing_timer_fn(void *arg) { ENTER(); last_man_standing_timer_set = 0; if (cluster_is_quorate) { recalculate_quorum(1,1); } LEAVE(); } static void votequorum_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { int i, j; int found; int left_nodes; struct cluster_node *node; ENTER(); sync_in_progress = 1; sync_nodeinfo_sent = 0; sync_wait_for_poll_or_timeout = 0; if (member_list_entries > 1) { us->flags &= ~NODE_FLAGS_FIRST; } /* * we don't need to track which nodes have left directly, * since that info is in the node db, but we need to know * if somebody has left for last_man_standing */ left_nodes = 0; for (i = 0; i < quorum_members_entries; i++) { found = 0; for (j = 0; j < member_list_entries; j++) { if (quorum_members[i] == member_list[j]) { found = 1; break; } } if (found == 0) { left_nodes = 1; node = find_node_by_nodeid(quorum_members[i]); if (node) { node->state = NODESTATE_DEAD; } } } if (last_man_standing) { if (((member_list_entries >= quorum) && (left_nodes)) || ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) { if (last_man_standing_timer_set) { corosync_api->timer_delete(last_man_standing_timer); last_man_standing_timer_set = 0; } corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000, NULL, votequorum_last_man_standing_timer_fn, &last_man_standing_timer); last_man_standing_timer_set = 1; } } memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries); previous_quorum_members_entries = quorum_members_entries; memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries); quorum_members_entries = member_list_entries; memcpy(&quorum_ringid, ring_id, sizeof(*ring_id)); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && us->flags & NODE_FLAGS_QDEVICE_ALIVE) { /* * Reset poll timer. 
Sync waiting is interrupted on valid qdevice poll or after timeout */ if (qdevice_timer_set) { corosync_api->timer_delete(qdevice_timer); } corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice, qdevice_timer_fn, &qdevice_timer); qdevice_timer_set = 1; sync_wait_for_poll_or_timeout = 1; log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)", qdevice_name, qdevice_sync_timeout); } LEAVE(); } static int votequorum_sync_process (void) { if (!sync_nodeinfo_sent) { votequorum_exec_send_nodeinfo(us->node_id); votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); if (strlen(qdevice_name)) { votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER, qdevice_name); } votequorum_exec_send_nodelist_notification(NULL, 0LL); sync_nodeinfo_sent = 1; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) { /* * Waiting for qdevice to poll with new ringid or timeout */ return (-1); } return 0; } static void votequorum_sync_activate (void) { recalculate_quorum(0, 0); quorum_callback(quorum_members, quorum_members_entries, cluster_is_quorate, &quorum_ringid); votequorum_exec_send_quorum_notification(NULL, 0L); sync_in_progress = 0; } static void votequorum_sync_abort (void) { } char *votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn) { char *error; ENTER(); if (q_set_quorate_fn == NULL) { return ((char *)"Quorate function not set"); } corosync_api = api; quorum_callback = q_set_quorate_fn; error = corosync_service_link_and_init(corosync_api, &votequorum_service[0]); if (error) { return (error); } LEAVE(); return (NULL); } /* * Library Handler init/fini */ static int quorum_lib_init_fn (void *conn) { struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); ENTER(); list_init (&pd->list); pd->conn = conn; LEAVE(); return (0); } static int quorum_lib_exit_fn (void *conn) { struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); ENTER(); if (quorum_pd->tracking_enabled) { list_del (&quorum_pd->list); list_init (&quorum_pd->list); } LEAVE(); return (0); } /* * library internal functions */ static void qdevice_timer_fn(void *arg) { ENTER(); if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) || (!qdevice_timer_set)) { LEAVE(); return; } us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE; us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name); votequorum_exec_send_nodeinfo(us->node_id); qdevice_timer_set = 0; sync_wait_for_poll_or_timeout = 0; LEAVE(); } /* * Library Handler Functions */ static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message) { const struct req_lib_votequorum_getinfo *req_lib_votequorum_getinfo = message; struct res_lib_votequorum_getinfo res_lib_votequorum_getinfo; struct cluster_node *node; unsigned int highest_expected = 0; unsigned int total_votes = 0; cs_error_t error = CS_OK; uint32_t nodeid = req_lib_votequorum_getinfo->nodeid; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node %u", conn, req_lib_votequorum_getinfo->nodeid); if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { nodeid = us->node_id; } node = find_node_by_nodeid(nodeid); if (node) { struct cluster_node *iternode; struct list_head *nodelist; list_iterate(nodelist, &cluster_members_list) { iternode = list_entry(nodelist, struct cluster_node, list); if (iternode->state == NODESTATE_MEMBER) { highest_expected = 
max(highest_expected, iternode->expected_votes); total_votes += iternode->votes; } } if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { total_votes += qdevice->votes; } switch(node->state) { case NODESTATE_MEMBER: res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_MEMBER; break; case NODESTATE_DEAD: res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_DEAD; break; case NODESTATE_LEAVING: res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_LEAVING; break; default: res_lib_votequorum_getinfo.state = node->state; break; } res_lib_votequorum_getinfo.state = node->state; res_lib_votequorum_getinfo.votes = node->votes; res_lib_votequorum_getinfo.expected_votes = node->expected_votes; res_lib_votequorum_getinfo.highest_expected = highest_expected; res_lib_votequorum_getinfo.quorum = quorum; res_lib_votequorum_getinfo.total_votes = total_votes; res_lib_votequorum_getinfo.flags = 0; res_lib_votequorum_getinfo.nodeid = node->node_id; if (two_node) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_TWONODE; } if (cluster_is_quorate) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QUORATE; } if (wait_for_all) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_WAIT_FOR_ALL; } if (last_man_standing) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LAST_MAN_STANDING; } if (auto_tie_breaker != ATB_NONE) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER; } if (allow_downscale) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_ALLOW_DOWNSCALE; } memset(res_lib_votequorum_getinfo.qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name); res_lib_votequorum_getinfo.qdevice_votes = qdevice->votes; if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_REGISTERED; } if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_ALIVE; } if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_CAST_VOTE; } if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_MASTER_WINS; } } else { error = CS_ERR_NOT_EXIST; } res_lib_votequorum_getinfo.header.size = sizeof(res_lib_votequorum_getinfo); res_lib_votequorum_getinfo.header.id = MESSAGE_RES_VOTEQUORUM_GETINFO; res_lib_votequorum_getinfo.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_getinfo, sizeof(res_lib_votequorum_getinfo)); log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error); LEAVE(); } static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message) { const struct req_lib_votequorum_setexpected *req_lib_votequorum_setexpected = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; unsigned int newquorum; unsigned int total_votes; uint8_t allow_downscale_status = 0; ENTER(); allow_downscale_status = allow_downscale; allow_downscale = 0; /* * Validate new expected votes */ newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes); allow_downscale = allow_downscale_status; if (newquorum < total_votes / 2 || newquorum > total_votes) { error = CS_ERR_INVALID_PARAM; goto error_exit; } votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id, req_lib_votequorum_setexpected->expected_votes); error_exit: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); 
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message) { const struct req_lib_votequorum_setvotes *req_lib_votequorum_setvotes = message; struct res_lib_votequorum_status res_lib_votequorum_status; struct cluster_node *node; unsigned int newquorum; unsigned int total_votes; unsigned int saved_votes; cs_error_t error = CS_OK; unsigned int nodeid; ENTER(); nodeid = req_lib_votequorum_setvotes->nodeid; node = find_node_by_nodeid(nodeid); if (!node) { error = CS_ERR_NAME_NOT_FOUND; goto error_exit; } /* * Check votes is valid */ saved_votes = node->votes; node->votes = req_lib_votequorum_setvotes->votes; newquorum = calculate_quorum(1, 0, &total_votes); if (newquorum < total_votes / 2 || newquorum > total_votes) { node->votes = saved_votes; error = CS_ERR_INVALID_PARAM; goto error_exit; } votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid, req_lib_votequorum_setvotes->votes); error_exit: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_trackstart (void *conn, const void *message) { const struct req_lib_votequorum_trackstart *req_lib_votequorum_trackstart = message; struct res_lib_votequorum_status res_lib_votequorum_status; struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); cs_error_t error = CS_OK; ENTER(); /* * If an immediate listing of the current cluster membership * is requested, generate membership list */ if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT || req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) { log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn); votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context); votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context); } if (quorum_pd->tracking_enabled) { error = CS_ERR_EXIST; goto response_send; } /* * Record requests for tracking */ if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES || req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) { quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags; quorum_pd->tracking_enabled = 1; quorum_pd->tracking_context = req_lib_votequorum_trackstart->context; list_add (&quorum_pd->list, &trackers_list); } response_send: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_trackstop (void *conn, const void *message) { struct res_lib_votequorum_status res_lib_votequorum_status; struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); int error = CS_OK; ENTER(); if (quorum_pd->tracking_enabled) { error = CS_OK; quorum_pd->tracking_enabled = 0; list_del (&quorum_pd->list); list_init 
(&quorum_pd->list); } else { error = CS_ERR_NOT_EXIST; } res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_register (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_register *req_lib_votequorum_qdevice_register = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; ENTER(); if (!qdevice_can_operate) { log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information"); error = CS_ERR_ACCESS; goto out; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if ((!strncmp(req_lib_votequorum_qdevice_register->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) { goto out; } else { log_printf(LOGSYS_LEVEL_WARNING, "A new qdevice with different name (new: %s old: %s) is trying to re-register!", req_lib_votequorum_qdevice_register->name, qdevice_name); error = CS_ERR_EXIST; goto out; } } else { if (qdevice_reg_conn != NULL) { log_printf(LOGSYS_LEVEL_WARNING, "Registration request already in progress"); error = CS_ERR_TRY_AGAIN; goto out; } qdevice_reg_conn = conn; if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER, req_lib_votequorum_qdevice_register->name) != 0) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to send qdevice registration request to cluster"); error = CS_ERR_TRY_AGAIN; qdevice_reg_conn = NULL; } else { LEAVE(); return; } } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_unregister *req_lib_votequorum_qdevice_unregister = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; ENTER(); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } if (qdevice_timer_set) { corosync_api->timer_delete(qdevice_timer); qdevice_timer_set = 0; sync_wait_for_poll_or_timeout = 0; } us->flags &= ~NODE_FLAGS_QDEVICE_REGISTERED; us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE; us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS; votequorum_exec_send_nodeinfo(us->node_id); votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER, req_lib_votequorum_qdevice_unregister->name); } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_update (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_update *req_lib_votequorum_qdevice_update = message; struct 
res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; ENTER(); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname, req_lib_votequorum_qdevice_update->newname); } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_poll (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_poll *req_lib_votequorum_qdevice_poll = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; uint32_t oldflags; ENTER(); if (!qdevice_can_operate) { error = CS_ERR_ACCESS; goto out; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.rep.nodeid && req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) { log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (%u.%"PRIu64") != last sync " "ring id (%u.%"PRIu64"). Ignoring poll call.", req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq, quorum_ringid.rep.nodeid, quorum_ringid.seq); error = CS_ERR_MESSAGE_ERROR; goto out; } if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } if (qdevice_timer_set) { corosync_api->timer_delete(qdevice_timer); qdevice_timer_set = 0; } oldflags = us->flags; us->flags |= NODE_FLAGS_QDEVICE_ALIVE; if (req_lib_votequorum_qdevice_poll->cast_vote) { us->flags |= NODE_FLAGS_QDEVICE_CAST_VOTE; } else { us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; } if (us->flags != oldflags) { votequorum_exec_send_nodeinfo(us->node_id); } corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice, qdevice_timer_fn, &qdevice_timer); qdevice_timer_set = 1; sync_wait_for_poll_or_timeout = 0; } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_master_wins *req_lib_votequorum_qdevice_master_wins = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; uint32_t oldflags = us->flags; ENTER(); if (!qdevice_can_operate) { error = CS_ERR_ACCESS; goto out; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } if (req_lib_votequorum_qdevice_master_wins->allow) { us->flags |= NODE_FLAGS_QDEVICE_MASTER_WINS; } else { us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS; } if (us->flags != oldflags) { votequorum_exec_send_nodeinfo(us->node_id); } 
update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow); } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } diff --git a/lib/cfg.c b/lib/cfg.c index 7bb56de9..abf08bf0 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -1,661 +1,665 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2013 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" /* * Data structure for instance data */ struct cfg_inst { qb_ipcc_connection_t *c; corosync_cfg_callbacks_t callbacks; cs_name_t comp_name; int comp_registered; int finalize; }; /* * All instances in one database */ static void cfg_inst_free (void *inst); DECLARE_HDB_DATABASE (cfg_hdb, cfg_inst_free); /* * Implementation */ cs_error_t corosync_cfg_initialize ( corosync_cfg_handle_t *cfg_handle, const corosync_cfg_callbacks_t *cfg_callbacks) { struct cfg_inst *cfg_inst; cs_error_t error = CS_OK; error = hdb_error_to_cs (hdb_handle_create (&cfg_hdb, sizeof (struct cfg_inst), cfg_handle)); if (error != CS_OK) { goto error_no_destroy; } error = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, *cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { goto error_destroy; } cfg_inst->finalize = 0; cfg_inst->c = qb_ipcc_connect ("cfg", IPC_REQUEST_SIZE); if (cfg_inst->c == NULL) { error = qb_to_cs_error(-errno); goto error_put_destroy; } if (cfg_callbacks) { memcpy (&cfg_inst->callbacks, cfg_callbacks, sizeof (corosync_cfg_callbacks_t)); } (void)hdb_handle_put (&cfg_hdb, *cfg_handle); return (CS_OK); error_put_destroy: (void)hdb_handle_put (&cfg_hdb, *cfg_handle); error_destroy: (void)hdb_handle_destroy (&cfg_hdb, *cfg_handle); error_no_destroy: return (error); } cs_error_t corosync_cfg_fd_get ( corosync_cfg_handle_t cfg_handle, int32_t *selection_fd) { struct cfg_inst *cfg_inst; cs_error_t error; error = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } error = qb_to_cs_error (qb_ipcc_fd_get (cfg_inst->c, selection_fd)); (void)hdb_handle_put (&cfg_hdb, cfg_handle); return (error); } cs_error_t corosync_cfg_dispatch ( corosync_cfg_handle_t cfg_handle, cs_dispatch_flags_t dispatch_flags) { int timeout = -1; cs_error_t error; int cont = 1; /* always continue do loop except when set to 0 */ struct cfg_inst *cfg_inst; struct res_lib_cfg_testshutdown *res_lib_cfg_testshutdown; corosync_cfg_callbacks_t callbacks; struct qb_ipc_response_header *dispatch_data; char dispatch_buf[IPC_DISPATCH_SIZE]; error = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } /* * Timeout instantly for CS_DISPATCH_ONE_NONBLOCKING or CS_DISPATCH_ALL and * wait indefinately for CS_DISPATCH_ONE or CS_DISPATCH_BLOCKING */ if (dispatch_flags == CS_DISPATCH_ALL || dispatch_flags == CS_DISPATCH_ONE_NONBLOCKING) { timeout = 0; } dispatch_data = (struct qb_ipc_response_header *)dispatch_buf; do { error = qb_to_cs_error (qb_ipcc_event_recv ( cfg_inst->c, dispatch_buf, IPC_DISPATCH_SIZE, timeout)); if (error == CS_ERR_BAD_HANDLE) { error = CS_OK; goto error_put; } if (error == CS_ERR_TRY_AGAIN) { if (dispatch_flags == CS_DISPATCH_ONE_NONBLOCKING) { /* * Don't mask error */ goto error_put; } error = CS_OK; if (dispatch_flags == CS_DISPATCH_ALL) { break; /* exit do while cont is 1 loop */ } else { continue; /* next poll */ } } if (error != CS_OK) { goto error_put; } /* * Make copy of callbacks, message data, unlock instance, and call callback * A risk of this dispatch method is that the callback routines may * operate at the same time that cfgFinalize has been called in another thread. 
*/ memcpy (&callbacks, &cfg_inst->callbacks, sizeof (corosync_cfg_callbacks_t)); /* * Dispatch incoming response */ switch (dispatch_data->id) { case MESSAGE_RES_CFG_TESTSHUTDOWN: if (callbacks.corosync_cfg_shutdown_callback == NULL) { break; } res_lib_cfg_testshutdown = (struct res_lib_cfg_testshutdown *)dispatch_data; callbacks.corosync_cfg_shutdown_callback(cfg_handle, res_lib_cfg_testshutdown->flags); break; default: error = CS_ERR_LIBRARY; goto error_nounlock; break; } if (cfg_inst->finalize) { /* * If the finalize has been called then get out of the dispatch. */ error = CS_ERR_BAD_HANDLE; goto error_put; } /* * Determine if more messages should be processed */ if (dispatch_flags == CS_DISPATCH_ONE || dispatch_flags == CS_DISPATCH_ONE_NONBLOCKING) { cont = 0; } } while (cont); error_put: (void)hdb_handle_put (&cfg_hdb, cfg_handle); error_nounlock: return (error); } static void cfg_inst_free (void *inst) { struct cfg_inst *cfg_inst = (struct cfg_inst *)inst; qb_ipcc_disconnect(cfg_inst->c); } cs_error_t corosync_cfg_finalize ( corosync_cfg_handle_t cfg_handle) { struct cfg_inst *cfg_inst; cs_error_t error; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } /* * Another thread has already started finalizing */ if (cfg_inst->finalize) { (void)hdb_handle_put (&cfg_hdb, cfg_handle); return (CS_ERR_BAD_HANDLE); } cfg_inst->finalize = 1; (void)hdb_handle_destroy (&cfg_hdb, cfg_handle); (void)hdb_handle_put (&cfg_hdb, cfg_handle); return (error); } cs_error_t corosync_cfg_ring_status_get ( corosync_cfg_handle_t cfg_handle, char ***interface_names, char ***status, unsigned int *interface_count) { struct cfg_inst *cfg_inst; struct req_lib_cfg_ringstatusget req_lib_cfg_ringstatusget; struct res_lib_cfg_ringstatusget res_lib_cfg_ringstatusget; unsigned int i, j; cs_error_t error; struct iovec iov; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_ringstatusget.header.size = sizeof (struct req_lib_cfg_ringstatusget); req_lib_cfg_ringstatusget.header.id = MESSAGE_REQ_CFG_RINGSTATUSGET; iov.iov_base = (void *)&req_lib_cfg_ringstatusget, iov.iov_len = sizeof (struct req_lib_cfg_ringstatusget), error = qb_to_cs_error (qb_ipcc_sendv_recv(cfg_inst->c, &iov, 1, &res_lib_cfg_ringstatusget, sizeof (struct res_lib_cfg_ringstatusget), CS_IPC_TIMEOUT_MS)); + if (error != CS_OK) { + goto exit_handle_put; + } + *interface_count = res_lib_cfg_ringstatusget.interface_count; *interface_names = malloc (sizeof (char *) * *interface_count); if (*interface_names == NULL) { return (CS_ERR_NO_MEMORY); } memset (*interface_names, 0, sizeof (char *) * *interface_count); *status = malloc (sizeof (char *) * *interface_count); if (*status == NULL) { error = CS_ERR_NO_MEMORY; goto error_free_interface_names_array; } memset (*status, 0, sizeof (char *) * *interface_count); for (i = 0; i < res_lib_cfg_ringstatusget.interface_count; i++) { (*(interface_names))[i] = strdup (res_lib_cfg_ringstatusget.interface_name[i]); if ((*(interface_names))[i] == NULL) { error = CS_ERR_NO_MEMORY; goto error_free_interface_names; } } for (i = 0; i < res_lib_cfg_ringstatusget.interface_count; i++) { (*(status))[i] = strdup (res_lib_cfg_ringstatusget.interface_status[i]); if ((*(status))[i] == NULL) { error = CS_ERR_NO_MEMORY; goto error_free_status; } } - goto no_error; + goto exit_handle_put; error_free_status: for (j = 0; j < i; j++) { free ((*(status))[j]); } i = *interface_count; 
error_free_interface_names: for (j = 0; j < i; j++) { free ((*(interface_names))[j]); } - + free (*status); error_free_interface_names_array: free (*interface_names); -no_error: +exit_handle_put: (void)hdb_handle_put (&cfg_hdb, cfg_handle); return (error); } cs_error_t corosync_cfg_ring_reenable ( corosync_cfg_handle_t cfg_handle) { struct cfg_inst *cfg_inst; struct req_lib_cfg_ringreenable req_lib_cfg_ringreenable; struct res_lib_cfg_ringreenable res_lib_cfg_ringreenable; cs_error_t error; struct iovec iov; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_ringreenable.header.size = sizeof (struct req_lib_cfg_ringreenable); req_lib_cfg_ringreenable.header.id = MESSAGE_REQ_CFG_RINGREENABLE; iov.iov_base = (void *)&req_lib_cfg_ringreenable, iov.iov_len = sizeof (struct req_lib_cfg_ringreenable); error = qb_to_cs_error (qb_ipcc_sendv_recv (cfg_inst->c, &iov, 1, &res_lib_cfg_ringreenable, sizeof (struct res_lib_cfg_ringreenable), CS_IPC_TIMEOUT_MS)); (void)hdb_handle_put (&cfg_hdb, cfg_handle); return (error); } cs_error_t corosync_cfg_kill_node ( corosync_cfg_handle_t cfg_handle, unsigned int nodeid, const char *reason) { struct cfg_inst *cfg_inst; struct req_lib_cfg_killnode req_lib_cfg_killnode; struct res_lib_cfg_killnode res_lib_cfg_killnode; cs_error_t error; struct iovec iov; if (strlen(reason) >= CS_MAX_NAME_LENGTH) return CS_ERR_NAME_TOO_LONG; error = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_killnode.header.id = MESSAGE_REQ_CFG_KILLNODE; req_lib_cfg_killnode.header.size = sizeof (struct req_lib_cfg_killnode); req_lib_cfg_killnode.nodeid = nodeid; strcpy((char *)req_lib_cfg_killnode.reason.value, reason); req_lib_cfg_killnode.reason.length = strlen(reason)+1; iov.iov_base = (void *)&req_lib_cfg_killnode; iov.iov_len = sizeof (struct req_lib_cfg_killnode); error = qb_to_cs_error (qb_ipcc_sendv_recv (cfg_inst->c, &iov, 1, &res_lib_cfg_killnode, sizeof (struct res_lib_cfg_killnode), CS_IPC_TIMEOUT_MS)); error = res_lib_cfg_killnode.header.error; (void)hdb_handle_put (&cfg_hdb, cfg_handle); return (error == CS_OK ? res_lib_cfg_killnode.header.error : error); } cs_error_t corosync_cfg_try_shutdown ( corosync_cfg_handle_t cfg_handle, corosync_cfg_shutdown_flags_t flags) { struct cfg_inst *cfg_inst; struct req_lib_cfg_tryshutdown req_lib_cfg_tryshutdown; struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; cs_error_t error; struct iovec iov; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_tryshutdown.header.id = MESSAGE_REQ_CFG_TRYSHUTDOWN; req_lib_cfg_tryshutdown.header.size = sizeof (struct req_lib_cfg_tryshutdown); req_lib_cfg_tryshutdown.flags = flags; iov.iov_base = (void *)&req_lib_cfg_tryshutdown; iov.iov_len = sizeof (req_lib_cfg_tryshutdown); error = qb_to_cs_error (qb_ipcc_sendv_recv (cfg_inst->c, &iov, 1, &res_lib_cfg_tryshutdown, sizeof (struct res_lib_cfg_tryshutdown), CS_IPC_TIMEOUT_MS)); (void)hdb_handle_put (&cfg_hdb, cfg_handle); return (error == CS_OK ? 
res_lib_cfg_tryshutdown.header.error : error); } cs_error_t corosync_cfg_replyto_shutdown ( corosync_cfg_handle_t cfg_handle, corosync_cfg_shutdown_reply_flags_t response) { struct cfg_inst *cfg_inst; struct req_lib_cfg_replytoshutdown req_lib_cfg_replytoshutdown; struct res_lib_cfg_replytoshutdown res_lib_cfg_replytoshutdown; struct iovec iov; cs_error_t error; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_replytoshutdown.header.id = MESSAGE_REQ_CFG_REPLYTOSHUTDOWN; req_lib_cfg_replytoshutdown.header.size = sizeof (struct req_lib_cfg_replytoshutdown); req_lib_cfg_replytoshutdown.response = response; iov.iov_base = (void *)&req_lib_cfg_replytoshutdown; iov.iov_len = sizeof (struct req_lib_cfg_replytoshutdown); error = qb_to_cs_error (qb_ipcc_sendv_recv (cfg_inst->c, &iov, 1, &res_lib_cfg_replytoshutdown, sizeof (struct res_lib_cfg_replytoshutdown), CS_IPC_TIMEOUT_MS)); return (error); } cs_error_t corosync_cfg_get_node_addrs ( corosync_cfg_handle_t cfg_handle, int nodeid, size_t max_addrs, int *num_addrs, corosync_cfg_node_address_t *addrs) { cs_error_t error; struct req_lib_cfg_get_node_addrs req_lib_cfg_get_node_addrs; struct res_lib_cfg_get_node_addrs *res_lib_cfg_get_node_addrs; struct cfg_inst *cfg_inst; int addrlen = 0; int i; struct iovec iov; const char *addr_buf; char response_buf[IPC_RESPONSE_SIZE]; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_get_node_addrs.header.size = sizeof (req_lib_cfg_get_node_addrs); req_lib_cfg_get_node_addrs.header.id = MESSAGE_REQ_CFG_GET_NODE_ADDRS; req_lib_cfg_get_node_addrs.nodeid = nodeid; iov.iov_base = (char *)&req_lib_cfg_get_node_addrs; iov.iov_len = sizeof (req_lib_cfg_get_node_addrs); error = qb_to_cs_error (qb_ipcc_sendv_recv ( cfg_inst->c, &iov, 1, response_buf, IPC_RESPONSE_SIZE, CS_IPC_TIMEOUT_MS)); res_lib_cfg_get_node_addrs = (struct res_lib_cfg_get_node_addrs *)response_buf; if (error != CS_OK) { goto error_put; } if (res_lib_cfg_get_node_addrs->family == AF_INET) addrlen = sizeof(struct sockaddr_in); if (res_lib_cfg_get_node_addrs->family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); for (i = 0, addr_buf = (char *)res_lib_cfg_get_node_addrs->addrs; i < max_addrs && inum_addrs; i++, addr_buf += TOTEMIP_ADDRLEN) { struct sockaddr_in *in; struct sockaddr_in6 *in6; addrs[i].address_length = addrlen; if (res_lib_cfg_get_node_addrs->family == AF_INET) { in = (struct sockaddr_in *)addrs[i].address; in->sin_family = AF_INET; memcpy(&in->sin_addr, addr_buf, sizeof(struct in_addr)); } if (res_lib_cfg_get_node_addrs->family == AF_INET6) { in6 = (struct sockaddr_in6 *)addrs[i].address; in6->sin6_family = AF_INET6; memcpy(&in6->sin6_addr, addr_buf, sizeof(struct in6_addr)); } } *num_addrs = res_lib_cfg_get_node_addrs->num_addrs; errno = error = res_lib_cfg_get_node_addrs->header.error; error_put: hdb_handle_put (&cfg_hdb, cfg_handle); return (error); } cs_error_t corosync_cfg_local_get ( corosync_cfg_handle_t handle, unsigned int *local_nodeid) { cs_error_t error; struct cfg_inst *cfg_inst; struct iovec iov; struct req_lib_cfg_local_get req_lib_cfg_local_get; struct res_lib_cfg_local_get res_lib_cfg_local_get; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_local_get.header.size = sizeof (struct qb_ipc_request_header); req_lib_cfg_local_get.header.id = MESSAGE_REQ_CFG_LOCAL_GET; 
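corosync_cfg_local_get() simply reports the local node id over the same request/response path as the calls above. A hypothetical usage sketch (assumes the handle came from the usual corosync_cfg_initialize() call and that <corosync/cfg.h> is included; illustrative only, not part of the patch):

	corosync_cfg_handle_t h;
	unsigned int nodeid;

	if (corosync_cfg_initialize (&h, NULL) == CS_OK) {
		if (corosync_cfg_local_get (h, &nodeid) == CS_OK) {
			printf ("local nodeid: %u\n", nodeid);
		}
		(void)corosync_cfg_finalize (h);
	}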
iov.iov_base = (void *)&req_lib_cfg_local_get; iov.iov_len = sizeof (struct req_lib_cfg_local_get); error = qb_to_cs_error (qb_ipcc_sendv_recv ( cfg_inst->c, &iov, 1, &res_lib_cfg_local_get, sizeof (struct res_lib_cfg_local_get), CS_IPC_TIMEOUT_MS)); if (error != CS_OK) { goto error_exit; } error = res_lib_cfg_local_get.header.error; *local_nodeid = res_lib_cfg_local_get.local_nodeid; error_exit: (void)hdb_handle_put (&cfg_hdb, handle); return (error); } cs_error_t corosync_cfg_reload_config ( corosync_cfg_handle_t handle) { cs_error_t error; struct cfg_inst *cfg_inst; struct iovec iov; struct req_lib_cfg_reload_config req_lib_cfg_reload_config; struct res_lib_cfg_reload_config res_lib_cfg_reload_config; error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, handle, (void *)&cfg_inst)); if (error != CS_OK) { return (error); } req_lib_cfg_reload_config.header.size = sizeof (struct qb_ipc_request_header); req_lib_cfg_reload_config.header.id = MESSAGE_REQ_CFG_RELOAD_CONFIG; iov.iov_base = (void *)&req_lib_cfg_reload_config; iov.iov_len = sizeof (struct req_lib_cfg_reload_config); error = qb_to_cs_error (qb_ipcc_sendv_recv ( cfg_inst->c, &iov, 1, &res_lib_cfg_reload_config, sizeof (struct res_lib_cfg_reload_config), CS_IPC_TIMEOUT_MS)); if (error != CS_OK) { goto error_exit; } error = res_lib_cfg_reload_config.header.error; error_exit: (void)hdb_handle_put (&cfg_hdb, handle); return (error); } diff --git a/lib/cpg.c b/lib/cpg.c index c101487f..c831390b 100644 --- a/lib/cpg.c +++ b/lib/cpg.c @@ -1,1369 +1,1371 @@ /* * vi: set autoindent tabstop=4 shiftwidth=4 : * * Copyright (c) 2006-2015 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfi@redhat.com) * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* * Provides a closed process group API using the coroipcc executive */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif /* * Maximum number of times to retry a send when transmitting * a large message fragment */ #define MAX_RETRIES 100 /* * ZCB files have following umask (umask is same as used in libqb) */ #define CPG_MEMORY_MAP_UMASK 077 struct cpg_inst { qb_ipcc_connection_t *c; int finalize; void *context; union { cpg_model_data_t model_data; cpg_model_v1_data_t model_v1_data; }; struct list_head iteration_list_head; uint32_t max_msg_size; char *assembly_buf; uint32_t assembly_buf_ptr; int assembling; /* Flag that says we have started assembling a message. * It's here to catch the situation where a node joins * the cluster/group in the middle of a CPG message send * so we don't pass on a partial message to the client. */ }; static void cpg_inst_free (void *inst); DECLARE_HDB_DATABASE(cpg_handle_t_db, cpg_inst_free); struct cpg_iteration_instance_t { cpg_iteration_handle_t cpg_iteration_handle; qb_ipcc_connection_t *conn; hdb_handle_t executive_iteration_handle; struct list_head list; }; DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL); /* * Internal (not visible by API) functions */ static cs_error_t coroipcc_msg_send_reply_receive ( qb_ipcc_connection_t *c, const struct iovec *iov, unsigned int iov_len, void *res_msg, size_t res_len) { return qb_to_cs_error(qb_ipcc_sendv_recv(c, iov, iov_len, res_msg, res_len, CS_IPC_TIMEOUT_MS)); } static void cpg_iteration_instance_finalize (struct cpg_iteration_instance_t *cpg_iteration_instance) { list_del (&cpg_iteration_instance->list); hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->cpg_iteration_handle); } static void cpg_inst_free (void *inst) { struct cpg_inst *cpg_inst = (struct cpg_inst *)inst; qb_ipcc_disconnect(cpg_inst->c); } static void cpg_inst_finalize (struct cpg_inst *cpg_inst, hdb_handle_t handle) { struct list_head *iter, *iter_next; struct cpg_iteration_instance_t *cpg_iteration_instance; /* * Traverse thru iteration instances and delete them */ for (iter = cpg_inst->iteration_list_head.next; iter != &cpg_inst->iteration_list_head;iter = iter_next) { iter_next = iter->next; cpg_iteration_instance = list_entry (iter, struct cpg_iteration_instance_t, list); cpg_iteration_instance_finalize (cpg_iteration_instance); } hdb_handle_destroy (&cpg_handle_t_db, handle); } /** * @defgroup cpg_coroipcc The closed process group API * @ingroup coroipcc * * @{ */ cs_error_t cpg_initialize ( cpg_handle_t *handle, cpg_callbacks_t *callbacks) { cpg_model_v1_data_t model_v1_data; memset (&model_v1_data, 0, sizeof (cpg_model_v1_data_t)); if (callbacks) { model_v1_data.cpg_deliver_fn = callbacks->cpg_deliver_fn; model_v1_data.cpg_confchg_fn = callbacks->cpg_confchg_fn; } return (cpg_model_initialize (handle, CPG_MODEL_V1, (cpg_model_data_t *)&model_v1_data, NULL)); } cs_error_t cpg_model_initialize ( cpg_handle_t *handle, cpg_model_t model, cpg_model_data_t *model_data, void *context) { cs_error_t error; struct cpg_inst *cpg_inst; if (model != CPG_MODEL_V1) { error = CS_ERR_INVALID_PARAM; goto error_no_destroy; } error = hdb_error_to_cs (hdb_handle_create (&cpg_handle_t_db, sizeof (struct cpg_inst), handle)); if (error != CS_OK) { goto error_no_destroy; } error = hdb_error_to_cs (hdb_handle_get 
(&cpg_handle_t_db, *handle, (void *)&cpg_inst)); if (error != CS_OK) { goto error_destroy; } cpg_inst->c = qb_ipcc_connect ("cpg", IPC_REQUEST_SIZE); if (cpg_inst->c == NULL) { error = qb_to_cs_error(-errno); goto error_put_destroy; } if (model_data != NULL) { switch (model) { case CPG_MODEL_V1: memcpy (&cpg_inst->model_v1_data, model_data, sizeof (cpg_model_v1_data_t)); if ((cpg_inst->model_v1_data.flags & ~(CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF)) != 0) { error = CS_ERR_INVALID_PARAM; goto error_destroy; } break; } } /* Allow space for corosync internal headers */ cpg_inst->max_msg_size = IPC_REQUEST_SIZE - 1024; cpg_inst->model_data.model = model; cpg_inst->context = context; list_init(&cpg_inst->iteration_list_head); hdb_handle_put (&cpg_handle_t_db, *handle); return (CS_OK); error_put_destroy: hdb_handle_put (&cpg_handle_t_db, *handle); error_destroy: hdb_handle_destroy (&cpg_handle_t_db, *handle); error_no_destroy: return (error); } cs_error_t cpg_finalize ( cpg_handle_t handle) { struct cpg_inst *cpg_inst; struct iovec iov; struct req_lib_cpg_finalize req_lib_cpg_finalize; struct res_lib_cpg_finalize res_lib_cpg_finalize; cs_error_t error; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } /* * Another thread has already started finalizing */ if (cpg_inst->finalize) { hdb_handle_put (&cpg_handle_t_db, handle); return (CS_ERR_BAD_HANDLE); } cpg_inst->finalize = 1; /* * Send service request */ req_lib_cpg_finalize.header.size = sizeof (struct req_lib_cpg_finalize); req_lib_cpg_finalize.header.id = MESSAGE_REQ_CPG_FINALIZE; iov.iov_base = (void *)&req_lib_cpg_finalize; iov.iov_len = sizeof (struct req_lib_cpg_finalize); error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1, &res_lib_cpg_finalize, sizeof (struct res_lib_cpg_finalize)); cpg_inst_finalize (cpg_inst, handle); hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_fd_get ( cpg_handle_t handle, int *fd) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } error = qb_to_cs_error (qb_ipcc_fd_get (cpg_inst->c, fd)); hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_max_atomic_msgsize_get ( cpg_handle_t handle, uint32_t *size) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } *size = cpg_inst->max_msg_size; hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_context_get ( cpg_handle_t handle, void **context) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } *context = cpg_inst->context; hdb_handle_put (&cpg_handle_t_db, handle); return (CS_OK); } cs_error_t cpg_context_set ( cpg_handle_t handle, void *context) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } cpg_inst->context = context; hdb_handle_put (&cpg_handle_t_db, handle); return (CS_OK); } cs_error_t cpg_dispatch ( cpg_handle_t handle, cs_dispatch_flags_t dispatch_types) { int timeout = -1; cs_error_t error; int cont = 1; /* always continue do loop except when set to 0 */ struct cpg_inst *cpg_inst; struct 
res_lib_cpg_confchg_callback *res_cpg_confchg_callback; struct res_lib_cpg_deliver_callback *res_cpg_deliver_callback; struct res_lib_cpg_partial_deliver_callback *res_cpg_partial_deliver_callback; struct res_lib_cpg_totem_confchg_callback *res_cpg_totem_confchg_callback; struct cpg_inst cpg_inst_copy; struct qb_ipc_response_header *dispatch_data; struct cpg_address member_list[CPG_MEMBERS_MAX]; struct cpg_address left_list[CPG_MEMBERS_MAX]; struct cpg_address joined_list[CPG_MEMBERS_MAX]; struct cpg_name group_name; mar_cpg_address_t *left_list_start; mar_cpg_address_t *joined_list_start; unsigned int i; struct cpg_ring_id ring_id; uint32_t totem_member_list[CPG_MEMBERS_MAX]; int32_t errno_res; char dispatch_buf[IPC_DISPATCH_SIZE]; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } /* * Timeout instantly for CS_DISPATCH_ONE_NONBLOCKING or CS_DISPATCH_ALL and * wait indefinitely for CS_DISPATCH_ONE or CS_DISPATCH_BLOCKING */ if (dispatch_types == CS_DISPATCH_ALL || dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { timeout = 0; } dispatch_data = (struct qb_ipc_response_header *)dispatch_buf; do { errno_res = qb_ipcc_event_recv ( cpg_inst->c, dispatch_buf, IPC_DISPATCH_SIZE, timeout); error = qb_to_cs_error (errno_res); if (error == CS_ERR_BAD_HANDLE) { error = CS_OK; goto error_put; } if (error == CS_ERR_TRY_AGAIN) { if (dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { /* * Don't mask error */ goto error_put; } error = CS_OK; if (dispatch_types == CS_DISPATCH_ALL) { break; /* exit do while cont is 1 loop */ } else { continue; /* next poll */ } } if (error != CS_OK) { goto error_put; } /* * Make copy of callbacks, message data, unlock instance, and call callback * A risk of this dispatch method is that the callback routines may * operate at the same time that cpgFinalize has been called. */ memcpy (&cpg_inst_copy, cpg_inst, sizeof (struct cpg_inst)); switch (cpg_inst_copy.model_data.model) { case CPG_MODEL_V1: /* * Dispatch incoming message */ switch (dispatch_data->id) { case MESSAGE_RES_CPG_DELIVER_CALLBACK: if (cpg_inst_copy.model_v1_data.cpg_deliver_fn == NULL) { break; } res_cpg_deliver_callback = (struct res_lib_cpg_deliver_callback *)dispatch_data; marshall_from_mar_cpg_name_t ( &group_name, &res_cpg_deliver_callback->group_name); cpg_inst_copy.model_v1_data.cpg_deliver_fn (handle, &group_name, res_cpg_deliver_callback->nodeid, res_cpg_deliver_callback->pid, &res_cpg_deliver_callback->message, res_cpg_deliver_callback->msglen); break; case MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK: res_cpg_partial_deliver_callback = (struct res_lib_cpg_partial_deliver_callback *)dispatch_data; marshall_from_mar_cpg_name_t ( &group_name, &res_cpg_partial_deliver_callback->group_name); if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_FIRST) { /* * Allocate a buffer to contain a full message. 
*/ cpg_inst->assembly_buf = malloc(res_cpg_partial_deliver_callback->msglen); if (!cpg_inst->assembly_buf) { error = CS_ERR_NO_MEMORY; goto error_put; } cpg_inst->assembling = 1; cpg_inst->assembly_buf_ptr = 0; } if (cpg_inst->assembling) { memcpy(cpg_inst->assembly_buf + cpg_inst->assembly_buf_ptr, res_cpg_partial_deliver_callback->message, res_cpg_partial_deliver_callback->fraglen); cpg_inst->assembly_buf_ptr += res_cpg_partial_deliver_callback->fraglen; if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_LAST) { cpg_inst_copy.model_v1_data.cpg_deliver_fn (handle, &group_name, res_cpg_partial_deliver_callback->nodeid, res_cpg_partial_deliver_callback->pid, cpg_inst->assembly_buf, res_cpg_partial_deliver_callback->msglen); free(cpg_inst->assembly_buf); cpg_inst->assembling = 0; } } break; case MESSAGE_RES_CPG_CONFCHG_CALLBACK: if (cpg_inst_copy.model_v1_data.cpg_confchg_fn == NULL) { break; } res_cpg_confchg_callback = (struct res_lib_cpg_confchg_callback *)dispatch_data; for (i = 0; i < res_cpg_confchg_callback->member_list_entries; i++) { marshall_from_mar_cpg_address_t (&member_list[i], &res_cpg_confchg_callback->member_list[i]); } left_list_start = res_cpg_confchg_callback->member_list + res_cpg_confchg_callback->member_list_entries; for (i = 0; i < res_cpg_confchg_callback->left_list_entries; i++) { marshall_from_mar_cpg_address_t (&left_list[i], &left_list_start[i]); } joined_list_start = res_cpg_confchg_callback->member_list + res_cpg_confchg_callback->member_list_entries + res_cpg_confchg_callback->left_list_entries; for (i = 0; i < res_cpg_confchg_callback->joined_list_entries; i++) { marshall_from_mar_cpg_address_t (&joined_list[i], &joined_list_start[i]); } marshall_from_mar_cpg_name_t ( &group_name, &res_cpg_confchg_callback->group_name); cpg_inst_copy.model_v1_data.cpg_confchg_fn (handle, &group_name, member_list, res_cpg_confchg_callback->member_list_entries, left_list, res_cpg_confchg_callback->left_list_entries, joined_list, res_cpg_confchg_callback->joined_list_entries); break; case MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK: if (cpg_inst_copy.model_v1_data.cpg_totem_confchg_fn == NULL) { break; } res_cpg_totem_confchg_callback = (struct res_lib_cpg_totem_confchg_callback *)dispatch_data; marshall_from_mar_cpg_ring_id_t (&ring_id, &res_cpg_totem_confchg_callback->ring_id); for (i = 0; i < res_cpg_totem_confchg_callback->member_list_entries; i++) { totem_member_list[i] = res_cpg_totem_confchg_callback->member_list[i]; } cpg_inst_copy.model_v1_data.cpg_totem_confchg_fn (handle, ring_id, res_cpg_totem_confchg_callback->member_list_entries, totem_member_list); break; default: error = CS_ERR_LIBRARY; goto error_put; break; } /* - switch (dispatch_data->id) */ break; /* case CPG_MODEL_V1 */ } /* - switch (cpg_inst_copy.model_data.model) */ if (cpg_inst_copy.finalize || cpg_inst->finalize) { /* * If the finalize has been called then get out of the dispatch. 
*/ cpg_inst->finalize = 1; error = CS_ERR_BAD_HANDLE; goto error_put; } /* * Determine if more messages should be processed */ if (dispatch_types == CS_DISPATCH_ONE || dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { cont = 0; } } while (cont); error_put: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_join ( cpg_handle_t handle, const struct cpg_name *group) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov[2]; struct req_lib_cpg_join req_lib_cpg_join; struct res_lib_cpg_join response; if (group->length > CPG_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } /* Now join */ req_lib_cpg_join.header.size = sizeof (struct req_lib_cpg_join); req_lib_cpg_join.header.id = MESSAGE_REQ_CPG_JOIN; req_lib_cpg_join.pid = getpid(); req_lib_cpg_join.flags = 0; switch (cpg_inst->model_data.model) { case CPG_MODEL_V1: req_lib_cpg_join.flags = cpg_inst->model_v1_data.flags; break; } marshall_to_mar_cpg_name_t (&req_lib_cpg_join.group_name, group); iov[0].iov_base = (void *)&req_lib_cpg_join; iov[0].iov_len = sizeof (struct req_lib_cpg_join); do { error = coroipcc_msg_send_reply_receive (cpg_inst->c, iov, 1, &response, sizeof (struct res_lib_cpg_join)); if (error != CS_OK) { goto error_exit; } } while (response.header.error == CS_ERR_BUSY); error = response.header.error; error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_leave ( cpg_handle_t handle, const struct cpg_name *group) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov[2]; struct req_lib_cpg_leave req_lib_cpg_leave; struct res_lib_cpg_leave res_lib_cpg_leave; if (group->length > CPG_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_lib_cpg_leave.header.size = sizeof (struct req_lib_cpg_leave); req_lib_cpg_leave.header.id = MESSAGE_REQ_CPG_LEAVE; req_lib_cpg_leave.pid = getpid(); marshall_to_mar_cpg_name_t (&req_lib_cpg_leave.group_name, group); iov[0].iov_base = (void *)&req_lib_cpg_leave; iov[0].iov_len = sizeof (struct req_lib_cpg_leave); do { error = coroipcc_msg_send_reply_receive (cpg_inst->c, iov, 1, &res_lib_cpg_leave, sizeof (struct res_lib_cpg_leave)); if (error != CS_OK) { goto error_exit; } } while (res_lib_cpg_leave.header.error == CS_ERR_BUSY); error = res_lib_cpg_leave.header.error; error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_membership_get ( cpg_handle_t handle, struct cpg_name *group_name, struct cpg_address *member_list, int *member_list_entries) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov; struct req_lib_cpg_membership_get req_lib_cpg_membership_get; struct res_lib_cpg_membership_get res_lib_cpg_membership_get; unsigned int i; if (group_name->length > CPG_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } if (member_list == NULL) { return (CS_ERR_INVALID_PARAM); } if (member_list_entries == NULL) { return (CS_ERR_INVALID_PARAM); } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_lib_cpg_membership_get.header.size = sizeof (struct req_lib_cpg_membership_get); req_lib_cpg_membership_get.header.id = MESSAGE_REQ_CPG_MEMBERSHIP; marshall_to_mar_cpg_name_t (&req_lib_cpg_membership_get.group_name, group_name); iov.iov_base = (void 
*)&req_lib_cpg_membership_get; iov.iov_len = sizeof (struct req_lib_cpg_membership_get); error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1, &res_lib_cpg_membership_get, sizeof (res_lib_cpg_membership_get)); if (error != CS_OK) { goto error_exit; } error = res_lib_cpg_membership_get.header.error; /* * Copy results to caller */ *member_list_entries = res_lib_cpg_membership_get.member_count; if (member_list) { for (i = 0; i < res_lib_cpg_membership_get.member_count; i++) { marshall_from_mar_cpg_address_t (&member_list[i], &res_lib_cpg_membership_get.member_list[i]); } } error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_local_get ( cpg_handle_t handle, unsigned int *local_nodeid) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov; struct req_lib_cpg_local_get req_lib_cpg_local_get; struct res_lib_cpg_local_get res_lib_cpg_local_get; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_lib_cpg_local_get.header.size = sizeof (struct qb_ipc_request_header); req_lib_cpg_local_get.header.id = MESSAGE_REQ_CPG_LOCAL_GET; iov.iov_base = (void *)&req_lib_cpg_local_get; iov.iov_len = sizeof (struct req_lib_cpg_local_get); error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1, &res_lib_cpg_local_get, sizeof (res_lib_cpg_local_get)); if (error != CS_OK) { goto error_exit; } error = res_lib_cpg_local_get.header.error; *local_nodeid = res_lib_cpg_local_get.local_nodeid; error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_flow_control_state_get ( cpg_handle_t handle, cpg_flow_control_state_t *flow_control_state) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } *flow_control_state = CPG_FLOW_CONTROL_DISABLED; error = CS_OK; hdb_handle_put (&cpg_handle_t_db, handle); return (error); } static int memory_map (char *path, const char *file, void **buf, size_t bytes) { int32_t fd; void *addr; int32_t res; char *buffer; int32_t i; size_t written; size_t page_size; long int sysconf_page_size; mode_t old_umask; snprintf (path, PATH_MAX, "/dev/shm/%s", file); old_umask = umask(CPG_MEMORY_MAP_UMASK); fd = mkstemp (path); (void)umask(old_umask); if (fd == -1) { snprintf (path, PATH_MAX, LOCALSTATEDIR "/run/%s", file); old_umask = umask(CPG_MEMORY_MAP_UMASK); fd = mkstemp (path); (void)umask(old_umask); if (fd == -1) { return (-1); } } res = ftruncate (fd, bytes); if (res == -1) { goto error_close_unlink; } sysconf_page_size = sysconf(_SC_PAGESIZE); if (sysconf_page_size <= 0) { goto error_close_unlink; } page_size = sysconf_page_size; buffer = malloc (page_size); if (buffer == NULL) { goto error_close_unlink; } memset (buffer, 0, page_size); for (i = 0; i < (bytes / page_size); i++) { retry_write: written = write (fd, buffer, page_size); if (written == -1 && errno == EINTR) { goto retry_write; } if (written != page_size) { free (buffer); goto error_close_unlink; } } free (buffer); addr = mmap (NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { goto error_close_unlink; } #ifdef MADV_NOSYNC madvise(addr, bytes, MADV_NOSYNC); #endif res = close (fd); if (res) { + munmap(addr, bytes); + return (-1); } *buf = addr; return 0; error_close_unlink: close (fd); unlink(path); return -1; } cs_error_t cpg_zcb_alloc ( cpg_handle_t handle, size_t size, void **buffer) { void *buf = NULL; char 
path[PATH_MAX]; mar_req_coroipcc_zc_alloc_t req_coroipcc_zc_alloc; struct qb_ipc_response_header res_coroipcs_zc_alloc; size_t map_size; struct iovec iovec; struct coroipcs_zc_header *hdr; cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } map_size = size + sizeof (struct req_lib_cpg_mcast) + sizeof (struct coroipcs_zc_header); assert(memory_map (path, "corosync_zerocopy-XXXXXX", &buf, map_size) != -1); if (strlen(path) >= CPG_ZC_PATH_LEN) { unlink(path); munmap (buf, map_size); return (CS_ERR_NAME_TOO_LONG); } req_coroipcc_zc_alloc.header.size = sizeof (mar_req_coroipcc_zc_alloc_t); req_coroipcc_zc_alloc.header.id = MESSAGE_REQ_CPG_ZC_ALLOC; req_coroipcc_zc_alloc.map_size = map_size; strcpy (req_coroipcc_zc_alloc.path_to_file, path); iovec.iov_base = (void *)&req_coroipcc_zc_alloc; iovec.iov_len = sizeof (mar_req_coroipcc_zc_alloc_t); error = coroipcc_msg_send_reply_receive ( cpg_inst->c, &iovec, 1, &res_coroipcs_zc_alloc, sizeof (struct qb_ipc_response_header)); if (error != CS_OK) { goto error_exit; } hdr = (struct coroipcs_zc_header *)buf; hdr->map_size = map_size; *buffer = ((char *)buf) + sizeof (struct coroipcs_zc_header) + sizeof (struct req_lib_cpg_mcast); error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_zcb_free ( cpg_handle_t handle, void *buffer) { cs_error_t error; unsigned int res; struct cpg_inst *cpg_inst; mar_req_coroipcc_zc_free_t req_coroipcc_zc_free; struct qb_ipc_response_header res_coroipcs_zc_free; struct iovec iovec; struct coroipcs_zc_header *header = (struct coroipcs_zc_header *)((char *)buffer - sizeof (struct coroipcs_zc_header) - sizeof (struct req_lib_cpg_mcast)); error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_coroipcc_zc_free.header.size = sizeof (mar_req_coroipcc_zc_free_t); req_coroipcc_zc_free.header.id = MESSAGE_REQ_CPG_ZC_FREE; req_coroipcc_zc_free.map_size = header->map_size; req_coroipcc_zc_free.server_address = header->server_address; iovec.iov_base = (void *)&req_coroipcc_zc_free; iovec.iov_len = sizeof (mar_req_coroipcc_zc_free_t); error = coroipcc_msg_send_reply_receive ( cpg_inst->c, &iovec, 1, &res_coroipcs_zc_free, sizeof (struct qb_ipc_response_header)); if (error != CS_OK) { goto error_exit; } res = munmap ((void *)header, header->map_size); if (res == -1) { error = qb_to_cs_error(-errno); goto error_exit; } error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_zcb_mcast_joined ( cpg_handle_t handle, cpg_guarantee_t guarantee, void *msg, size_t msg_len) { cs_error_t error; struct cpg_inst *cpg_inst; struct req_lib_cpg_mcast *req_lib_cpg_mcast; struct res_lib_cpg_mcast res_lib_cpg_mcast; mar_req_coroipcc_zc_execute_t req_coroipcc_zc_execute; struct coroipcs_zc_header *hdr; struct iovec iovec; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } if (msg_len > IPC_REQUEST_SIZE) { error = CS_ERR_TOO_BIG; goto error_exit; } req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)(((char *)msg) - sizeof (struct req_lib_cpg_mcast)); req_lib_cpg_mcast->header.size = sizeof (struct req_lib_cpg_mcast) + msg_len; req_lib_cpg_mcast->header.id = MESSAGE_REQ_CPG_MCAST; req_lib_cpg_mcast->guarantee = guarantee; req_lib_cpg_mcast->msglen = msg_len; hdr = (struct coroipcs_zc_header *)(((char *)req_lib_cpg_mcast) - 
sizeof (struct coroipcs_zc_header)); req_coroipcc_zc_execute.header.size = sizeof (mar_req_coroipcc_zc_execute_t); req_coroipcc_zc_execute.header.id = MESSAGE_REQ_CPG_ZC_EXECUTE; req_coroipcc_zc_execute.server_address = hdr->server_address; iovec.iov_base = (void *)&req_coroipcc_zc_execute; iovec.iov_len = sizeof (mar_req_coroipcc_zc_execute_t); error = coroipcc_msg_send_reply_receive ( cpg_inst->c, &iovec, 1, &res_lib_cpg_mcast, sizeof(res_lib_cpg_mcast)); if (error != CS_OK) { goto error_exit; } error = res_lib_cpg_mcast.header.error; error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } static cs_error_t send_fragments ( struct cpg_inst *cpg_inst, cpg_guarantee_t guarantee, size_t msg_len, const struct iovec *iovec, unsigned int iov_len) { int i; cs_error_t error = CS_OK; struct iovec iov[2]; struct req_lib_cpg_partial_mcast req_lib_cpg_mcast; struct res_lib_cpg_partial_send res_lib_cpg_partial_send; size_t sent = 0; size_t iov_sent = 0; int retry_count; req_lib_cpg_mcast.header.id = MESSAGE_REQ_CPG_PARTIAL_MCAST; req_lib_cpg_mcast.guarantee = guarantee; req_lib_cpg_mcast.msglen = msg_len; iov[0].iov_base = (void *)&req_lib_cpg_mcast; iov[0].iov_len = sizeof (struct req_lib_cpg_partial_mcast); i=0; iov_sent = 0 ; qb_ipcc_fc_enable_max_set(cpg_inst->c, 2); while (error == CS_OK && sent < msg_len) { retry_count = 0; if ( (iovec[i].iov_len - iov_sent) > cpg_inst->max_msg_size) { iov[1].iov_len = cpg_inst->max_msg_size; } else { iov[1].iov_len = iovec[i].iov_len - iov_sent; } if (sent == 0) { req_lib_cpg_mcast.type = LIBCPG_PARTIAL_FIRST; } else if ((sent + iov[1].iov_len) == msg_len) { req_lib_cpg_mcast.type = LIBCPG_PARTIAL_LAST; } else { req_lib_cpg_mcast.type = LIBCPG_PARTIAL_CONTINUED; } req_lib_cpg_mcast.fraglen = iov[1].iov_len; req_lib_cpg_mcast.header.size = sizeof (struct req_lib_cpg_partial_mcast) + iov[1].iov_len; iov[1].iov_base = (char *)iovec[i].iov_base + iov_sent; resend: error = coroipcc_msg_send_reply_receive (cpg_inst->c, iov, 2, &res_lib_cpg_partial_send, sizeof (res_lib_cpg_partial_send)); if (error == CS_ERR_TRY_AGAIN) { fprintf(stderr, "sleep. 
counter=%d\n", retry_count); if (++retry_count > MAX_RETRIES) { goto error_exit; } usleep(10000); goto resend; } iov_sent += iov[1].iov_len; sent += iov[1].iov_len; /* Next iovec */ if (iov_sent >= iovec[i].iov_len) { i++; iov_sent = 0; } error = res_lib_cpg_partial_send.header.error; } error_exit: qb_ipcc_fc_enable_max_set(cpg_inst->c, 1); return error; } cs_error_t cpg_mcast_joined ( cpg_handle_t handle, cpg_guarantee_t guarantee, const struct iovec *iovec, unsigned int iov_len) { int i; cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov[64]; struct req_lib_cpg_mcast req_lib_cpg_mcast; size_t msg_len = 0; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } for (i = 0; i < iov_len; i++ ) { msg_len += iovec[i].iov_len; } if (msg_len > cpg_inst->max_msg_size) { error = send_fragments(cpg_inst, guarantee, msg_len, iovec, iov_len); goto error_exit; } req_lib_cpg_mcast.header.size = sizeof (struct req_lib_cpg_mcast) + msg_len; req_lib_cpg_mcast.header.id = MESSAGE_REQ_CPG_MCAST; req_lib_cpg_mcast.guarantee = guarantee; req_lib_cpg_mcast.msglen = msg_len; iov[0].iov_base = (void *)&req_lib_cpg_mcast; iov[0].iov_len = sizeof (struct req_lib_cpg_mcast); memcpy (&iov[1], iovec, iov_len * sizeof (struct iovec)); qb_ipcc_fc_enable_max_set(cpg_inst->c, 2); error = qb_to_cs_error(qb_ipcc_sendv(cpg_inst->c, iov, iov_len + 1)); qb_ipcc_fc_enable_max_set(cpg_inst->c, 1); error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_iteration_initialize( cpg_handle_t handle, cpg_iteration_type_t iteration_type, const struct cpg_name *group, cpg_iteration_handle_t *cpg_iteration_handle) { cs_error_t error; struct iovec iov; struct cpg_inst *cpg_inst; struct cpg_iteration_instance_t *cpg_iteration_instance; struct req_lib_cpg_iterationinitialize req_lib_cpg_iterationinitialize; struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize; if (group && group->length > CPG_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } if (cpg_iteration_handle == NULL) { return (CS_ERR_INVALID_PARAM); } if ((iteration_type == CPG_ITERATION_ONE_GROUP && group == NULL) || (iteration_type != CPG_ITERATION_ONE_GROUP && group != NULL)) { return (CS_ERR_INVALID_PARAM); } if (iteration_type != CPG_ITERATION_NAME_ONLY && iteration_type != CPG_ITERATION_ONE_GROUP && iteration_type != CPG_ITERATION_ALL) { return (CS_ERR_INVALID_PARAM); } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } error = hdb_error_to_cs (hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance_t), cpg_iteration_handle)); if (error != CS_OK) { goto error_put_cpg_db; } error = hdb_error_to_cs (hdb_handle_get (&cpg_iteration_handle_t_db, *cpg_iteration_handle, (void *)&cpg_iteration_instance)); if (error != CS_OK) { goto error_destroy; } cpg_iteration_instance->conn = cpg_inst->c; list_init (&cpg_iteration_instance->list); req_lib_cpg_iterationinitialize.header.size = sizeof (struct req_lib_cpg_iterationinitialize); req_lib_cpg_iterationinitialize.header.id = MESSAGE_REQ_CPG_ITERATIONINITIALIZE; req_lib_cpg_iterationinitialize.iteration_type = iteration_type; if (group) { marshall_to_mar_cpg_name_t (&req_lib_cpg_iterationinitialize.group_name, group); } iov.iov_base = (void *)&req_lib_cpg_iterationinitialize; iov.iov_len = sizeof (struct req_lib_cpg_iterationinitialize); error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1, 
&res_lib_cpg_iterationinitialize, sizeof (struct res_lib_cpg_iterationinitialize)); if (error != CS_OK) { goto error_put_destroy; } cpg_iteration_instance->executive_iteration_handle = res_lib_cpg_iterationinitialize.iteration_handle; cpg_iteration_instance->cpg_iteration_handle = *cpg_iteration_handle; list_add (&cpg_iteration_instance->list, &cpg_inst->iteration_list_head); hdb_handle_put (&cpg_iteration_handle_t_db, *cpg_iteration_handle); hdb_handle_put (&cpg_handle_t_db, handle); return (res_lib_cpg_iterationinitialize.header.error); error_put_destroy: hdb_handle_put (&cpg_iteration_handle_t_db, *cpg_iteration_handle); error_destroy: hdb_handle_destroy (&cpg_iteration_handle_t_db, *cpg_iteration_handle); error_put_cpg_db: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_iteration_next( cpg_iteration_handle_t handle, struct cpg_iteration_description_t *description) { cs_error_t error; struct cpg_iteration_instance_t *cpg_iteration_instance; struct req_lib_cpg_iterationnext req_lib_cpg_iterationnext; struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext; if (description == NULL) { return CS_ERR_INVALID_PARAM; } error = hdb_error_to_cs (hdb_handle_get (&cpg_iteration_handle_t_db, handle, (void *)&cpg_iteration_instance)); if (error != CS_OK) { goto error_exit; } req_lib_cpg_iterationnext.header.size = sizeof (struct req_lib_cpg_iterationnext); req_lib_cpg_iterationnext.header.id = MESSAGE_REQ_CPG_ITERATIONNEXT; req_lib_cpg_iterationnext.iteration_handle = cpg_iteration_instance->executive_iteration_handle; error = qb_to_cs_error (qb_ipcc_send (cpg_iteration_instance->conn, &req_lib_cpg_iterationnext, req_lib_cpg_iterationnext.header.size)); if (error != CS_OK) { goto error_put; } error = qb_to_cs_error (qb_ipcc_recv (cpg_iteration_instance->conn, &res_lib_cpg_iterationnext, sizeof(struct res_lib_cpg_iterationnext), -1)); if (error != CS_OK) { goto error_put; } marshall_from_mar_cpg_iteration_description_t( description, &res_lib_cpg_iterationnext.description); error = res_lib_cpg_iterationnext.header.error; error_put: hdb_handle_put (&cpg_iteration_handle_t_db, handle); error_exit: return (error); } cs_error_t cpg_iteration_finalize ( cpg_iteration_handle_t handle) { cs_error_t error; struct iovec iov; struct cpg_iteration_instance_t *cpg_iteration_instance; struct req_lib_cpg_iterationfinalize req_lib_cpg_iterationfinalize; struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize; error = hdb_error_to_cs (hdb_handle_get (&cpg_iteration_handle_t_db, handle, (void *)&cpg_iteration_instance)); if (error != CS_OK) { goto error_exit; } req_lib_cpg_iterationfinalize.header.size = sizeof (struct req_lib_cpg_iterationfinalize); req_lib_cpg_iterationfinalize.header.id = MESSAGE_REQ_CPG_ITERATIONFINALIZE; req_lib_cpg_iterationfinalize.iteration_handle = cpg_iteration_instance->executive_iteration_handle; iov.iov_base = (void *)&req_lib_cpg_iterationfinalize; iov.iov_len = sizeof (struct req_lib_cpg_iterationfinalize); error = coroipcc_msg_send_reply_receive (cpg_iteration_instance->conn, &iov, 1, &res_lib_cpg_iterationfinalize, sizeof (struct req_lib_cpg_iterationfinalize)); if (error != CS_OK) { goto error_put; } cpg_iteration_instance_finalize (cpg_iteration_instance); hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->cpg_iteration_handle); return (res_lib_cpg_iterationfinalize.header.error); error_put: hdb_handle_put (&cpg_iteration_handle_t_db, handle); error_exit: return (error); } /** @} */ diff --git a/test/cpgbound.c 
b/test/cpgbound.c index c4d37fcd..f3ac3750 100644 --- a/test/cpgbound.c +++ b/test/cpgbound.c @@ -1,126 +1,130 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include static void cpg_deliver_fn ( cpg_handle_t handle, const struct cpg_name *group_name, uint32_t nodeid, uint32_t pid, void *m, size_t msg_len) { } static void cpg_confchg_fn ( cpg_handle_t handle, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { } static cpg_callbacks_t callbacks = { cpg_deliver_fn, cpg_confchg_fn }; static struct cpg_name group_name = { .value = "cpg_bm", .length = 6 }; static unsigned char buffer[2000000]; int main (void) { cpg_handle_t handle; cs_error_t result; unsigned int i = 0; struct iovec iov; int res; unsigned int msg_size; result = cpg_initialize (&handle, &callbacks); if (result != CS_OK) { printf ("Couldn't initialize CPG service %d\n", result); exit (0); } res = cpg_join (handle, &group_name); if (res != CS_OK) { printf ("cpg_join failed with result %d\n", res); exit (1); } iov.iov_base = (void *)buffer; /* * Demonstrate cpg_mcast_joined */ msg_size = 1025000; for (i = 0; i < 1000000000; i++) { iov.iov_len = msg_size; try_again_one: result = cpg_mcast_joined (handle, CPG_TYPE_AGREED, &iov, 1); if (result == CS_ERR_TRY_AGAIN) { goto try_again_one; } if (result == CS_ERR_INVALID_PARAM) { printf ("found boundary at %d\n", msg_size); exit (1); } msg_size += 1; printf ("msg size %d\n", msg_size); result = cpg_dispatch (handle, CS_DISPATCH_ALL); + if (result != CS_OK && result != CS_ERR_TRY_AGAIN) { + printf ("cpg_dispatch failed with result %d\n", result); + exit (1); + } } cpg_finalize (handle); return (0); } diff --git a/test/cpghum.c b/test/cpghum.c
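The check added to cpgbound.c above (and the matching one in cpgverify.c further below) treats CS_ERR_TRY_AGAIN from cpg_dispatch() as transient and any other non-CS_OK code as fatal. A small, hypothetical helper showing the same convention in isolation (illustrative sketch, not part of the patch; assumes <corosync/cpg.h> and <stdio.h>):

	/* Drain pending CPG events for a handle; treat flow control as benign. */
	static int drain_cpg_events (cpg_handle_t h)
	{
		cs_error_t rc;

		rc = cpg_dispatch (h, CS_DISPATCH_ALL);
		if (rc == CS_OK || rc == CS_ERR_TRY_AGAIN) {
			return 0;	/* nothing fatal; caller may retry later */
		}
		fprintf (stderr, "cpg_dispatch failed: %d\n", rc);
		return -1;
	}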
index 6d1fe60b..5772f818 100644 --- a/test/cpghum.c +++ b/test/cpghum.c @@ -1,432 +1,437 @@ /* * Copyright (c) 2015 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static cpg_handle_t handle; static pthread_t thread; #ifndef timersub #define timersub(a, b, result) \ do { \ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ if ((result)->tv_usec < 0) { \ --(result)->tv_sec; \ (result)->tv_usec += 1000000; \ } \ } while (0) #endif /* timersub */ static int alarm_notice; #define ONE_MEG 1048576 #define DATASIZE (ONE_MEG*20) static char data[DATASIZE]; static int send_counter = 0; static int do_syslog = 0; static int quiet = 0; static volatile int stopped; // stats static unsigned int length_errors=0; static unsigned int crc_errors=0; static unsigned int sequence_errors=0; static unsigned int packets_sent=0; static unsigned int packets_recvd=0; static unsigned int send_retries=0; static unsigned int send_fails=0; static void cpg_bm_confchg_fn ( cpg_handle_t handle_in, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { } static unsigned int g_recv_count; static unsigned int g_recv_length; static unsigned int g_write_size; static int g_recv_counter = 0; static void cpg_bm_deliver_fn ( cpg_handle_t handle_in, const struct cpg_name *group_name, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { int *value = msg; uLong crc=0; uLong recv_crc = value[1] & 0xFFFFFFFF; packets_recvd++; g_recv_length = msg_len; // Basic check, packets should all be the right size if (g_write_size && (msg_len != g_write_size)) { 
length_errors++; fprintf(stderr, "%s: message sizes don't match. got %lu, expected %u\n", group_name->value, msg_len, g_write_size); if (do_syslog) { syslog(LOG_ERR, "%s: message sizes don't match. got %lu, expected %u\n", group_name->value, msg_len, g_write_size); } } // Sequence counters are incrementing in step? if (*value != g_recv_counter) { sequence_errors++; fprintf(stderr, "%s: counters don't match. got %d, expected %d\n", group_name->value, *value, g_recv_counter); if (do_syslog) { syslog(LOG_ERR, "%s: counters don't match. got %d, expected %d\n", group_name->value, *value, g_recv_counter); } // Catch up or we'll be printing errors for ever g_recv_counter = *value +1; } else { g_recv_counter++; } // Check crc crc = crc32(0, NULL, 0); crc = crc32(crc, (Bytef *)&value[2], msg_len-sizeof(int)*2) & 0xFFFFFFFF; if (crc != recv_crc) { crc_errors++; fprintf(stderr, "%s: CRCs don't match. got %lx, expected %lx\n", group_name->value, recv_crc, crc); if (do_syslog) { syslog(LOG_ERR, "%s: CRCs don't match. got %lx, expected %lx\n", group_name->value, recv_crc, crc); } } g_recv_count++; } static cpg_model_v1_data_t model1_data = { .cpg_deliver_fn = cpg_bm_deliver_fn, .cpg_confchg_fn = cpg_bm_confchg_fn, }; static cpg_callbacks_t callbacks = { .cpg_deliver_fn = cpg_bm_deliver_fn, .cpg_confchg_fn = cpg_bm_confchg_fn }; static struct cpg_name group_name = { .value = "cpghum", .length = 7 }; static void cpg_test ( cpg_handle_t handle_in, int write_size, int delay_time, int print_time) { struct timeval tv1, tv2, tv_elapsed; struct iovec iov; unsigned int res; int i; unsigned int *dataint = (unsigned int *)data; uLong crc; alarm_notice = 0; iov.iov_base = data; iov.iov_len = write_size; g_recv_count = 0; alarm (print_time); gettimeofday (&tv1, NULL); do { dataint[0] = send_counter++; for (i=2; i<(DATASIZE-sizeof(int)*2)/4; i++) { dataint[i] = rand(); } crc = crc32(0, NULL, 0); dataint[1] = crc32(crc, (Bytef*)&dataint[2], write_size-sizeof(int)*2); resend: res = cpg_mcast_joined (handle_in, CPG_TYPE_AGREED, &iov, 1); if (res == CS_ERR_TRY_AGAIN) { usleep(10000); send_retries++; goto resend; } if (res != CS_OK) { fprintf(stderr, "send failed: %d\n", res); send_fails++; } else { packets_sent++; } usleep(delay_time*1000); } while (alarm_notice == 0 && (res == CS_OK || res == CS_ERR_TRY_AGAIN) && stopped == 0); gettimeofday (&tv2, NULL); timersub (&tv2, &tv1, &tv_elapsed); if (!quiet) { printf ("%s: %5d message%s received, ", group_name.value, g_recv_count, g_recv_count==1?"":"s"); printf ("%5d bytes per write\n", write_size); } } static void sigalrm_handler (int num) { alarm_notice = 1; } static void sigint_handler (int num) { stopped = 1; } static void* dispatch_thread (void *arg) { cpg_dispatch (handle, CS_DISPATCH_BLOCKING); return NULL; } static void usage(char *cmd) { fprintf(stderr, "%s [OPTIONS]\n", cmd); fprintf(stderr, "\n"); fprintf(stderr, "%s sends CPG messages to all registered users of the CPG.\n", cmd); fprintf(stderr, "The messages have a sequence number and a CRC so that missing or\n"); fprintf(stderr, "corrupted messages will be detected and reported.\n"); fprintf(stderr, "\n"); fprintf(stderr, "%s can also be asked to simply listen for (and check) packets\n", cmd); fprintf(stderr, "so that there is another node in the cluster connected to the CPG.\n"); fprintf(stderr, "\n"); fprintf(stderr, "When -l is present, packet size is only checked if specified by -w or -W\n"); fprintf(stderr, "and it, obviously, must match that of the sender.\n"); fprintf(stderr, "\n"); fprintf(stderr, 
"Multiple copies, in different CPGs, can also be run on the same or\n"); fprintf(stderr, "different nodes by using the -n option.\n"); fprintf(stderr, "\n"); fprintf(stderr, "%s can't handle more than 1 sender in the same CPG as it messes with the\n", cmd); fprintf(stderr, "sequence numbers.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -w Write size in Kbytes, default 4\n"); fprintf(stderr, " -W Write size in bytes, default 4096\n"); fprintf(stderr, " -n CPG name to use, default 'cpghum'\n"); fprintf(stderr, " -d Delay between sending packets (mS), default 1000\n"); fprintf(stderr, " -r Number of repetitions, default 100\n"); fprintf(stderr, " -p Delay between printing output(S), default 10s\n"); fprintf(stderr, " -l Listen and check CRCs only, don't send (^C to quit)\n"); fprintf(stderr, " -m cpg_initialise() model. Default 1.\n"); fprintf(stderr, " -s Also send errors to syslog (for daemon log correlation).\n"); fprintf(stderr, " -q Quiet. Don't print messages every 10 seconds (see also -p)\n"); fprintf(stderr, "\n"); } int main (int argc, char *argv[]) { int i; unsigned int res; uint32_t maxsize; int opt; int bs; int write_size = 4096; int delay_time = 1000; int repetitions = 100; int print_time = 10; int have_size = 0; int listen_only = 0; int model = 1; while ( (opt = getopt(argc, argv, "qlsn:d:r:p:m:w:W:")) != -1 ) { switch (opt) { case 'w': // Write size in K bs = atoi(optarg); if (bs > 0) { write_size = bs*1024; have_size = 1; } break; case 'W': // Write size in bytes bs = atoi(optarg); if (bs > 0) { write_size = bs; have_size = 1; } break; case 'n': + if (strlen(optarg) >= CPG_MAX_NAME_LENGTH) { + fprintf(stderr, "CPG name too long\n"); + exit(1); + } + strcpy(group_name.value, optarg); group_name.length = strlen(group_name.value); break; case 'd': delay_time = atoi(optarg); break; case 'r': repetitions = atoi(optarg); break; case 'p': print_time = atoi(optarg); break; case 'l': listen_only = 1; break; case 's': do_syslog = 1; break; case 'q': quiet = 1; break; case 'm': model = atoi(optarg); if (model < 0 || model > 1) { fprintf(stderr, "%s: Model must be 0-1\n", argv[0]); exit(1); } break; case '?': usage(basename(argv[0])); exit(0); } } qb_log_init("cpghum", LOG_USER, LOG_EMERG); qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_DEBUG); qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); g_write_size = write_size; signal (SIGALRM, sigalrm_handler); signal (SIGINT, sigint_handler); switch (model) { case 0: res = cpg_initialize (&handle, &callbacks); break; case 1: res = cpg_model_initialize (&handle, CPG_MODEL_V1, (cpg_model_data_t *)&model1_data, NULL); break; default: res=999; // can't get here but it keeps the compiler happy break; } if (res != CS_OK) { printf ("cpg_initialize failed with result %d\n", res); exit (1); } pthread_create (&thread, NULL, dispatch_thread, NULL); res = cpg_join (handle, &group_name); if (res != CS_OK) { printf ("cpg_join failed with result %d\n", res); exit (1); } if (listen_only) { - int secs; + int secs = 0; if (!quiet) { printf("-- Listening on CPG %s\n", group_name.value); printf("-- Ignore any starting \"counters don't match\" error while we catch up\n"); } /* Only check packet size if specified on the command-line */ if (!have_size) { g_write_size = 0; } while (!stopped) { sleep(1); if (++secs > print_time && !quiet) { printf ("%s: %5d message%s received. 
%d bytes\n", group_name.value, g_recv_count, g_recv_count==1?"":"s", g_recv_length); secs = 0; g_recv_count = 0; } } } else { cpg_max_atomic_msgsize_get (handle, &maxsize); if ( write_size > maxsize) { fprintf(stderr, "INFO: packet size (%d) is larger than the maximum atomic size (%d), libcpg will fragment\n", write_size, maxsize); } for (i = 0; i < repetitions && !stopped; i++) { cpg_test (handle, write_size, delay_time, print_time); signal (SIGALRM, sigalrm_handler); } } res = cpg_finalize (handle); if (res != CS_OK) { printf ("cpg_finalize failed with result %d\n", res); exit (1); } printf("\n"); printf("Stats:\n"); if (!listen_only) { printf(" packets sent: %d\n", packets_sent); printf(" send failures: %d\n", send_fails); printf(" send retries: %d\n", send_retries); } if (have_size) { printf(" length errors: %d\n", length_errors); } printf(" packets recvd: %d\n", packets_recvd); printf(" sequence errors: %d\n", sequence_errors); printf(" crc errors: %d\n", crc_errors); printf("\n"); return (0); } diff --git a/test/cpgverify.c b/test/cpgverify.c index 5349c515..928eff33 100644 --- a/test/cpgverify.c +++ b/test/cpgverify.c @@ -1,191 +1,195 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct my_msg { unsigned int msg_size; unsigned char sha1[20]; unsigned char buffer[0]; }; static int deliveries = 0; PK11Context* sha1_context; static void cpg_deliver_fn ( cpg_handle_t handle, const struct cpg_name *group_name, uint32_t nodeid, uint32_t pid, void *m, size_t msg_len) { const struct my_msg *msg2 = m; unsigned char sha1_compare[20]; unsigned int i; unsigned int sha1_len; printf ("msg '%s'\n", msg2->buffer); PK11_DigestBegin(sha1_context); PK11_DigestOp(sha1_context, msg2->buffer, msg2->msg_size); PK11_DigestFinal(sha1_context, sha1_compare, &sha1_len, sizeof(sha1_compare)); printf ("SIZE %d HASH: ", msg2->msg_size); for (i = 0; i < 20; i++) { printf ("%x", sha1_compare[i]); } printf ("\n"); if (memcmp (sha1_compare, msg2->sha1, 20) != 0) { printf ("incorrect hash\n"); exit (1); } deliveries++; } static void cpg_confchg_fn ( cpg_handle_t handle, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { } static cpg_callbacks_t callbacks = { cpg_deliver_fn, cpg_confchg_fn }; static struct cpg_name group_name = { .value = "cpg_bm", .length = 6 }; static unsigned char buffer[200000]; int main (int argc, char *argv[]) { cpg_handle_t handle; cs_error_t result; int i = 0; int j; struct my_msg msg; struct iovec iov[2]; const char *options = "i:"; int iter = 1000; int opt; int run_forever = 1; unsigned int sha1_len; while ((opt = getopt(argc, argv, options)) != -1) { switch (opt) { case 'i': run_forever = 0; iter = atoi(optarg); break; } } result = cpg_initialize (&handle, &callbacks); if (result != CS_OK) { printf ("Couldn't initialize CPG service %d\n", result); exit (0); } if (NSS_NoDB_Init(".") != SECSuccess) { printf ("Couldn't initialize nss\n"); exit (0); } if ((sha1_context = PK11_CreateDigestContext(SEC_OID_SHA1)) == NULL) { printf ("Couldn't initialize nss\n"); exit (0); } result = cpg_join (handle, &group_name); if (result != CS_OK) { printf ("cpg_join failed with result %d\n", result); exit (1); } iov[0].iov_base = (void *)&msg; iov[0].iov_len = sizeof (struct my_msg); iov[1].iov_base = (void *)buffer; /* * Demonstrate cpg_mcast_joined */ i = 0; do { msg.msg_size = 100 + rand() % 100000; iov[1].iov_len = msg.msg_size; for (j = 0; j < msg.msg_size; j++) { buffer[j] = j; } sprintf ((char *)buffer, "cpg_mcast_joined: This is message %12d", i); PK11_DigestBegin(sha1_context); PK11_DigestOp(sha1_context, buffer, msg.msg_size); PK11_DigestFinal(sha1_context, msg.sha1, &sha1_len, sizeof(msg.sha1)); try_again_one: result = cpg_mcast_joined (handle, CPG_TYPE_AGREED, iov, 2); if (result == CS_ERR_TRY_AGAIN) { goto try_again_one; } result = cpg_dispatch (handle, CS_DISPATCH_ALL); + if (result != CS_OK && result != CS_ERR_TRY_AGAIN) { + printf("cpg_dispatch failed with result %d\n", result); + exit(1); + } i++; } while (run_forever || i < iter); PK11_DestroyContext(sha1_context, PR_TRUE); cpg_finalize (handle); return (0); } diff --git a/test/testsam.c b/test/testsam.c index 208db301..d31262ad 100644 --- a/test/testsam.c +++ b/test/testsam.c @@ -1,1479 +1,1476 @@ /* * Copyright (c) 2009-2011 Red Hat, Inc. * * All rights reserved. 
* * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * Provides test of SAM API */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *__progname; static int test2_sig_delivered = 0; static int test5_hc_cb_count = 0; static int test6_sig_delivered = 0; /* * First test will just register SAM, with policy restart. First instance will * sleep one second, send hc and sleep another 3 seconds. This should force restart. * Second instance will sleep one second, send hc, stop hc and sleep 3 seconds. * Then start hc again and sleep 3 seconds. This should force restart again. * Last instance just calls initialize again. This should end with error. * Then call start, followed by stop and start again. Finally, we will call finalize * twice. One should succeed, second should fail. After this, we will call every function * (none should succeed). */ static int test1 (void) { cs_error_t error; unsigned int instance_id; int i; printf ("%s: initialize\n", __FUNCTION__); error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART); if (error != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", error); return 1; } printf ("%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", error); return 1; } if (instance_id == 1 || instance_id == 2) { printf ("%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", error); return 1; } for (i = 0; i < 10; i++) { printf ("%s iid %d: sleep 1\n", __FUNCTION__, instance_id); sleep (1); printf ("%s iid %d: hc send\n", __FUNCTION__, instance_id); error = sam_hc_send (); if (error != CS_OK) { fprintf (stderr, "Can't send hc. 
Error %d\n", error); return 1; } } if (instance_id == 2) { printf ("%s iid %d: stop\n", __FUNCTION__, instance_id); error = sam_stop (); if (error != CS_OK) { fprintf (stderr, "Can't send hc. Error %d\n", error); return 1; } } printf ("%s iid %d: sleep 3\n", __FUNCTION__, instance_id); sleep (3); printf ("%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", error); return 1; } printf ("%s iid %d: sleep 3\n", __FUNCTION__, instance_id); sleep (3); return 0; } if (instance_id == 3) { error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART); if (error == CS_OK) { fprintf (stderr, "Can initialize SAM API after initialization"); return 1; } error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", error); return 1; } error = sam_stop (); if (error != CS_OK) { fprintf (stderr, "Can't stop hc. Error %d\n", error); return 1; } error = sam_finalize (); if (error != CS_OK) { fprintf (stderr, "Can't finalize sam. Error %d\n", error); return 1; } error = sam_finalize (); if (error == CS_OK) { fprintf (stderr, "Can finalize sam after finalization!\n"); return 1; } if (sam_initialize (2, SAM_RECOVERY_POLICY_RESTART) == CS_OK || sam_start () == CS_OK || sam_stop () == CS_OK || sam_register (NULL) == CS_OK || sam_hc_send () == CS_OK || sam_hc_callback_register (NULL) == CS_OK) { fprintf (stderr, "Can call one of function after finalization!\n"); return 1; } return 0; } return 1; } static void test2_signal (int sig) { printf ("%s\n", __FUNCTION__); test2_sig_delivered = 1; } /* * This tests recovery policy quit and callback. */ static int test2 (void) { cs_error_t error; unsigned int instance_id; printf ("%s: initialize\n", __FUNCTION__); error = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT); if (error != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", error); return 1; } printf ("%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", error); return 1; } if (instance_id == 1) { signal (SIGTERM, test2_signal); printf ("%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", error); return 1; } printf ("%s iid %d: sleep 1\n", __FUNCTION__, instance_id); sleep (1); printf ("%s iid %d: hc send\n", __FUNCTION__, instance_id); error = sam_hc_send (); if (error != CS_OK) { fprintf (stderr, "Can't send hc. Error %d\n", error); return 1; } printf ("%s iid %d: wait for delivery of signal\n", __FUNCTION__, instance_id); while (!test2_sig_delivered) { sleep (1); } printf ("%s iid %d: wait for real kill\n", __FUNCTION__, instance_id); sleep (3); } return 1; } /* * Smoke test. Better to turn off coredump ;) This has no time limit, just restart process * when it dies. */ static int test3 (void) { cs_error_t error; unsigned int instance_id; printf ("%s: initialize\n", __FUNCTION__); error = sam_initialize (0, SAM_RECOVERY_POLICY_RESTART); if (error != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", error); return 1; } printf ("%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", error); return 1; } if (instance_id < 100) { printf ("%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. 
Error %d\n", error); return 1; } printf ("%s iid %d: Sending signal\n", __FUNCTION__, instance_id); kill(getpid(), SIGSEGV); return 1; } return 0; } /* * Test sam_data_store, sam_data_restore and sam_data_getsize */ static int test4 (void) { size_t size; cs_error_t err; int i; unsigned int instance_id; char saved_data[128]; char saved_data2[128]; printf ("%s: sam_data_getsize 1\n", __FUNCTION__); err = sam_data_getsize (&size); if (err != CS_ERR_BAD_HANDLE) { fprintf (stderr, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); return 1; } printf ("%s: sam_data_getsize 2\n", __FUNCTION__); err = sam_data_getsize (NULL); if (err != CS_ERR_INVALID_PARAM) { fprintf (stderr, "Test should return CS_ERR_INVALID_PARAM. Error returned %d\n", err); return 1; } printf ("%s: sam_data_store 1\n", __FUNCTION__); err = sam_data_store (NULL, 0); if (err != CS_ERR_BAD_HANDLE) { fprintf (stderr, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); return 1; } printf ("%s: sam_data_restore 1\n", __FUNCTION__); err = sam_data_restore (saved_data, sizeof (saved_data)); if (err != CS_ERR_BAD_HANDLE) { fprintf (stderr, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); return 1; } printf ("%s: sam_initialize\n", __FUNCTION__); err = sam_initialize (0, SAM_RECOVERY_POLICY_RESTART); if (err != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", err); return 1; } printf ("%s: sam_data_getsize 3\n", __FUNCTION__); err = sam_data_getsize (&size); if (err != CS_OK) { fprintf (stderr, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); return 1; } if (size != 0) { fprintf (stderr, "Test should return size of 0. Returned %zx\n", size); return 1; } printf ("%s: sam_data_restore 2\n", __FUNCTION__); err = sam_data_restore (NULL, sizeof (saved_data)); if (err != CS_ERR_INVALID_PARAM) { fprintf (stderr, "Test should return CS_ERR_INVALID_PARAM. Error returned %d\n", err); return 1; } /* * Store some real data */ for (i = 0; i < sizeof (saved_data); i++) { saved_data[i] = (char)(i + 5); } printf ("%s: sam_data_store 2\n", __FUNCTION__); err = sam_data_store (saved_data, sizeof (saved_data)); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } printf ("%s: sam_data_getsize 4\n", __FUNCTION__); err = sam_data_getsize (&size); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } if (size != sizeof (saved_data)) { fprintf (stderr, "Test should return size of 0. Returned %zx\n", size); return 1; } printf ("%s: sam_data_restore 3\n", __FUNCTION__); err = sam_data_restore (saved_data2, sizeof (saved_data2) - 1); if (err != CS_ERR_INVALID_PARAM) { fprintf (stderr, "Test should return CS_ERR_INVALID_PARAM. Error returned %d\n", err); return 1; } printf ("%s: sam_data_restore 4\n", __FUNCTION__); err = sam_data_restore (saved_data2, sizeof (saved_data2)); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } if (memcmp (saved_data, saved_data2, sizeof (saved_data2)) != 0) { fprintf (stderr, "Retored data are not same\n"); return 1; } memset (saved_data2, 0, sizeof (saved_data2)); printf ("%s: sam_data_store 3\n", __FUNCTION__); err = sam_data_store (NULL, 1); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } printf ("%s: sam_data_getsize 5\n", __FUNCTION__); err = sam_data_getsize (&size); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. 
Error returned %d\n", err); return 1; } if (size != 0) { fprintf (stderr, "Test should return size of 0. Returned %zx\n", size); return 1; } printf ("%s: sam_data_store 4\n", __FUNCTION__); err = sam_data_store (saved_data, sizeof (saved_data)); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } printf ("%s: register\n", __FUNCTION__); err = sam_register (&instance_id); if (err != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", err); return 1; } if (instance_id == 1) { printf ("%s iid %d: sam_start\n", __FUNCTION__, instance_id); err = sam_start (); if (err != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", err); return 1; } printf ("%s iid %d: sam_data_getsize 6\n", __FUNCTION__, instance_id); err = sam_data_getsize (&size); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } if (size != sizeof (saved_data2)) { fprintf (stderr, "Test should return size of 0. Returned %zx\n", size); return 1; } printf ("%s iid %d: sam_data_restore 5\n", __FUNCTION__, instance_id); err = sam_data_restore (saved_data2, sizeof (saved_data2)); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } if (memcmp (saved_data, saved_data2, sizeof (saved_data2)) != 0) { fprintf (stderr, "Retored data are not same\n"); return 1; } for (i = 0; i < sizeof (saved_data); i++) { saved_data[i] = (char)(i - 5); } printf ("%s iid %d: sam_data_store 5\n", __FUNCTION__, instance_id); err = sam_data_store (saved_data, sizeof (saved_data) - 7); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } exit (1); } if (instance_id == 2) { printf ("%s iid %d: sam_start\n", __FUNCTION__, instance_id); err = sam_start (); if (err != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", err); return 1; } printf ("%s iid %d: sam_data_getsize 7\n", __FUNCTION__, instance_id); err = sam_data_getsize (&size); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } if (size != sizeof (saved_data2) - 7) { fprintf (stderr, "Test should return size of 0. Returned %zx\n", size); return 1; } printf ("%s iid %d: sam_data_restore 6\n", __FUNCTION__, instance_id); err = sam_data_restore (saved_data2, sizeof (saved_data2)); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } for (i = 0; i < sizeof (saved_data); i++) { saved_data[i] = (char)(i - 5); } if (memcmp (saved_data, saved_data2, sizeof (saved_data2) - 7) != 0) { fprintf (stderr, "Retored data are not same\n"); return 1; } printf ("%s iid %d: sam_data_store 6\n", __FUNCTION__, instance_id); err = sam_data_store (NULL, 0); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } exit (1); } if (instance_id == 3) { printf ("%s iid %d: sam_data_getsize 8\n", __FUNCTION__, instance_id); err = sam_data_getsize (&size); if (err != CS_OK) { fprintf (stderr, "Test should return CS_OK. Error returned %d\n", err); return 1; } if (size != 0) { fprintf (stderr, "Test should return size of 0. Returned %zx\n", size); return 1; } } return (0); } static int test5_hc_cb (void) { cs_error_t res; printf ("%s %d\n", __FUNCTION__, ++test5_hc_cb_count); res = sam_data_store (&test5_hc_cb_count, sizeof (test5_hc_cb_count)); if (res != CS_OK) return 1; if (test5_hc_cb_count > 10) return 1; return 0; } /* * Test event driven healtchecking. 
*/ static int test5 (void) { cs_error_t error; unsigned int instance_id; int hc_cb_count; printf ("%s: initialize\n", __FUNCTION__); error = sam_initialize (100, SAM_RECOVERY_POLICY_RESTART); if (error != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", error); return 1; } printf ("%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", error); return 1; } if (instance_id == 1) { printf ("%s iid %d: hc callback register\n", __FUNCTION__, instance_id); error = sam_hc_callback_register (test5_hc_cb); if (error != CS_OK) { fprintf (stderr, "Can't register hc cb. Error %d\n", error); return 1; } printf ("%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", error); return 1; } sleep (2); printf ("%s iid %d: Failed. Wasn't killed.\n", __FUNCTION__, instance_id); return 1; } if (instance_id == 2) { error = sam_data_restore (&hc_cb_count, sizeof (hc_cb_count)); if (error != CS_OK) { fprintf (stderr, "sam_data_restore should return CS_OK. Error returned %d\n", error); return 1; } if (hc_cb_count != 11) { fprintf (stderr, "%s iid %d: Premature killed. hc_cb_count should be 11 and it is %d\n", __FUNCTION__, instance_id - 1, hc_cb_count); return 1; } return 0; } return 1; } static void test6_signal (int sig) { cs_error_t error; printf ("%s\n", __FUNCTION__); test6_sig_delivered++; if ((error = sam_data_store (&test6_sig_delivered, sizeof (test6_sig_delivered))) != CS_OK) { fprintf (stderr, "Can't store data! Error : %d\n", error); } } /* * Test warn signal set. */ static int test6 (void) { cs_error_t error; unsigned int instance_id; int test6_sig_del; printf ("%s: initialize\n", __FUNCTION__); error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART); if (error != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", error); return 1; } printf ("%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", error); return 1; } if (instance_id == 1) { error = sam_warn_signal_set (SIGUSR1); if (error != CS_OK) { fprintf (stderr, "Can't set warn signal. Error %d\n", error); return 1; } signal (SIGUSR1, test6_signal); printf ("%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", error); return 1; } printf ("%s iid %d: sleep 1\n", __FUNCTION__, instance_id); sleep (1); printf ("%s iid %d: hc send\n", __FUNCTION__, instance_id); error = sam_hc_send (); if (error != CS_OK) { fprintf (stderr, "Can't send hc. Error %d\n", error); return 1; } printf ("%s iid %d: wait for delivery of signal\n", __FUNCTION__, instance_id); while (!test6_sig_delivered) { sleep (1); } printf ("%s iid %d: wait for real kill\n", __FUNCTION__, instance_id); sleep (3); printf ("%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); return (1); } if (instance_id == 2) { error = sam_data_restore (&test6_sig_del, sizeof (test6_sig_del)); if (error != CS_OK) { fprintf (stderr, "Can't restore data. Error %d\n", error); return 1; } if (test6_sig_del != 1) { fprintf (stderr, "Previous test failed. Signal was not delivered\n"); return 1; } error = sam_warn_signal_set (SIGKILL); if (error != CS_OK) { fprintf (stderr, "Can't set warn signal. 
Error %d\n", error); return 1; } signal (SIGUSR1, test6_signal); printf ("%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", error); return 1; } printf ("%s iid %d: sleep 1\n", __FUNCTION__, instance_id); sleep (1); printf ("%s iid %d: hc send\n", __FUNCTION__, instance_id); error = sam_hc_send (); if (error != CS_OK) { fprintf (stderr, "Can't send hc. Error %d\n", error); return 1; } printf ("%s iid %d: wait for delivery of signal\n", __FUNCTION__, instance_id); while (!test6_sig_delivered) { sleep (1); } printf ("%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); return (1); } if (instance_id == 3) { error = sam_data_restore (&test6_sig_del, sizeof (test6_sig_del)); if (error != CS_OK) { fprintf (stderr, "Can't restore data. Error %d\n", error); return 1; } if (test6_sig_del != 1) { fprintf (stderr, "Previous test failed. Signal WAS delivered\n"); return 1; } return (0); } return 1; } static void *test7_thread (void *arg) { /* Wait 5s */ sleep (5); exit (0); } /* * Test quorum */ static int test7 (void) { cmap_handle_t cmap_handle; cs_error_t err; unsigned int instance_id; pthread_t kill_thread; char *str; err = cmap_initialize (&cmap_handle); if (err != CS_OK) { printf ("Could not initialize Cluster Map API instance error %d. Test skipped\n", err); return (1); } if (cmap_get_string(cmap_handle, "quorum.provider", &str) != CS_OK) { printf ("Could not get \"provider\" key: %d. Test skipped\n", err); return (1); } if (strcmp(str, "testquorum") != 0) { printf ("Provider is not testquorum. Test skipped\n"); free(str); return (1); } free(str); /* * Set to not quorate */ err = cmap_set_uint8(cmap_handle, "quorum.quorate", 0); if (err != CS_OK) { printf ("Can't set map key. Error %d\n", err); return (2); } printf ("%s: initialize\n", __FUNCTION__); err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUORUM_RESTART); if (err != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", err); return 2; } printf ("%s: register\n", __FUNCTION__); err = sam_register (&instance_id); if (err != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", err); return 2; } if (instance_id == 1) { /* * Sam start should block forever, but 10s for us should be enough */ pthread_create (&kill_thread, NULL, test7_thread, NULL); printf ("%s iid %d: start - should block forever (waiting 5s)\n", __FUNCTION__, instance_id); err = sam_start (); if (err != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", err); return 2; } printf ("%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); return (2); } if (instance_id == 2) { /* * Set to quorate */ err = cmap_set_uint8(cmap_handle, "quorum.quorate", 1); if (err != CS_OK) { printf ("Can't set map key. Error %d\n", err); return (2); } printf ("%s iid %d: start\n", __FUNCTION__, instance_id); err = sam_start (); if (err != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", err); return 2; } /* * Set corosync unquorate */ err = cmap_set_uint8(cmap_handle, "quorum.quorate", 0); if (err != CS_OK) { printf ("Can't set map key. 
Error %d\n", err); return (2); } printf ("%s iid %d: sleep 3\n", __FUNCTION__, instance_id); sleep (3); printf ("%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); return (2); } if (instance_id == 3) { return (0); } return (2); } /* * Test cmap integration + quit policy */ static int test8 (pid_t pid, pid_t old_pid, int test_n) { cmap_handle_t cmap_handle; cs_error_t err; uint64_t tstamp1, tstamp2; int32_t msec_diff; unsigned int instance_id; char key_name[CMAP_KEYNAME_MAXLEN]; char *str; err = cmap_initialize (&cmap_handle); if (err != CS_OK) { printf ("Could not initialize Cluster Map API instance error %d. Test skipped\n", err); return (1); } printf ("%s test %d\n", __FUNCTION__, test_n); if (test_n == 2) { /* * Object should not exist */ printf ("%s Testing if object exists (it shouldn't)\n", __FUNCTION__); snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.state", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err == CS_OK) { printf ("Could find key \"%s\": %d.\n", key_name, err); free(str); return (2); } } if (test_n == 1 || test_n == 2) { printf ("%s: initialize\n", __FUNCTION__); err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT | SAM_RECOVERY_POLICY_CMAP); if (err != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", err); return 2; } printf ("%s: register\n", __FUNCTION__); err = sam_register (&instance_id); if (err != CS_OK) { fprintf (stderr, "Can't register. Error %d\n", err); return 2; } snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.recovery", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"recovery\" key: %d.\n", err); return (2); } if (strcmp(str, "quit") != 0) { printf ("Recovery key \"%s\" is not \"quit\".\n", key_name); free(str); return (2); } free(str); snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.state", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "stopped") != 0) { printf ("State key is not \"stopped\".\n"); free(str); return (2); } free(str); printf ("%s iid %d: start\n", __FUNCTION__, instance_id); err = sam_start (); if (err != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", err); return 2; } err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "running") != 0) { printf ("State key is not \"running\".\n"); free(str); return (2); } free(str); printf ("%s iid %d: stop\n", __FUNCTION__, instance_id); err = sam_stop (); if (err != CS_OK) { fprintf (stderr, "Can't stop hc. Error %d\n", err); return 2; } err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "stopped") != 0) { printf ("State key is not \"stopped\".\n"); free(str); return (2); } free(str); printf ("%s iid %d: sleeping 5\n", __FUNCTION__, instance_id); sleep (5); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "stopped") != 0) { printf ("State key is not \"stopped\".\n"); free(str); return (2); } free(str); printf ("%s iid %d: start 2\n", __FUNCTION__, instance_id); err = sam_start (); if (err != CS_OK) { fprintf (stderr, "Can't start hc. 
Error %d\n", err); return 2; } err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "running") != 0) { printf ("State key is not \"running\".\n"); free(str); return (2); } free(str); if (test_n == 2) { printf ("%s iid %d: sleeping 5. Should be killed\n", __FUNCTION__, instance_id); sleep (5); return (2); } else { printf ("%s iid %d: Test HC\n", __FUNCTION__, instance_id); err = sam_hc_send (); if (err != CS_OK) { fprintf (stderr, "Can't send hc. Error %d\n", err); return 2; } snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.last_updated", pid); err = cmap_get_uint64(cmap_handle, key_name, &tstamp1); if (err != CS_OK) { printf ("Could not get \"last_updated\" key: %d.\n", err); return (2); } printf ("%s iid %d: Sleep 1\n", __FUNCTION__, instance_id); sleep (1); err = sam_hc_send (); if (err != CS_OK) { fprintf (stderr, "Can't send hc. Error %d\n", err); return 2; } sleep (1); err = cmap_get_uint64(cmap_handle, key_name, &tstamp2); if (err != CS_OK) { printf ("Could not get \"last_updated\" key: %d.\n", err); return (2); } msec_diff = (tstamp2 - tstamp1)/CS_TIME_NS_IN_MSEC; if (msec_diff < 500 || msec_diff > 2000) { printf ("Difference %d is not within <500, 2000> interval.\n", msec_diff); return (2); } printf ("%s iid %d: stop 2\n", __FUNCTION__, instance_id); err = sam_stop (); if (err != CS_OK) { fprintf (stderr, "Can't stop hc. Error %d\n", err); return 2; } snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.state", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "stopped") != 0) { printf ("State key is not \"stopped\".\n"); free(str); return (2); } free(str); printf ("%s iid %d: exiting\n", __FUNCTION__, instance_id); return (0); } } if (test_n == 3) { printf ("%s Testing if status is failed\n", __FUNCTION__); /* * Previous should be FAILED */ snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.state", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "failed") != 0) { printf ("State key is not \"failed\".\n"); free(str); return (2); } free(str); return (0); } return (2); } /* * Test cmap integration + restart policy */ static int test9 (pid_t pid, pid_t old_pid, int test_n) { cs_error_t err; cmap_handle_t cmap_handle; unsigned int instance_id; char *str; char key_name[CMAP_KEYNAME_MAXLEN]; err = cmap_initialize (&cmap_handle); if (err != CS_OK) { printf ("Could not initialize Cluster Map API instance error %d. Test skipped\n", err); return (1); } printf ("%s test %d\n", __FUNCTION__, test_n); if (test_n == 1) { printf ("%s: initialize\n", __FUNCTION__); err = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART | SAM_RECOVERY_POLICY_CMAP); if (err != CS_OK) { fprintf (stderr, "Can't initialize SAM API. Error %d\n", err); return 2; } printf ("%s: register\n", __FUNCTION__); err = sam_register (&instance_id); if (err != CS_OK) { fprintf (stderr, "Can't register. 
Error %d\n", err); return 2; } printf ("%s: iid %d\n", __FUNCTION__, instance_id); if (instance_id < 3) { snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.recovery", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"recovery\" key: %d.\n", err); return (2); } if (strcmp(str, "restart") != 0) { printf ("Recovery key \"%s\" is not \"restart\".\n", str); free(str); return (2); } free(str); snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.state", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "stopped") != 0) { printf ("State key is not \"stopped\".\n"); free(str); return (2); } free(str); printf ("%s iid %d: start\n", __FUNCTION__, instance_id); err = sam_start (); if (err != CS_OK) { fprintf (stderr, "Can't start hc. Error %d\n", err); return 2; } err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "running") != 0) { printf ("State key is not \"running\".\n"); free(str); return (2); } free(str); printf ("%s iid %d: waiting for kill\n", __FUNCTION__, instance_id); sleep (10); return (2); } if (instance_id == 3) { printf ("%s iid %d: mark failed\n", __FUNCTION__, instance_id); - if (err != CS_OK) { - fprintf (stderr, "Can't start hc. Error %d\n", err); - return 2; - } + err = sam_mark_failed (); if (err != CS_OK) { fprintf (stderr, "Can't mark failed. Error %d\n", err); return 2; } sleep (10); return (2); } return (2); } if (test_n == 2) { printf ("%s Testing if status is failed\n", __FUNCTION__); /* * Previous should be FAILED */ snprintf(key_name, CMAP_KEYNAME_MAXLEN, "resources.process.%d.state", pid); err = cmap_get_string(cmap_handle, key_name, &str); if (err != CS_OK) { printf ("Could not get \"state\" key: %d.\n", err); return (2); } if (strcmp(str, "failed") != 0) { printf ("State key is not \"failed\".\n"); free(str); return (2); } free(str); return (0); } return (2); } int main(int argc, char *argv[]) { pid_t pid, old_pid; int err; int stat; int all_passed = 1; int no_skipped = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 1; } if (pid == 0) { err = test1 (); sam_finalize (); return err; } waitpid (pid, &stat, 0); fprintf (stderr, "test1 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : "failed")); if (WEXITSTATUS (stat) != 0) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 1; } if (pid == 0) { err = test2 (); sam_finalize (); return (err); } waitpid (pid, &stat, 0); fprintf (stderr, "test2 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : "failed")); if (WEXITSTATUS (stat) != 0) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 1; } if (pid == 0) { err = test3 (); sam_finalize (); return (err); } waitpid (pid, &stat, 0); fprintf (stderr, "test3 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : "failed")); if (WEXITSTATUS (stat) != 0) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 1; } if (pid == 0) { err = test4 (); sam_finalize (); return (err); } waitpid (pid, &stat, 0); fprintf (stderr, "test4 %s\n", (WEXITSTATUS (stat) == 0 ? 
"passed" : "failed")); if (WEXITSTATUS (stat) != 0) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 1; } if (pid == 0) { err = test5 (); sam_finalize (); return (err); } waitpid (pid, &stat, 0); fprintf (stderr, "test5 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : "failed")); if (WEXITSTATUS (stat) != 0) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 1; } if (pid == 0) { err = test6 (); sam_finalize (); return (err); } waitpid (pid, &stat, 0); fprintf (stderr, "test6 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : "failed")); if (WEXITSTATUS (stat) != 0) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 2; } if (pid == 0) { err = test7 (); sam_finalize (); return (err); } waitpid (pid, &stat, 0); fprintf (stderr, "test7 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed"))); if (WEXITSTATUS (stat) == 1) no_skipped++; if (WEXITSTATUS (stat) > 1) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 2; } if (pid == 0) { err = test8 (getpid (), 0, 1); sam_finalize (); return (err); } waitpid (pid, &stat, 0); old_pid = pid; if (WEXITSTATUS (stat) == 0) { pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 2; } if (pid == 0) { err = test8 (getpid (), old_pid, 2); sam_finalize (); return (err); } waitpid (pid, &stat, 0); old_pid = pid; if (WEXITSTATUS (stat) == 0) { pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 2; } if (pid == 0) { err = test8 (old_pid, 0, 3); sam_finalize (); return (err); } waitpid (pid, &stat, 0); } } fprintf (stderr, "test8 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed"))); if (WEXITSTATUS (stat) == 1) no_skipped++; if (WEXITSTATUS (stat) > 1) all_passed = 0; pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 2; } if (pid == 0) { err = test9 (getpid (), 0, 1); sam_finalize (); return (err); } waitpid (pid, &stat, 0); old_pid = pid; if (WEXITSTATUS (stat) == 0) { pid = fork (); if (pid == -1) { fprintf (stderr, "Can't fork\n"); return 2; } if (pid == 0) { err = test9 (old_pid, 0, 2); sam_finalize (); return (err); } waitpid (pid, &stat, 0); } fprintf (stderr, "test9 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed"))); if (WEXITSTATUS (stat) == 1) no_skipped++; if (WEXITSTATUS (stat) > 1) all_passed = 0; if (all_passed) fprintf (stderr, "All tests passed (%d skipped)\n", no_skipped); return (all_passed ? 0 : 1); } diff --git a/tools/corosync-cfgtool.c b/tools/corosync-cfgtool.c index f33caaae..94ffce46 100644 --- a/tools/corosync-cfgtool.c +++ b/tools/corosync-cfgtool.c @@ -1,309 +1,318 @@ /* * Copyright (c) 2006-2013 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. 
nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define cs_repeat(result, max, code) \ do { \ int counter = 0; \ do { \ result = code; \ if (result == CS_ERR_TRY_AGAIN) { \ sleep(1); \ counter++; \ } else { \ break; \ } \ } while (counter < max); \ } while (0) static int ringstatusget_do (char *interface_name) { cs_error_t result; corosync_cfg_handle_t handle; unsigned int interface_count; char **interface_names; char **interface_status; unsigned int i; unsigned int nodeid; int rc = 0; printf ("Printing ring status.\n"); result = corosync_cfg_initialize (&handle, NULL); if (result != CS_OK) { printf ("Could not initialize corosync configuration API error %d\n", result); exit (1); } result = corosync_cfg_local_get(handle, &nodeid); if (result != CS_OK) { printf ("Could not get the local node id, the error is: %d\n", result); } else { printf ("Local node ID %u\n", nodeid); } result = corosync_cfg_ring_status_get (handle, &interface_names, &interface_status, &interface_count); if (result != CS_OK) { printf ("Could not get the ring status, the error is: %d\n", result); } else { for (i = 0; i < interface_count; i++) { if ( (interface_name && (interface_name[0]=='\0' || strcasecmp (interface_name, interface_names[i]) == 0)) || !interface_name ) { printf ("RING ID %d\n", i); printf ("\tid\t= %s\n", interface_names[i]); printf ("\tstatus\t= %s\n", interface_status[i]); if (strstr(interface_status[i], "FAULTY")) { rc = 1; } } } + + for (i = 0; i < interface_count; i++) { + free(interface_status[i]); + free(interface_names[i]); + } + free(interface_status); + free(interface_names); } + (void)corosync_cfg_finalize (handle); return rc; } static void ringreenable_do (void) { cs_error_t result; corosync_cfg_handle_t handle; printf ("Re-enabling all failed rings.\n"); result = corosync_cfg_initialize (&handle, NULL); if (result != CS_OK) { printf ("Could not initialize corosync configuration API error %d\n", result); exit (1); } result = corosync_cfg_ring_reenable (handle); if (result != CS_OK) { printf ("Could not re-enable ring error %d\n", result); } (void)corosync_cfg_finalize (handle); } static int reload_config_do (void) { cs_error_t result; corosync_cfg_handle_t handle; int rc; rc = 0; printf ("Reloading corosync.conf...\n"); result = corosync_cfg_initialize (&handle, NULL); if (result != CS_OK) { printf ("Could not initialize corosync configuration API error %s\n", cs_strerror(result)); exit (1); } result = corosync_cfg_reload_config (handle); if (result != CS_OK) { printf ("Could not 
reload configuration. Error %s\n", cs_strerror(result)); rc = (int)result; } else { printf ("Done\n"); } (void)corosync_cfg_finalize (handle); return (rc); } static void shutdown_do(void) { cs_error_t result; corosync_cfg_handle_t handle; corosync_cfg_callbacks_t callbacks; callbacks.corosync_cfg_shutdown_callback = NULL; result = corosync_cfg_initialize (&handle, &callbacks); if (result != CS_OK) { printf ("Could not initialize corosync configuration API error %d\n", result); exit (1); } printf ("Shutting down corosync\n"); cs_repeat(result, 30, corosync_cfg_try_shutdown (handle, COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST)); if (result != CS_OK) { printf ("Could not shutdown (error = %d)\n", result); } (void)corosync_cfg_finalize (handle); } static void showaddrs_do(int nodeid) { cs_error_t result; corosync_cfg_handle_t handle; corosync_cfg_callbacks_t callbacks; int numaddrs; int i; corosync_cfg_node_address_t addrs[INTERFACE_MAX]; result = corosync_cfg_initialize (&handle, &callbacks); if (result != CS_OK) { printf ("Could not initialize corosync configuration API error %d\n", result); exit (1); } if (corosync_cfg_get_node_addrs(handle, nodeid, INTERFACE_MAX, &numaddrs, addrs) == CS_OK) { for (i=0; i<numaddrs; i++) { char buf[INET6_ADDRSTRLEN]; struct sockaddr_storage *ss = (struct sockaddr_storage *)addrs[i].address; struct sockaddr_in *sin = (struct sockaddr_in *)addrs[i].address; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addrs[i].address; void *saddr; if (ss->ss_family == AF_INET6) saddr = &sin6->sin6_addr; else saddr = &sin->sin_addr; inet_ntop(ss->ss_family, saddr, buf, sizeof(buf)); if (i != 0) { printf(" "); } printf("%s", buf); } printf("\n"); } (void)corosync_cfg_finalize (handle); } static void killnode_do(unsigned int nodeid) { cs_error_t result; corosync_cfg_handle_t handle; printf ("Killing node %u\n", nodeid); result = corosync_cfg_initialize (&handle, NULL); if (result != CS_OK) { printf ("Could not initialize corosync configuration API error %d\n", result); exit (1); } result = corosync_cfg_kill_node (handle, nodeid, "Killed by corosync-cfgtool"); if (result != CS_OK) { printf ("Could not kill node (error = %d)\n", result); } (void)corosync_cfg_finalize (handle); } static void usage_do (void) { printf ("corosync-cfgtool [-i <interface>] [-s] [-r] [-R] [-k nodeid] [-a nodeid] [-h] [-H]\n\n"); printf ("A tool for displaying and configuring active parameters within corosync.\n"); printf ("options:\n"); printf ("\t-i\tFilter the ring status display (-s) by interface name.\n"); printf ("\t-s\tDisplays the status of the current rings on this node.\n"); printf ("\t-r\tReset redundant ring state cluster wide after a fault to\n"); printf ("\t\tre-enable redundant ring operation.\n"); printf ("\t-a\tDisplay the IP address(es) of a node\n"); printf ("\t-k\tKill a node identified by node id.\n"); printf ("\t-R\tReload corosync.conf on all nodes.\n"); printf ("\t-H\tShutdown corosync cleanly on this node.\n"); printf ("\t-h\tPrint this usage text.\n"); } int main (int argc, char *argv[]) { const char *options = "i:srRk:a:hH"; int opt; unsigned int nodeid; char interface_name[128] = ""; int rc=0; if (argc == 1) { usage_do (); } while ( (opt = getopt(argc, argv, options)) != -1 ) { switch (opt) { case 'i': strncpy(interface_name, optarg, sizeof(interface_name)); + interface_name[sizeof(interface_name) - 1] = '\0'; break; case 's': rc = ringstatusget_do (interface_name); break; case 'R': rc = reload_config_do (); break; case 'r': ringreenable_do (); break; case 'k': nodeid = atoi (optarg); killnode_do(nodeid); break; case 'H': shutdown_do(); break; case 'a': showaddrs_do( atoi(optarg) ); break; case 'h': usage_do(); break; } } return (rc); }