diff --git a/exec/main.c b/exec/main.c index 82fb8087..7dc7724b 100644 --- a/exec/main.c +++ b/exec/main.c @@ -1,1429 +1,1429 @@ /* * Copyright (c) 2002-2006 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** * \mainpage Corosync * * This is the doxygen generated developer documentation for the Corosync * project. For more information about Corosync, please see the project * web site, corosync.org. * * \section license License * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "quorum.h" #include "totemsrp.h" #include "logconfig.h" #include "totemconfig.h" #include "main.h" #include "sync.h" #include "timer.h" #include "util.h" #include "apidef.h" #include "service.h" #include "schedwrk.h" #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define IPC_LOGSYS_SIZE 1024*64 #else #define IPC_LOGSYS_SIZE 8192*128 #endif LOGSYS_DECLARE_SYSTEM ("corosync", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG, LOG_DAEMON, LOG_INFO); LOGSYS_DECLARE_SUBSYS ("MAIN"); #define SERVER_BACKLOG 5 static int sched_priority = 0; static unsigned int service_count = 32; static struct totem_logging_configuration totem_logging_configuration; static struct corosync_api_v1 *api = NULL; static int sync_in_process = 1; static qb_loop_t *corosync_poll_handle; struct sched_param global_sched_param; static corosync_timer_handle_t corosync_stats_timer_handle; static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; static int ip_version = AF_INET; qb_loop_t *cs_poll_handle_get (void) { return (corosync_poll_handle); } int cs_poll_dispatch_add (qb_loop_t * handle, int fd, int events, void *data, int (*dispatch_fn) (int fd, int revents, void *data)) { return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, dispatch_fn); } int cs_poll_dispatch_delete(qb_loop_t * handle, int fd) { return qb_loop_poll_del(handle, fd); } void corosync_state_dump (void) { int i; for (i = 0; i < SERVICES_COUNT_MAX; i++) { if (corosync_service[i] && corosync_service[i]->exec_dump_fn) { corosync_service[i]->exec_dump_fn (); } } } static void corosync_blackbox_write_to_file (void) { char fname[PATH_MAX]; char fdata_fname[PATH_MAX]; char time_str[PATH_MAX]; struct tm cur_time_tm; time_t cur_time_t; ssize_t res; cur_time_t = time(NULL); localtime_r(&cur_time_t, &cur_time_tm); strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm); snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld", get_run_dir(), time_str, (long long int)getpid()); if ((res = qb_log_blackbox_write_to_file(fname)) < 0) { LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file"); } snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_run_dir()); unlink(fdata_fname); if (symlink(fname, fdata_fname) == -1) { log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'", fname, fdata_fname); } } static void unlink_all_completed (void) { api->timer_delete (corosync_stats_timer_handle); qb_loop_stop (corosync_poll_handle); icmap_fini(); } void corosync_shutdown_request (void) { corosync_service_unlink_all (api, unlink_all_completed); } static int32_t sig_diag_handler (int num, void *data) { corosync_state_dump (); return 0; } static int32_t sig_exit_handler (int num, void *data) { log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal"); corosync_service_unlink_all (api, unlink_all_completed); return 0; } static void sigsegv_handler (int num) { (void)signal (SIGSEGV, SIG_DFL); corosync_blackbox_write_to_file (); qb_log_fini(); raise (SIGSEGV); } /* * QB wrapper for real signal handler */ static int32_t sig_segv_handler (int num, void *data) { sigsegv_handler(num); return 0; } static void sigabrt_handler (int num) { (void)signal (SIGABRT, SIG_DFL); corosync_blackbox_write_to_file (); qb_log_fini(); raise (SIGABRT); } /* * QB wrapper for real signal handler */ static int32_t sig_abrt_handler (int num, void *data) { sigabrt_handler(num); return 0; } #define LOCALHOST_IP inet_addr("127.0.0.1") static void *corosync_group_handle; static struct totempg_group corosync_group = { .group = "a", .group_len = 1 }; static void serialize_lock (void) { } static void serialize_unlock (void) { } static void corosync_sync_completed (void) { log_printf (LOGSYS_LEVEL_NOTICE, "Completed service synchronization, ready to provide service."); sync_in_process = 0; cs_ipcs_sync_state_changed(sync_in_process); cs_ipc_allow_connections(1); /* * Inform totem to start using new message queue again */ totempg_trans_ack(); } static int corosync_sync_callbacks_retrieve ( int service_id, struct sync_callbacks *callbacks) { if (corosync_service[service_id] == NULL) { return (-1); } if (callbacks == NULL) { return (0); } callbacks->name = corosync_service[service_id]->name; callbacks->sync_init = corosync_service[service_id]->sync_init; callbacks->sync_process = corosync_service[service_id]->sync_process; callbacks->sync_activate = corosync_service[service_id]->sync_activate; callbacks->sync_abort = corosync_service[service_id]->sync_abort; return (0); } static struct memb_ring_id corosync_ring_id; static void member_object_joined (unsigned int nodeid) { char member_ip[ICMAP_KEYNAME_MAXLEN]; char member_join_count[ICMAP_KEYNAME_MAXLEN]; char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_ip, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.ip", nodeid); snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid); snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.status", nodeid); if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) { icmap_inc(member_join_count); icmap_set_string(member_status, "joined"); } else { icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid)); icmap_set_uint32(member_join_count, 1); icmap_set_string(member_status, "joined"); } log_printf (LOGSYS_LEVEL_DEBUG, "Member joined: %s", api->totem_ifaces_print (nodeid)); } static void member_object_left (unsigned int nodeid) { char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.status", nodeid); icmap_set_string(member_status, "left"); log_printf (LOGSYS_LEVEL_DEBUG, "Member left: %s", api->totem_ifaces_print (nodeid)); } static void confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; int abort_activate = 0; if (sync_in_process == 1) { abort_activate = 1; } sync_in_process = 1; cs_ipcs_sync_state_changed(sync_in_process); memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id)); for (i = 0; i < left_list_entries; i++) { member_object_left (left_list[i]); } for (i = 0; i < joined_list_entries; i++) { member_object_joined (joined_list[i]); } /* * Call configuration change for all services */ for (i = 0; i < service_count; i++) { if (corosync_service[i] && corosync_service[i]->confchg_fn) { corosync_service[i]->confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } } if (abort_activate) { sync_abort (); } if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) { sync_save_transitional (member_list, member_list_entries, ring_id); } if (configuration_type == TOTEM_CONFIGURATION_REGULAR) { sync_start (member_list, member_list_entries, ring_id); } } static void priv_drop (void) { return; /* TODO: we are still not dropping privs */ } static void corosync_tty_detach (void) { int devnull; /* * Disconnect from TTY if this is not a debug run */ switch (fork ()) { case -1: corosync_exit_error (COROSYNC_DONE_FORK); break; case 0: /* * child which is disconnected, run this process */ break; default: exit (0); break; } /* Create new session */ (void)setsid(); /* * Map stdin/out/err to /dev/null. */ devnull = open("/dev/null", O_RDWR); if (devnull == -1) { corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0 || dup2(devnull, 2) < 0) { close(devnull); corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } close(devnull); } static void corosync_mlockall (void) { int res; struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; #ifndef RLIMIT_MEMLOCK #define RLIMIT_MEMLOCK RLIMIT_VMEM #endif setrlimit (RLIMIT_MEMLOCK, &rlimit); res = mlockall (MCL_CURRENT | MCL_FUTURE); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults"); }; } static void corosync_totem_stats_updater (void *data) { totempg_stats_t * stats; uint32_t total_mtt_rx_token; uint32_t total_backlog_calc; uint32_t total_token_holdtime; int t, prev, i; int32_t token_count; char key_name[ICMAP_KEYNAME_MAXLEN]; stats = api->totem_get_stats(); icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved); icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail); icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_tx", stats->mrp->srp->orf_token_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_rx", stats->mrp->srp->orf_token_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_tx", stats->mrp->srp->memb_merge_detect_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_rx", stats->mrp->srp->memb_merge_detect_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_tx", stats->mrp->srp->memb_join_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_rx", stats->mrp->srp->memb_join_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_tx", stats->mrp->srp->mcast_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_retx", stats->mrp->srp->mcast_retx); icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_rx", stats->mrp->srp->mcast_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_tx", stats->mrp->srp->memb_commit_token_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_rx", stats->mrp->srp->memb_commit_token_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_tx", stats->mrp->srp->token_hold_cancel_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_rx", stats->mrp->srp->token_hold_cancel_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_entered", stats->mrp->srp->operational_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_token_lost", stats->mrp->srp->operational_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_entered", stats->mrp->srp->gather_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_token_lost", stats->mrp->srp->gather_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_entered", stats->mrp->srp->commit_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_token_lost", stats->mrp->srp->commit_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_entered", stats->mrp->srp->recovery_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_token_lost", stats->mrp->srp->recovery_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts); icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped); icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather); icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures", stats->mrp->srp->continuous_sendmsg_failures); icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0); if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER || stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) { log_printf (LOGSYS_LEVEL_WARNING, "Totem is unable to form a cluster because of an " "operating system or network fault. The most common " "cause of this message is that the local firewall is " "configured improperly."); icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1); } else { icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0); } for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i); icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]); } total_mtt_rx_token = 0; total_token_holdtime = 0; total_backlog_calc = 0; token_count = 0; t = stats->mrp->srp->latest_token; while (1) { if (t == 0) prev = TOTEM_TOKEN_STATS_MAX - 1; else prev = t - 1; if (prev == stats->mrp->srp->earliest_token) break; /* if tx == 0, then dropped token (not ours) */ if (stats->mrp->srp->token[t].tx != 0 || (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) { total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx); total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx); total_backlog_calc += stats->mrp->srp->token[t].backlog_calc; token_count++; } t = prev; } if (token_count) { icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count)); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count)); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count)); } cs_ipcs_stats_update(); api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void corosync_totem_stats_init (void) { icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", 0); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", 0); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", 0); /* start stats timer */ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { const struct qb_ipc_request_header *header; int32_t service; int32_t fn_id; uint32_t id; header = msg; if (endian_conversion_required) { id = swab32 (header->id); } else { id = header->id; } /* * Call the proper executive handler */ service = id >> 16; fn_id = id & 0xffff; if (!corosync_service[service]) { return; } if (fn_id >= corosync_service[service]->exec_engine_count) { log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)", fn_id, service, corosync_service[service]->exec_engine_count); return; } icmap_fast_inc(service_stats_rx[service][fn_id]); if (endian_conversion_required) { assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL); corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn ((void *)msg); } corosync_service[service]->exec_engine[fn_id].exec_handler_fn (msg, nodeid); } int main_mcast ( const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee) { const struct qb_ipc_request_header *req = iovec->iov_base; int32_t service; int32_t fn_id; service = req->id >> 16; fn_id = req->id & 0xffff; if (corosync_service[service]) { icmap_fast_inc(service_stats_tx[service][fn_id]); } return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee)); } static void corosync_ring_id_create_or_load ( struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr) { int fd; int res = 0; char filename[PATH_MAX]; snprintf (filename, sizeof(filename), "%s/ringid_%s", get_run_dir(), totemip_print (addr)); fd = open (filename, O_RDONLY, 0700); /* * If file can be opened and read, read the ring id */ if (fd != -1) { res = read (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); } /* * If file could not be opened or read, create a new ring id */ if ((fd == -1) || (res != sizeof (uint64_t))) { memb_ring_id->seq = 0; umask(0); fd = open (filename, O_CREAT|O_RDWR, 0700); if (fd != -1) { res = write (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, "Couldn't write ringid file '%s'", filename); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, "Couldn't create ringid file '%s'", filename); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } totemip_copy(&memb_ring_id->rep, addr); assert (!totemip_zero_check(&memb_ring_id->rep)); } static void corosync_ring_id_store ( const struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr) { char filename[PATH_MAX]; int fd; int res; snprintf (filename, sizeof(filename), "%s/ringid_%s", get_run_dir(), totemip_print (addr)); fd = open (filename, O_WRONLY, 0700); if (fd == -1) { fd = open (filename, O_CREAT|O_RDWR, 0700); } if (fd == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "Couldn't store new ring id %llx to stable storage", memb_ring_id->seq); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } log_printf (LOGSYS_LEVEL_DEBUG, "Storing new sequence id for ring %llx", memb_ring_id->seq); res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq)); close (fd); if (res != sizeof(memb_ring_id->seq)) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "Couldn't store new ring id %llx to stable storage", memb_ring_id->seq); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } static qb_loop_timer_handle recheck_the_q_level_timer; void corosync_recheck_the_q_level(void *data) { totempg_check_q_level(corosync_group_handle); if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) { qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer); } } struct sending_allowed_private_data_struct { int reserved_msgs; }; int corosync_sending_allowed ( unsigned int service, unsigned int id, const void *msg, void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; struct iovec reserve_iovec; struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg; int sending_allowed; reserve_iovec.iov_base = (char *)header; reserve_iovec.iov_len = header->size; pd->reserved_msgs = totempg_groups_joined_reserve ( corosync_group_handle, &reserve_iovec, 1); if (pd->reserved_msgs == -1) { return -EINVAL; } sending_allowed = QB_FALSE; if (corosync_quorum_is_quorate() == 1 || corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { // we are quorate // now check flow control if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs && sync_in_process == 0) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs == 0) { return -ENOBUFS; } else /* (sync_in_process) */ { return -EINPROGRESS; } } else { return -EHOSTUNREACH; } return (sending_allowed); } void corosync_sending_allowed_release (void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; if (pd->reserved_msgs == -1) { return; } totempg_groups_joined_release (pd->reserved_msgs); } int message_source_is_local (const mar_message_source_t *source) { int ret = 0; assert (source != NULL); if (source->nodeid == totempg_my_nodeid_get ()) { ret = 1; } return ret; } void message_source_set ( mar_message_source_t *source, void *conn) { assert ((source != NULL) && (conn != NULL)); memset (source, 0, sizeof (mar_message_source_t)); source->nodeid = totempg_my_nodeid_get (); source->conn = conn; } struct scheduler_pause_timeout_data { struct totem_config *totem_config; qb_loop_timer_handle handle; unsigned long long tv_prev; unsigned long long max_tv_diff; }; static void timer_function_scheduler_timeout (void *data) { struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data; unsigned long long tv_current; unsigned long long tv_diff; tv_current = qb_util_nano_current_get (); if (timeout_data->tv_prev == 0) { /* * Initial call -> just pretent everything is ok */ timeout_data->tv_prev = tv_current; timeout_data->max_tv_diff = 0; } tv_diff = tv_current - timeout_data->tv_prev; timeout_data->tv_prev = tv_current; if (tv_diff > timeout_data->max_tv_diff) { log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms " "(threshold is %0.4f ms). Consider token timeout increase.", (float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC); } /* * Set next threshold, because token_timeout can change */ timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8; qb_loop_timer_add (corosync_poll_handle, QB_LOOP_MED, timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3, timeout_data, timer_function_scheduler_timeout, &timeout_data->handle); } static void corosync_setscheduler (void) { #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER) int res; sched_priority = sched_get_priority_max (SCHED_RR); if (sched_priority != -1) { global_sched_param.sched_priority = sched_priority; res = sched_setscheduler (0, SCHED_RR, &global_sched_param); if (res == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "Could not set SCHED_RR at priority %d", global_sched_param.sched_priority); global_sched_param.sched_priority = 0; #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET qb_log_thread_priority_set (SCHED_OTHER, 0); #endif } else { /* * Turn on SCHED_RR in logsys system */ #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET res = qb_log_thread_priority_set (SCHED_RR, sched_priority); #else res = -1; #endif if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not set logsys thread priority." " Can't continue because of priority inversions."); corosync_exit_error (COROSYNC_DONE_LOGSETUP); } } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not get maximum scheduler priority"); sched_priority = 0; } #else log_printf(LOGSYS_LEVEL_WARNING, "The Platform is missing process priority setting features. Leaving at default."); #endif } /* The basename man page contains scary warnings about thread-safety and portability, hence this */ static const char *corosync_basename(const char *file_name) { char *base; base = strrchr (file_name, '/'); if (base) { return base + 1; } return file_name; } static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) __attribute__((format(printf, 6, 7))); static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) { va_list ap; va_start(ap, format); qb_log_from_external_source_va(function_name, corosync_basename(file_name), format, level, file_line, subsys, ap); va_end(ap); } static void fplay_key_change_notify_fn ( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) { fprintf(stderr,"Writetofile\n"); corosync_blackbox_write_to_file (); } if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) { fprintf(stderr,"statefump\n"); corosync_state_dump (); } } static void corosync_fplay_control_init (void) { icmap_track_t track = NULL; icmap_set_string("runtime.blackbox.dump_flight_data", "no"); icmap_set_string("runtime.blackbox.dump_state", "no"); icmap_track_add("runtime.blackbox.dump_flight_data", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); icmap_track_add("runtime.blackbox.dump_state", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); } /* * Set RO flag for keys, which ether doesn't make sense to change by user (statistic) * or which when changed are not reflected by runtime (totem.crypto_cipher, ...). * * Also some RO keys cannot be determined in this stage, so they are set later in * other functions (like nodelist.local_node_pos, ...) */ static void set_icmap_ro_keys_flag (void) { /* * Set RO flag for all keys of internal configuration and runtime statistics */ icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.connections.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.config.", CS_TRUE, CS_TRUE); icmap_set_ro_access("uidgid.config.", CS_TRUE, CS_TRUE); /* * Set RO flag for constrete keys of configuration which can't be changed * during runtime */ icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.ip_version", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.transport", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.cluster_name", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.threads", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.version", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.nodeid", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.clear_node_high_bit", CS_FALSE, CS_TRUE); icmap_set_ro_access("qb.ipc_type", CS_FALSE, CS_TRUE); icmap_set_ro_access("config.reload_in_progress", CS_FALSE, CS_TRUE); icmap_set_ro_access("config.totemconfig_reload_in_progress", CS_FALSE, CS_TRUE); } static void main_service_ready (void) { int res; /* * This must occur after totempg is initialized because "this_ip" must be set */ res = corosync_service_defaults_link_and_init (api); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services"); corosync_exit_error (COROSYNC_DONE_INIT_SERVICES); } cs_ipcs_init(); corosync_totem_stats_init (); corosync_fplay_control_init (); sync_init ( corosync_sync_callbacks_retrieve, corosync_sync_completed); } static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid) { struct flock lock; enum e_corosync_done err; char pid_s[17]; int fd_flag; int lf; err = COROSYNC_DONE_EXIT; lf = open (lockfile, O_WRONLY | O_CREAT, 0640); if (lf == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file."); - return (COROSYNC_DONE_AQUIRE_LOCK); + return (COROSYNC_DONE_ACQUIRE_LOCK); } retry_fcntl: lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; if (fcntl (lf, F_SETLK, &lock) == -1) { switch (errno) { case EINTR: goto retry_fcntl; break; case EAGAIN: case EACCES: log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running."); err = COROSYNC_DONE_ALREADY_RUNNING; goto error_close; break; default: log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s", strerror(errno)); - err = COROSYNC_DONE_AQUIRE_LOCK; + err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close; break; } } if (ftruncate (lf, 0) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s", strerror (errno)); - err = COROSYNC_DONE_AQUIRE_LOCK; + err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } memset (pid_s, 0, sizeof (pid_s)); snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); retry_write: if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) { if (errno == EINTR) { goto retry_write; } else { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. " "Error was %s", strerror (errno)); - err = COROSYNC_DONE_AQUIRE_LOCK; + err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } } if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " "Error was %s", strerror (errno)); - err = COROSYNC_DONE_AQUIRE_LOCK; + err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } fd_flag |= FD_CLOEXEC; if (fcntl (lf, F_SETFD, fd_flag) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. " "Error was %s", strerror (errno)); - err = COROSYNC_DONE_AQUIRE_LOCK; + err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } return (err); error_close_unlink: unlink (lockfile); error_close: close (lf); return (err); } int main (int argc, char **argv, char **envp) { const char *error_string; struct totem_config totem_config; int res, ch; int background, setprio, testonly; struct stat stat_out; enum e_corosync_done flock_err; uint64_t totem_config_warnings; struct scheduler_pause_timeout_data scheduler_pause_timeout_data; /* default configuration */ background = 1; setprio = 1; testonly = 0; while ((ch = getopt (argc, argv, "fprtv")) != EOF) { switch (ch) { case 'f': background = 0; break; case 'p': setprio = 0; break; case 'r': setprio = 1; break; case 't': testonly = 1; break; case 'v': printf ("Corosync Cluster Engine, version '%s'\n", VERSION); printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n"); logsys_system_fini(); return EXIT_SUCCESS; break; default: fprintf(stderr, \ "usage:\n"\ " -f : Start application in foreground.\n"\ " -p : Do not set process priority.\n"\ " -t : Test configuration and exit.\n"\ " -r : Set round robin realtime scheduling (default).\n"\ " -v : Display version and SVN revision of Corosync and exit.\n"); logsys_system_fini(); return EXIT_FAILURE; } } /* * Set round robin realtime scheduling with priority 99 * Lock all memory to avoid page faults which may interrupt * application healthchecking */ if (setprio) { corosync_setscheduler (); } corosync_mlockall (); /* * Other signals are registered later via qb_loop_signal_add */ (void)signal (SIGSEGV, sigsegv_handler); (void)signal (SIGABRT, sigabrt_handler); #if MSG_NOSIGNAL != 0 (void)signal (SIGPIPE, SIG_IGN); #endif if (icmap_init() != CS_OK) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component."); corosync_exit_error (COROSYNC_DONE_ICMAP); } set_icmap_ro_keys_flag(); /* * Initialize the corosync_api_v1 definition */ api = apidef_get (); res = coroparse_configparse(icmap_get_global_map(), &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } res = corosync_log_config_read (&error_string); if (res == -1) { /* * if we are here, we _must_ flush the logsys queue * and try to inform that we couldn't read the config. * this is a desperate attempt before certain death * and there is no guarantee that we can print to stderr * nor that logsys is sending the messages where we expect. */ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); fprintf(stderr, "%s", error_string); syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } if (!testonly) { log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION); log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES ""); } /* * Make sure required directory is present */ res = stat (get_run_dir(), &stat_out); if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) { log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", get_run_dir()); corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); } res = chdir(get_run_dir()); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to run directory %s. " "Please make sure it has correct context and rights.", get_run_dir()); corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); } res = totem_config_read (&totem_config, &error_string, &totem_config_warnings); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used."); } if (totem_config_warnings != 0) { log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist."); } res = totem_config_keyread (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } res = totem_config_validate (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (testonly) { corosync_exit_error (COROSYNC_DONE_EXIT); } ip_version = totem_config.ip_version; totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load; totem_config.totem_memb_ring_id_store = corosync_ring_id_store; totem_config.totem_logging_configuration = totem_logging_configuration; totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem," "totemmrp.c,totemrrp.c,totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c," "totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c"); totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR; totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE; totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG; totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE; totem_config.totem_logging_configuration.log_printf = _logsys_log_printf; logsys_config_apply(); /* * Now we are fully initialized. */ if (background) { corosync_tty_detach (); } corosync_poll_handle = qb_loop_create (); memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data)); scheduler_pause_timeout_data.totem_config = &totem_config; timer_function_scheduler_timeout (&scheduler_pause_timeout_data); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, SIGUSR2, NULL, sig_diag_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGINT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGSEGV, NULL, sig_segv_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGABRT, NULL, sig_abrt_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGQUIT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGTERM, NULL, sig_exit_handler, NULL); if (logsys_thread_start() != 0) { log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread"); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) { corosync_exit_error (flock_err); } /* * if totempg_initialize doesn't have root priveleges, it cannot * bind to a specific interface. This only matters if * there is more then one interface in a system, so * in this case, only a warning is printed */ /* * Join multicast group and setup delivery * and configuration change functions */ totempg_initialize ( corosync_poll_handle, &totem_config); totempg_service_ready_register ( main_service_ready); totempg_groups_initialize ( &corosync_group_handle, deliver_fn, confchg_fn); totempg_groups_join ( corosync_group_handle, &corosync_group, 1); /* * Drop root privleges to user 'corosync' * TODO: Don't really need full root capabilities; * needed capabilities are: * CAP_NET_RAW (bindtodevice) * CAP_SYS_NICE (setscheduler) * CAP_IPC_LOCK (mlockall) */ priv_drop (); schedwrk_init ( serialize_lock, serialize_unlock); /* * Start main processing loop */ qb_loop_run (corosync_poll_handle); /* * Exit was requested */ totempg_finalize (); /* * free the loop resources */ qb_loop_destroy (corosync_poll_handle); /* * free up the icmap */ /* * Remove pid lock file */ unlink (corosync_lock_file); corosync_exit_error (COROSYNC_DONE_EXIT); return EXIT_SUCCESS; } diff --git a/exec/util.h b/exec/util.h index 9efd35e5..df5eeb40 100644 --- a/exec/util.h +++ b/exec/util.h @@ -1,88 +1,88 @@ /* * Copyright (c) 2002-2004 MontaVista Software, Inc. * Copyright (c) 2004 Open Source Development Lab * Copyright (c) 2006-2011 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com), Mark Haverkamp (markh@osdl.org) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef UTIL_H_DEFINED #define UTIL_H_DEFINED #include #include /** * Get the time of day and convert to nanoseconds */ extern cs_time_t clust_time_now(void); enum e_corosync_done { COROSYNC_DONE_EXIT = 0, COROSYNC_DONE_FORK = 4, COROSYNC_DONE_LOGCONFIGREAD = 7, COROSYNC_DONE_MAINCONFIGREAD = 8, COROSYNC_DONE_LOGSETUP = 9, COROSYNC_DONE_ICMAP = 12, COROSYNC_DONE_INIT_SERVICES = 13, COROSYNC_DONE_FATAL_ERR = 15, COROSYNC_DONE_DIR_NOT_PRESENT = 16, - COROSYNC_DONE_AQUIRE_LOCK = 17, + COROSYNC_DONE_ACQUIRE_LOCK = 17, COROSYNC_DONE_ALREADY_RUNNING = 18, COROSYNC_DONE_STD_TO_NULL_REDIR = 19, COROSYNC_DONE_SERVICE_ENGINE_INIT = 20, COROSYNC_DONE_STORE_RINGID = 21, COROSYNC_DONE_PLOAD = 99 }; /** * Compare two names. returns non-zero on match. */ extern int name_match(cs_name_t *name1, cs_name_t *name2); #define corosync_exit_error(err) _corosync_exit_error ((err), __FILE__, __LINE__) extern void _corosync_exit_error (enum e_corosync_done err, const char *file, unsigned int line) __attribute__((noreturn)); void _corosync_out_of_memory_error (void) __attribute__((noreturn)); extern char *getcs_name_t (cs_name_t *name); extern void setcs_name_t (cs_name_t *name, char *str); extern int cs_name_tisEqual (cs_name_t *str1, char *str2); /** * Get the short name of a service from the service_id. */ const char * short_service_name_get(uint32_t service_id, char *buf, size_t buf_size); /* * Return run directory (ether COROSYNC_RUN_DIR env or LOCALSTATEDIR/lib/corosync) */ const char *get_run_dir(void); #endif /* UTIL_H_DEFINED */ diff --git a/man/corosync-qdevice.8 b/man/corosync-qdevice.8 index fbeb328b..338806ea 100644 --- a/man/corosync-qdevice.8 +++ b/man/corosync-qdevice.8 @@ -1,318 +1,318 @@ .\"/* .\" * Copyright (C) 2016 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Jan Friesse .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of Red Hat, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH COROSYNC-QDEVICE 8 2016-06-29 .SH NAME corosync-qdevice \- QDevice daemon .SH SYNOPSIS .B "corosync-qdevice [-dfh] [-S option=value[,option2=value2,...]]" .SH DESCRIPTION .B corosync-qdevice is a daemon running on each node of a cluster. It provides a configured number of votes to the quorum subsystem based on a third-party arbitrator's decision. Its primary use is to allow a cluster to sustain more node failures than standard quorum rules allow. It is recommended for clusters with an even number of nodes and highly recommended for 2 node clusters. .SH OPTIONS .TP .B -d Forcefully turn on debug information without the need to change corosync.conf. .TP .B -f Do not daemonize, run in the foreground. .TP .B -h Show short help text .TP .B -S Set advanced settings described in its own section below. This option shouldn't be generally used because most of the options are not safe to change. .SH CONFIGURATION .B corosync-qdevice reads its configuration from corosync.conf file. The main configuration is within .B quorum.device sub-key. Each model also has its own configuration within a similarly named sub-key. .TP .B model Specifies the model to be used. This parameter is required. .B corosync-qdevice is modular and is able to support multiple different models. The model basically defines what type of arbitrator is used. Currently only .I net is supported. .TP .B timeout Specifies how often .B corosync-qdevice should call the votequorum_poll function. It is also used by the net model to adjust its hearbeat timeout. It is recommended that you don't change this value. Default is 10000. .TP .B sync_timeout Specifies how often .B corosync-qdevice should call the votequorum_poll function during a sync phase. It is recommended that you don't change this value. Default is 30000. .TP .B votes The number of votes provided to the cluster by qdevice. Default is (number_of_nodes - 1) or generally sum(votes_per_node) - 1. .PP .B quorum.device.net holds the configuration for model 'net'. .TP .B tls Can be one of .I on, off or required and specifies if tls should be used. .I on means a connection with TLS is attempted first, but if the server doesn't advertise TLS support then non-TLS will be used. .I off is used then TLS is not required and it's then not even tried. This mode is the only one which doesn't need a properly initialized NSS database. .I required means TLS is required and if the server doesn't support TLS, qdevice will exit with error message. Default is on. .TP .B host Specifies the IP address or host name of the qnetd server to be used. This parameter is required. .TP .B port Specifies TCP port of qnetd server. Default is 5403. .TP .B algorithm Decision algorithm. Can be one of the .I ffsplit or .I lms. (actually there are also .I test and .I 2nodelms , both of which are mainly for developers and shouldn't be used for production clusters). For a description of what each algorithm means and how the algorithms differ see their individual sections. Default value is ffsplit. .TP .B tie_breaker can be one of .I lowest, .I highest or valid_node_id (number) values. It's used as a fallback if qdevice has to decide between two or more equal partitions. .I lowest means the partition with the lowest node id is chosen. .I highest means the partition with highest node id is chosen. And valid_node_id means that the partition containing the node with the given node id is chosen. Default is 'lowest'. .TP .B connect_timeout Timeout when .B corosync-qdevice is trying to connect to .B corosync-qnetd host. Default is 0.8 * quorum.sync_timeout. .TP .B force_ip_version can be one of .I 0|4|6 and forces the software to use the given IP version. .I 0 -(default value) means IPv6 is prefered and IPv4 should be used as a fallback. +(default value) means IPv6 is preferred and IPv4 should be used as a fallback. .PP Logging configuration is within the .B logging directive. .B corosync-qdevice parses and supports most of the options with exception of .B to_logfile, .B logfile and .B logfile_priority. The .B logger_subsys sub-directive can be also used if .B subsys is set to QDEVICE. .PP For .B corosync-qdevice to work correctly, the .B nodelist directive has to be used and properly configured. Also the net model requires that .B totem.cluster_name option is set. .SH MODEL NET TLS CONFIGURATION For model net to work using TLS, it's necessary to create the NSS database, import Qnetd CA certificate, and get/distribute a valid client certificate. If pcs is used (recommended) the following steps are not needed because pcs does them automatically. .B corosync-qdevice-net-certutil is the tool to perform required actions semi-automatically. Please consult the help output of it and its man page. For a first time configuration it may make sense to start with the .B -Q option. If TLS is not required just edit corosync.conf file and set .B quorum.device.net.tls to .I off. .SH MODEL NET ALGORITHMS Algorithms are used to change behavior of how .B corosync-qnetd provides votes to a given node/partition. Currently there are two algorithms supported. .TP .B ffsplit This one makes sense only for clusters with even number of nodes. It provides exactly one vote to the partition with the highest number of active nodes. If there are two exactly similar partitions, it provides its vote to the partition that has the most clients connected to the qnetd server. If this number is also equal, then the tie_breaker is used. It is able to transition its vote if the currently active partition becomes partitioned and a non-active partition still has at least 50% of the active nodes. Because of this, a vote is not provided if the qnetd connection is not active. To use this algorithm it's required to set the number of votes per node to 1 (default) and the qdevice number of votes has to be also 1. This is achieved by setting .B quorum.device.votes key in corosync.conf file to 1. .TP .B lms Last-man-standing. If the node is the only one left in the cluster that can see the qnetd server then we return a vote. If more than one node can see the qnetd server but some nodes can't see each other then the cluster is divided up into 'partitions' based on their ring_id and this algorithm returns a vote to the largest active partition or, if there is more than 1 equal partiton, the partition that contains the tie_breaker node (lowest, highest, etc). For LMS to work, the number of qdevice votes has to be set to default (so just delete .B quorum.device.votes key from corosync.conf). .SH ADVANCED SETTINGS Set by using .B -S option. The default value is shown in parentheses) Options beginning with .B net_ prefix are specific to model net. .TP .B lock_file Lock file location. (/var/run/corosync-qdevice/corosync-qdevice.pid) .TP .B local_socket_file Internal IPC socket file location. (/var/run/corosync-qdevice/corosync-qdevice.sock) .TP .B local_socket_backlog Parameter passed to listen syscall. (10) .TP .B max_cs_try_again How many times to retry the call to a corosync function which has returned CS_ERR_TRY_AGAIN. (10) .TP .B votequorum_device_name Name used for qdevice registration. (Qdevice) .TP .B ipc_max_clients Maximum allowed simultaneous IPC clients. (10) .TP .B ipc_max_receive_size Maximum size of a message received by IPC client. (4096) .TP .B ipc_max_send_size Maximum size of a message allowed to be sent to an IPC client. (65536) .TP .B master_wins Force enable/disable master wins. (default is model) .TP .B net_nss_db_dir NSS database directory. (/etc/corosync/qdevice/net/nssdb) .TP .B net_initial_msg_receive_size Initial (used during connection parameters negotiation) maximum size of the receive buffer for message (maximum allowed message size received from qnetd). (32768) .TP .B net_initial_msg_send_size Initial (used during connection parameter negotiation) maximum size of one send buffer (message) to be sent to server. (32768) .TP .B net_min_msg_send_size Minimum required size of one send buffer (message) to be sent to server. (32768) .TP .B net_max_msg_receive_size Maximum allowed size of receive buffer for a message sent by server. (16777216) .TP .B net_max_send_buffers Maximum number of send buffers. (10) .TP .B net_nss_qnetd_cn Canonical name of qnetd server certificate. (Qnetd Server) .TP .B net_nss_client_cert_nickname NSS nickname of qdevice client certificate. (Cluster Cert) .TP .B net_heartbeat_interval_min Minimum heartbeat timeout accepted by client in ms. (1000) .TP .B net_heartbeat_interval_max Maximum heartbeat timeout accepted by client in ms. (120000) .TP .B net_min_connect_timeout Minimum connection timeout accepted by client in ms. (1000) .TP .B net_max_connect_timeout Maximum connection timeout accepted by client in ms. (120000) .TP .B net_test_algorithm_enabled Enable test algorithm. (if built with --enable-debug on, otherwise off) .SH SEE ALSO .BR corosync-qdevice-tool (8) .BR corosync-qdevice-net-certutil (8) .BR corosync-qnetd (8) .BR corosync.conf (5) .SH AUTHOR Jan Friesse .PP diff --git a/man/corosync-qnetd.8 b/man/corosync-qnetd.8 index 856e6e18..fa582243 100644 --- a/man/corosync-qnetd.8 +++ b/man/corosync-qnetd.8 @@ -1,227 +1,227 @@ .\"/* .\" * Copyright (C) 2016 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Jan Friesse .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of Red Hat, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH COROSYNC-QNETD 8 2016-06-29 .SH NAME corosync-qnetd \- QNet daemon .SH SYNOPSIS .B "corosync-qnetd [-46dfhv] [-l listen_addr] [-p listen_port] [-s tls] .B [-c client_cert_required] [-m max_clients] [-S option=value[,option2=value2,...]]" .SH DESCRIPTION .B corosync-qnetd is a daemon running outside of the cluster with the purpose of providing a vote to the .B corosync-qdevice model net. It's designed to support multiple clusters and be almost configuration and state free. New clusters are handled dynamically and no configuration file exists. It's also able to run as non-root user - which is recommended. Connection between the .B corosync-qdevice model net client can be optionally configured with TLS client certificate checking. The communication protocol between server and client is designed to be very simple and allow backwards compatibility. .SH OPTIONS .TP .B -4 and its counterpart .B -6 are used to force IPv4 or IPv6 communication. The default is to listen on both address families. .TP .B -d Turn on debug logging. By default the messages sent to syslog are purely operational, this option sends additional debug messages. For even more detail use the .B -d parameter twice. .TP .B -f Do not daemonize, run in the foreground. .TP .B -h Show short help text .TP .B -v Show version and supported communication protocol messages/options. .TP .B -l IP address to listen on. By default the daemon listens on all addresses (wildcard). .TP .B -p TCP port to listen on. Default port is 5403. .TP .B -s Determines if TLS should be used and can be one of .I on/off/required (the default is .I on ). .I on means TLS is enabled but the client is not required to start TLS, .I off means TLS is completely disabled, and .I required means TLS is required. .I on and .I required require the NSS database to be properly initialized by running the .B corosync-qnetd-certutil command. .TP .B -c can be set to .I on/off. This option only makes sense if TLS is enabled. When .B -c is .I on a client is required to send its client certificate (default). .TP .B -m Maximum simultaneous clients. The default is 0 which means no limit. .TP .B -S Set advanced settings described in its own section below. This option shouldn't be generally used because most of the options are not safe to change. .SH UNPRIVILEGED USER CONFIGURATION It's generally recommended to run .B corosync-qnetd as a non root user. If you get a package from a distribution its highly possible that the packager has done all the hard work for you. If the installation is performed from source code, a few steps have to be taken. First it's necessary to create an unprivileged user/group. The following commands can be used (executed as root): .nf # groupadd -r coroqnetd # useradd -r -g coroqnetd -d / -s /sbin/nologin -c "User for corosync-qnetd" coroqnetd .fi The next step is to set the correct owner and group on /etc/corosync/qnetd and /var/run/corosync-qnetd directories. .nf # chown -R coroqnetd:coroqnetd /etc/corosync/qnetd /var/run/corosync-qnetd .fi Some systems have the /var/run directory on a tmpfs file system which gets discarded after a reboot. The solution is to use an initscript which takes care of the /var/run/corosync-qnetd creation and sets the correct owner and permissions. For systems with systemd it's possible to use a tmpfile.d configuration file (installed by default if systemd is enabled during corosync compilation). The last step is to make sure .B corosync-qnetd is really executed as an unprivileged user. For initscript systems it's enough to set the line COROSYNC_QNETD_RUNAS in /etc/(sysconfig|default)/corosync-qnetd file. If the file is not already installed then use the one provided in the corosync source code (init/corosync-qnetd.sysconfig.example). For systemd, overwrite/copy the corosync-qnetd.service unit file and uncomment/change the "User=" directive. .SH TLS CONFIGURATION For TLS to work its necessary to create the NSS database. If pcs is used then the following steps are not needed because pcs does them automatically. .B corosync-qnetd-certutil is the tool to perform required actions. Just run: .nf # corosync-qnetd-certutil -i .fi If TLS is not required then simply edit /etc/(sysconfig|default)/corosync-qnetd or systemd unit file and add the parameter .B -s .I off in the proper place. .SH ADVANCED SETTINGS Set by the .B -S -option. The default value is shown in parantheses. +option. The default value is shown in parentheses. .TP .B listen_backlog Parameter passed to the listen syscall on the network socket. (10) .TP .B max_client_send_buffers Maximum number of send buffers for one client. (32) .TP .B max_client_send_size Maximum size of one send buffer (message) to be sent to a client. (32768) .TP .B max_client_receive_size Maximum size of the receive buffer for a client message (maximum allowed message size received by client). (32768) .TP .B nss_db_dir NSS database directory. (/etc/corosync/qnetd/nssdb) .TP .B cert_nickname NSS nickname of qnetd server certificate. (QNetd Cert) .TP .B heartbeat_interval_min Minimum heartbeat timeout accepted by server in ms. (1000) .TP .B heartbeat_interval_max Maximum heartbeat timeout accepted by server in ms. (120000) .TP .B dpd_enabled Dead peer detection enabled. (on) .TP .B dpd_interval How often the DPD algorithm detects dead peers in ms. (10000) .TP .B lock_file Lock file location. (/var/run/corosync-qnetd/corosync-qnetd.pid) .TP .B local_socket_file Internal IPC socket file location. (/var/run/corosync-qnetd/corosync-qnetd.sock) .TP .B local_socket_backlog Parameter passed to listen syscall on the local socket. (10) .TP .B ipc_max_clients Maximum allowed simultaneous IPC clients. (10) .TP .B ipc_max_receive_size Maximum size of a message received by IPC client. (4096) .TP .B ipc_max_send_size Maximum size of a message sent to an IPC client. (10485760) .SH SEE ALSO .BR corosync-qnetd-tool (8) .BR corosync-qnetd-certutil (8) .BR corosync-qdevice (8) .SH AUTHOR Jan Friesse .PP diff --git a/man/quorum_initialize.3.in b/man/quorum_initialize.3.in index e55c284a..227c2c08 100644 --- a/man/quorum_initialize.3.in +++ b/man/quorum_initialize.3.in @@ -1,113 +1,113 @@ .\"/* .\" * Copyright (c) 2012 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Fabio M. Di Nitto .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of the MontaVista Software, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH QUORUM_INITIALIZE 3 @BUILDDATE@ "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME quorum_initialize \- Create a new connection to the Quorum service .SH SYNOPSIS .B #include .sp .BI "int quorum_initialize(quorum_handle_t *" handle ", quorum_callbacks_t *" callbacks ", uint32_t *" quorum_type ");" .SH DESCRIPTION The .B quorum_initialize function is used to initialize a connection to the quorum API. .PP Each application may have several connections to the quorum API. Each application uses the .I handle argument to uniquely identify the connection. The .I handle argument is then used in other function calls to identify the connection to be used for communication with the quorum service. .PP -Every time the voting configuraton changes (eg a node joins or leave the cluster) or the quorum status change, +Every time the voting configuration changes (eg a node joins or leave the cluster) or the quorum status change, the callback is called. The callback function is described by the following type definitions: .nf typedef void (*quorum_notification_fn_t) ( quorum_handle_t handle, uint32_t quorate, uint64_t ring_seq, uint32_t view_list_entries, uint32_t *view_list ); .fi .PP The .I callbacks argument is of the type: .nf typedef struct { quorum_notification_fn_t quorum_notify_fn; } quorum_callbacks_t; .fi .PP The .I quorum_type argument is set to: .nf #define QUORUM_FREE 0 #define QUORUM_SET 1 .fi .PP .I QUORUM_FREE value means that no quorum algorithm is loaded and that no callbacks will take place. .PP .I QUORUM_SET value means that one quorum algorithm is configured and that callbacks will take place. .PP When a configuration change occurs, the callback is called from the .B quorum_dispatch() function. .PP .SH RETURN VALUE This call returns the CS_OK value if successful, otherwise an error is returned. .PP .SH ERRORS @COMMONIPCERRORS@ .SH "SEE ALSO" .BR quorum_overview (8), .BR quorum_finalize (3), .BR quorum_getquorate (3), .BR quorum_trackstart (3), .BR quorum_trackstop (3), .BR quorum_fd_get (3), .BR quorum_dispatch (3), .BR quorum_context_set (3), .BR quorum_context_get (3) .PP diff --git a/man/quorum_trackstart.3.in b/man/quorum_trackstart.3.in index 044a0480..9e3ff3fc 100644 --- a/man/quorum_trackstart.3.in +++ b/man/quorum_trackstart.3.in @@ -1,77 +1,77 @@ .\"/* .\" * Copyright (c) 2012 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Fabio M. Di Nitto .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of the MontaVista Software, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH QUORUM_TRACKSTART 3 @BUILDDATE@ "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME quorum_trackstart \- Enable callbacks notification. .SH SYNOPSIS .B #include .sp .BI "int quorum_trackstart(quorum_handle_t *" handle ", unsigned int " flags ");" .SH DESCRIPTION The .B quorum_trackstart function is used to enable callbacks notification from the quorum API. .PP -Every time the voting configuraton changes (eg a node joins or leave the cluster) +Every time the voting configuration changes (eg a node joins or leave the cluster) or the quorum status change, the notification is queued. .PP The notification is dispatched via .B quorum_dispatch() function that will execute the callback. .PP The .I flags argument is defined by one or more of the following values and values can be bitwise-or'd .nf #define CS_TRACK_CURRENT 0x01 #define CS_TRACK_CHANGES 0x02 #define CS_TRACK_CHANGES_ONLY 0x04 .fi .SH RETURN VALUE This call returns the CS_OK value if successful, otherwise an error is returned. .PP .SH ERRORS @COMMONIPCERRORS@ .SH "SEE ALSO" .BR quorum_overview (8), .BR quorum_initialize (3), .BR quorum_finalize (3), .BR quorum_getquorate (3), .BR quorum_trackstop (3), .BR quorum_fd_get (3), .BR quorum_dispatch (3), .BR quorum_context_set (3), .BR quorum_context_get (3) .PP diff --git a/man/votequorum_trackstart.3.in b/man/votequorum_trackstart.3.in index b440713c..af3310b0 100644 --- a/man/votequorum_trackstart.3.in +++ b/man/votequorum_trackstart.3.in @@ -1,83 +1,83 @@ .\"/* .\" * Copyright (c) 2012 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Fabio M. Di Nitto .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of the MontaVista Software, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH VOTEQUORUM_TRACKSTART 3 @BUILDDATE@ "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME votequorum_trackstart \- Enable callbacks notification. .SH SYNOPSIS .B #include .sp .BI "int votequorum_trackstart(votequorum_handle_t *" handle ", uint64_t " context ", unsigned int " flags ");" .SH DESCRIPTION The .B votequorum_trackstart function is used to enable callbacks notification from the votequorum API. .PP -Every time the voting configuraton changes (eg a node joins or leave the cluster) +Every time the voting configuration changes (eg a node joins or leave the cluster) or the quorum status change or the expected votes changes, the notification is queued. .PP The notification is dispatched via .B votequorum_dispatch() function that will execute the callback. .PP The .I context option allows one to set a tracking context. .PP The .I flags argument is defined by one or more of the following values and values can be bitwise-or'd .nf #define CS_TRACK_CURRENT 0x01 #define CS_TRACK_CHANGES 0x02 #define CS_TRACK_CHANGES_ONLY 0x04 .fi .SH RETURN VALUE This call returns the CS_OK value if successful, otherwise an error is returned. .PP .SH ERRORS @COMMONIPCERRORS@ .SH "SEE ALSO" .BR votequorum_overview (8), .BR votequorum_initialize (3), .BR votequorum_finalize (3), .BR votequorum_getinfo (3), .BR votequorum_trackstop (3), .BR votequorum_fd_get (3), .BR votequorum_dispatch (3), .BR votequorum_context_set (3), .BR votequorum_context_get (3), .BR votequorum_setexpected (3), .BR votequorum_setvotes (3) .PP diff --git a/qdevices/corosync-qdevice.c b/qdevices/corosync-qdevice.c index fd932f10..9916c69b 100644 --- a/qdevices/corosync-qdevice.c +++ b/qdevices/corosync-qdevice.c @@ -1,261 +1,261 @@ /* * Copyright (c) 2015-2016 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "dynar.h" #include "dynar-str.h" #include "dynar-getopt-lex.h" #include "qdevice-advanced-settings.h" #include "qdevice-config.h" #include "qdevice-cmap.h" #include "qdevice-ipc.h" #include "qdevice-log.h" #include "qdevice-model.h" #include "qdevice-votequorum.h" #include "utils.h" struct qdevice_instance *global_instance; static void signal_int_handler(int sig) { qdevice_log(LOG_DEBUG, "SIGINT received - closing local unix socket"); qdevice_ipc_close(global_instance); } static void signal_term_handler(int sig) { qdevice_log(LOG_DEBUG, "SIGTERM received - closing server socket"); qdevice_ipc_close(global_instance); } static void signal_handlers_register(void) { struct sigaction act; act.sa_handler = signal_int_handler; sigemptyset(&act.sa_mask); act.sa_flags = SA_RESTART; sigaction(SIGINT, &act, NULL); act.sa_handler = signal_term_handler; sigemptyset(&act.sa_mask); act.sa_flags = SA_RESTART; sigaction(SIGTERM, &act, NULL); } static void usage(void) { printf("usage: %s [-dfh] [-S option=value[,option2=value2,...]]\n", QDEVICE_PROGRAM_NAME); } static void cli_parse_long_opt(struct qdevice_advanced_settings *advanced_settings, const char *long_opt) { struct dynar_getopt_lex lex; struct dynar dynar_long_opt; const char *opt; const char *val; int res; dynar_init(&dynar_long_opt, strlen(long_opt) + 1); if (dynar_str_cpy(&dynar_long_opt, long_opt) != 0) { errx(1, "Can't alloc memory for long option"); } dynar_getopt_lex_init(&lex, &dynar_long_opt); while (dynar_getopt_lex_token_next(&lex) == 0 && strcmp(dynar_data(&lex.option), "") != 0) { opt = dynar_data(&lex.option); val = dynar_data(&lex.value); res = qdevice_advanced_settings_set(advanced_settings, opt, val); switch (res) { case -1: errx(1, "Unknown option '%s'", opt); break; case -2: errx(1, "Invalid value '%s' for option '%s'", val, opt); break; } } dynar_getopt_lex_destroy(&lex); dynar_destroy(&dynar_long_opt); } static void cli_parse(int argc, char * const argv[], int *foreground, int *force_debug, struct qdevice_advanced_settings *advanced_settings) { int ch; *foreground = 0; *force_debug = 0; while ((ch = getopt(argc, argv, "dfhS:")) != -1) { switch (ch) { case 'd': *force_debug = 1; break; case 'f': *foreground = 1; break; case 'S': cli_parse_long_opt(advanced_settings, optarg); break; case 'h': case '?': usage(); exit(1); break; } } } int main(int argc, char * const argv[]) { struct qdevice_instance instance; struct qdevice_advanced_settings advanced_settings; int foreground; int force_debug; int lock_file; int another_instance_running; if (qdevice_advanced_settings_init(&advanced_settings) != 0) { errx(1, "Can't alloc memory for advanced settings"); } cli_parse(argc, argv, &foreground, &force_debug, &advanced_settings); qdevice_instance_init(&instance, &advanced_settings); qdevice_cmap_init(&instance); qdevice_log_init(&instance, force_debug); /* * Daemonize */ if (!foreground) { utils_tty_detach(); } if ((lock_file = utils_flock(advanced_settings.lock_file, getpid(), &another_instance_running)) == -1) { if (another_instance_running) { qdevice_log(LOG_ERR, "Another instance is running"); } else { - qdevice_log_err(LOG_ERR, "Can't aquire lock"); + qdevice_log_err(LOG_ERR, "Can't acquire lock"); } exit(1); } qdevice_log(LOG_DEBUG, "Initializing votequorum"); qdevice_votequorum_init(&instance); qdevice_log(LOG_DEBUG, "Initializing local socket"); if (qdevice_ipc_init(&instance) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Registering qdevice models"); qdevice_model_register_all(); qdevice_log(LOG_DEBUG, "Configuring qdevice"); if (qdevice_instance_configure_from_cmap(&instance) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Configuring master_wins"); if (qdevice_votequorum_master_wins(&instance, (advanced_settings.master_wins == QDEVICE_ADVANCED_SETTINGS_MASTER_WINS_FORCE_ON ? 1 : 0)) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Getting configuration node list"); if (qdevice_cmap_store_config_node_list(&instance) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Initializing qdevice model"); if (qdevice_model_init(&instance) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Initializing cmap tracking"); if (qdevice_cmap_add_track(&instance) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Waiting for ring id"); if (qdevice_votequorum_wait_for_ring_id(&instance) != 0) { return (1); } global_instance = &instance; signal_handlers_register(); qdevice_log(LOG_DEBUG, "Running qdevice model"); if (qdevice_model_run(&instance) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Removing cmap tracking"); if (qdevice_cmap_del_track(&instance) != 0) { return (1); } qdevice_log(LOG_DEBUG, "Destorying qdevice model"); qdevice_model_destroy(&instance); qdevice_ipc_destroy(&instance); qdevice_votequorum_destroy(&instance); qdevice_cmap_destroy(&instance); qdevice_log_close(&instance); qdevice_instance_destroy(&instance); qdevice_advanced_settings_destroy(&advanced_settings); return (0); } diff --git a/qdevices/qdevice-instance.c b/qdevices/qdevice-instance.c index 862e0029..75bd5372 100644 --- a/qdevices/qdevice-instance.c +++ b/qdevices/qdevice-instance.c @@ -1,99 +1,99 @@ /* * Copyright (c) 2015-2016 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include "qdevice-instance.h" #include "qdevice-log.h" #include "qdevice-model.h" int qdevice_instance_init(struct qdevice_instance *instance, const struct qdevice_advanced_settings *advanced_settings) { memset(instance, 0, sizeof(*instance)); node_list_init(&instance->config_node_list); instance->vq_last_poll = ((time_t) -1); instance->advanced_settings = advanced_settings; return (0); } int qdevice_instance_destroy(struct qdevice_instance *instance) { node_list_free(&instance->config_node_list); return (0); } int qdevice_instance_configure_from_cmap(struct qdevice_instance *instance) { char *str; if (cmap_get_string(instance->cmap_handle, "quorum.device.model", &str) != CS_OK) { qdevice_log(LOG_ERR, "Can't read quorum.device.model cmap key."); return (-1); } if (qdevice_model_str_to_type(str, &instance->model_type) != 0) { qdevice_log(LOG_ERR, "Configured device model %s is not supported.", str); free(str); return (-1); } free(str); if (cmap_get_uint32(instance->cmap_handle, "runtime.votequorum.this_node_id", &instance->node_id) != CS_OK) { - qdevice_log(LOG_ERR, "Unable to retrive this node nodeid."); + qdevice_log(LOG_ERR, "Unable to retrieve this node nodeid."); return (-1); } if (cmap_get_uint32(instance->cmap_handle, "quorum.device.timeout", &instance->heartbeat_interval) != CS_OK) { instance->heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT; } if (cmap_get_uint32(instance->cmap_handle, "quorum.device.sync_timeout", &instance->sync_heartbeat_interval) != CS_OK) { instance->sync_heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT; } return (0); } diff --git a/qdevices/qdevice-net-msg-received.c b/qdevices/qdevice-net-msg-received.c index b5c2f219..f9148305 100644 --- a/qdevices/qdevice-net-msg-received.c +++ b/qdevices/qdevice-net-msg-received.c @@ -1,942 +1,942 @@ /* * Copyright (c) 2015-2016 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include "qdevice-log.h" #include "qdevice-net-algorithm.h" #include "qdevice-net-cast-vote-timer.h" #include "qdevice-net-msg-received.h" #include "qdevice-net-send.h" #include "qdevice-net-votequorum.h" #include "qdevice-net-echo-request-timer.h" #include "msg.h" #include "utils.h" /* * -1 - Incompatible tls combination * 0 - Don't use TLS * 1 - Use TLS */ static int qdevice_net_msg_received_check_tls_compatibility(enum tlv_tls_supported server_tls, enum tlv_tls_supported client_tls) { int res; res = -1; switch (server_tls) { case TLV_TLS_UNSUPPORTED: switch (client_tls) { case TLV_TLS_UNSUPPORTED: res = 0; break; case TLV_TLS_SUPPORTED: res = 0; break; case TLV_TLS_REQUIRED: res = -1; break; } break; case TLV_TLS_SUPPORTED: switch (client_tls) { case TLV_TLS_UNSUPPORTED: res = 0; break; case TLV_TLS_SUPPORTED: res = 1; break; case TLV_TLS_REQUIRED: res = 1; break; } break; case TLV_TLS_REQUIRED: switch (client_tls) { case TLV_TLS_UNSUPPORTED: res = -1; break; case TLV_TLS_SUPPORTED: res = 1; break; case TLV_TLS_REQUIRED: res = 1; break; } break; } return (res); } static void qdevice_net_msg_received_log_msg_decode_error(int ret) { switch (ret) { case -1: qdevice_log(LOG_WARNING, "Received message with option with invalid length"); break; case -2: qdevice_log(LOG_CRIT, "Can't allocate memory"); break; case -3: qdevice_log(LOG_WARNING, "Received inconsistent msg (tlv len > msg size)"); break; case -4: qdevice_log(LOG_ERR, "Received message with option with invalid value"); break; default: - qdevice_log(LOG_ERR, "Unknown error occured when decoding message"); + qdevice_log(LOG_ERR, "Unknown error occurred when decoding message"); break; } } static int qdevice_net_msg_received_unexpected_msg(struct qdevice_net_instance *instance, const struct msg_decoded *msg, const char *msg_str) { qdevice_log(LOG_ERR, "Received unexpected %s message. Disconnecting from server", msg_str); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; return (-1); } static int qdevice_net_msg_received_init(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "init")); } static int qdevice_net_msg_received_preinit(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "preinit")); } static int qdevice_net_msg_check_seq_number(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { if (!msg->seq_number_set || msg->seq_number != instance->last_msg_seq_num) { qdevice_log(LOG_ERR, "Received message doesn't contain seq_number or " "it's not expected one."); return (-1); } return (0); } static int qdevice_net_msg_received_preinit_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { int res; struct send_buffer_list_entry *send_buffer; qdevice_log(LOG_DEBUG, "Received preinit reply msg"); if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_PREINIT_REPLY) { qdevice_log(LOG_ERR, "Received unexpected preinit reply message. " "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; return (-1); } if (qdevice_net_msg_check_seq_number(instance, msg) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } /* * Check TLS support */ if (!msg->tls_supported_set || !msg->tls_client_cert_required_set) { qdevice_log(LOG_ERR, "Required tls_supported or tls_client_cert_required " "option is unset"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } res = qdevice_net_msg_received_check_tls_compatibility(msg->tls_supported, instance->tls_supported); if (res == -1) { qdevice_log(LOG_ERR, "Incompatible tls configuration (server %u client %u)", msg->tls_supported, instance->tls_supported); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_INCOMPATIBLE_TLS; return (-1); } else if (res == 1) { /* * Start TLS */ send_buffer = send_buffer_list_get_new(&instance->send_buffer_list); if (send_buffer == NULL) { qdevice_log(LOG_ERR, "Can't allocate send list buffer for " "starttls msg"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; return (-1); } instance->last_msg_seq_num++; if (msg_create_starttls(&send_buffer->buffer, 1, instance->last_msg_seq_num) == 0) { qdevice_log(LOG_ERR, "Can't allocate send buffer for starttls msg"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; send_buffer_list_discard_new(&instance->send_buffer_list, send_buffer); return (-1); } send_buffer_list_put(&instance->send_buffer_list, send_buffer); instance->state = QDEVICE_NET_INSTANCE_STATE_WAITING_STARTTLS_BEING_SENT; } else if (res == 0) { if (qdevice_net_send_init(instance) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; return (-1); } } return (0); } static int qdevice_net_msg_received_init_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { size_t zi; int res; int send_config_node_list; int send_membership_node_list; int send_quorum_node_list; enum tlv_vote vote; struct tlv_ring_id tlv_rid; enum tlv_quorate quorate; qdevice_log(LOG_DEBUG, "Received init reply msg"); if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_INIT_REPLY) { qdevice_log(LOG_ERR, "Received unexpected init reply message. " "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; return (-1); } if (qdevice_net_msg_check_seq_number(instance, msg) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } if (!msg->reply_error_code_set) { qdevice_log(LOG_ERR, "Received init reply message without error code." "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } if (msg->reply_error_code != TLV_REPLY_ERROR_CODE_NO_ERROR) { qdevice_log(LOG_ERR, "Received init reply message with error code %"PRIu16". " "Disconnecting from server", msg->reply_error_code); if (msg->reply_error_code == TLV_REPLY_ERROR_CODE_DUPLICATE_NODE_ID) { qdevice_log(LOG_ERR, "Duplicate node id may be result of server not yet " "accepted this node disconnect. Retry again."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_DUPLICATE_NODE_ID_ERROR; } else if (msg->reply_error_code == TLV_REPLY_ERROR_CODE_TIE_BREAKER_DIFFERS_FROM_OTHER_NODES) { qdevice_log(LOG_ERR, "Configured tie-breaker differs in cluster. This may be " "result of server not yet accepted this node disconnect. Retry again."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_TIE_BREAKER_DIFFERS_FROM_OTHER_NODES_ERROR; } else if (msg->reply_error_code == TLV_REPLY_ERROR_CODE_ALGORITHM_DIFFERS_FROM_OTHER_NODES) { qdevice_log(LOG_ERR, "Configured algorithm differs in cluster. This may be " "result of server not yet accepted this node disconnect. Retry again."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_ALGORITHM_DIFFERS_FROM_OTHER_NODES_ERROR; } else { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_ERROR; } return (-1); } if (!msg->server_maximum_request_size_set || !msg->server_maximum_reply_size_set) { qdevice_log(LOG_ERR, "Required maximum_request_size or maximum_reply_size " "option is unset"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } if (msg->supported_messages == NULL || msg->supported_options == NULL) { qdevice_log(LOG_ERR, "Required supported messages or supported options " "option is unset"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } if (msg->supported_decision_algorithms == NULL) { qdevice_log(LOG_ERR, "Required supported decision algorithms option is unset"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } if (msg->server_maximum_request_size < instance->advanced_settings->net_min_msg_send_size) { qdevice_log(LOG_ERR, "Server accepts maximum %zu bytes message but this client minimum " "is %zu bytes.", msg->server_maximum_request_size, instance->advanced_settings->net_min_msg_send_size); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_INCOMPATIBLE_MSG_SIZE; return (-1); } if (msg->server_maximum_reply_size > instance->advanced_settings->net_max_msg_receive_size) { qdevice_log(LOG_ERR, "Server may send message up to %zu bytes message but this client maximum " "is %zu bytes.", msg->server_maximum_reply_size, instance->advanced_settings->net_max_msg_receive_size); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_INCOMPATIBLE_MSG_SIZE; return (-1); } /* * Change buffer sizes */ dynar_set_max_size(&instance->receive_buffer, msg->server_maximum_reply_size); send_buffer_list_set_max_buffer_size(&instance->send_buffer_list, msg->server_maximum_request_size); /* * Check if server supports decision algorithm we need */ res = 0; for (zi = 0; zi < msg->no_supported_decision_algorithms && !res; zi++) { if (msg->supported_decision_algorithms[zi] == instance->decision_algorithm) { res = 1; } } if (!res) { qdevice_log(LOG_ERR, "Server doesn't support required decision algorithm"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_DOESNT_SUPPORT_REQUIRED_ALGORITHM; return (-1); } /* * Finally fully connected so it's possible to remove connection timer */ if (instance->connect_timer != NULL) { timer_list_delete(&instance->main_timer_list, instance->connect_timer); instance->connect_timer = NULL; } /* * Server accepted heartbeat interval -> schedule regular sending of echo request */ qdevice_net_echo_request_timer_schedule(instance); send_config_node_list = 1; send_membership_node_list = 1; send_quorum_node_list = 1; vote = TLV_VOTE_WAIT_FOR_REPLY; if (qdevice_net_algorithm_connected(instance, &send_config_node_list, &send_membership_node_list, &send_quorum_node_list, &vote) != 0) { qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_CONNECTED_ERR; return (-1); } else { qdevice_log(LOG_DEBUG, "Algorithm decided to %s config node list, %s membership " "node list, %s quorum node list and result vote is %s", (send_config_node_list ? "send" : "not send"), (send_membership_node_list ? "send" : "not send"), (send_quorum_node_list ? "send" : "not send"), tlv_vote_to_str(vote)); } /* * Now we can finally really send node list, votequorum node list and update timer */ if (send_config_node_list) { if (qdevice_net_send_config_node_list(instance, &instance->qdevice_instance_ptr->config_node_list, instance->qdevice_instance_ptr->config_node_list_version_set, instance->qdevice_instance_ptr->config_node_list_version, 1) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; return (-1); } } if (send_membership_node_list) { qdevice_net_votequorum_ring_id_to_tlv(&tlv_rid, &instance->qdevice_instance_ptr->vq_node_list_ring_id); if (qdevice_net_send_membership_node_list(instance, &tlv_rid, instance->qdevice_instance_ptr->vq_node_list_entries, instance->qdevice_instance_ptr->vq_node_list) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; return (-1); } } if (send_quorum_node_list) { quorate = (instance->qdevice_instance_ptr->vq_quorum_quorate ? TLV_QUORATE_QUORATE : TLV_QUORATE_INQUORATE); if (qdevice_net_send_quorum_node_list(instance, quorate, instance->qdevice_instance_ptr->vq_quorum_node_list_entries, instance->qdevice_instance_ptr->vq_quorum_node_list) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; return (-1); } } if (qdevice_net_cast_vote_timer_update(instance, vote) != 0) { qdevice_log(LOG_CRIT, "qdevice_net_msg_received_set_option_reply fatal error. " " Can't update cast vote timer vote"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER; } instance->state = QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS; instance->connected_since_time = time(NULL); return (0); } static int qdevice_net_msg_received_starttls(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "starttls")); } static int qdevice_net_msg_received_server_error(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { if (!msg->reply_error_code_set) { qdevice_log(LOG_ERR, "Received server error without error code set. " "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; } else { qdevice_log(LOG_ERR, "Received server error %"PRIu16". " "Disconnecting from server", msg->reply_error_code); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_ERROR; } return (-1); } static int qdevice_net_msg_received_set_option(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "set option")); } static int qdevice_net_msg_received_set_option_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) { qdevice_log(LOG_ERR, "Received unexpected set option reply message. " "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; return (-1); } if (qdevice_net_msg_check_seq_number(instance, msg) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } qdevice_net_echo_request_timer_schedule(instance); return (0); } static int qdevice_net_msg_received_echo_request(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "echo request")); } static int qdevice_net_msg_received_echo_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { if (!msg->seq_number_set) { qdevice_log(LOG_ERR, "Received echo reply message doesn't contain seq_number."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } if (msg->seq_number != instance->echo_request_expected_msg_seq_num) { qdevice_log(LOG_WARNING, "Received echo reply message seq_number is not expected one."); } if (qdevice_net_algorithm_echo_reply_received(instance, msg->seq_number, msg->seq_number == instance->echo_request_expected_msg_seq_num) != 0) { qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_ECHO_REPLY_RECEIVED_ERR; return (-1); } instance->echo_reply_received_msg_seq_num = msg->seq_number; instance->last_echo_reply_received_time = time(NULL); return (0); } static int qdevice_net_msg_received_node_list(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "node list")); } static int qdevice_net_msg_received_node_list_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { const char *str; enum tlv_vote result_vote; int res; int case_processed; int ring_id_is_valid; if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) { qdevice_log(LOG_ERR, "Received unexpected node list reply message. " "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; return (-1); } if (!msg->vote_set || !msg->seq_number_set || !msg->node_list_type_set) { qdevice_log(LOG_ERR, "Received node list reply message without " "required options. Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } if (!msg->ring_id_set) { qdevice_log(LOG_ERR, "Received node list reply message " "without ring id set. Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } str = NULL; switch (msg->node_list_type) { case TLV_NODE_LIST_TYPE_INITIAL_CONFIG: str = "initial config"; break; case TLV_NODE_LIST_TYPE_CHANGED_CONFIG: str = "changed config"; break; case TLV_NODE_LIST_TYPE_MEMBERSHIP: str ="membership"; break; case TLV_NODE_LIST_TYPE_QUORUM: str ="quorum"; break; /* * Default is not defined intentionally. Compiler shows warning when new node list type * is added */ } if (str == NULL) { qdevice_log(LOG_CRIT, "qdevice_net_msg_received_node_list_reply fatal error. " "Unhandled node_list_type (debug output)"); exit(1); } qdevice_log(LOG_DEBUG, "Received %s node list reply", str); qdevice_log(LOG_DEBUG, " seq = "UTILS_PRI_MSG_SEQ, msg->seq_number); qdevice_log(LOG_DEBUG, " vote = %s", tlv_vote_to_str(msg->vote)); qdevice_log(LOG_DEBUG, " ring id = ("UTILS_PRI_RING_ID")", msg->ring_id.node_id, msg->ring_id.seq); /* * Call algorithm */ result_vote = msg->vote; if (!tlv_ring_id_eq(&msg->ring_id, &instance->last_sent_ring_id)) { ring_id_is_valid = 0; qdevice_log(LOG_DEBUG, "Received node list reply with old ring id."); } else { ring_id_is_valid = 1; } case_processed = 0; switch (msg->node_list_type) { case TLV_NODE_LIST_TYPE_INITIAL_CONFIG: case TLV_NODE_LIST_TYPE_CHANGED_CONFIG: case_processed = 1; res = qdevice_net_algorithm_config_node_list_reply_received(instance, msg->seq_number, (msg->node_list_type == TLV_NODE_LIST_TYPE_INITIAL_CONFIG), &msg->ring_id, ring_id_is_valid, &result_vote); break; case TLV_NODE_LIST_TYPE_MEMBERSHIP: case_processed = 1; res = qdevice_net_algorithm_membership_node_list_reply_received(instance, msg->seq_number, &msg->ring_id, ring_id_is_valid, &result_vote); break; case TLV_NODE_LIST_TYPE_QUORUM: case_processed = 1; res = qdevice_net_algorithm_quorum_node_list_reply_received(instance, msg->seq_number, &msg->ring_id, ring_id_is_valid, &result_vote); break; /* * Default is not defined intentionally. Compiler shows warning when new node list type * is added */ } if (!case_processed) { qdevice_log(LOG_CRIT, "qdevice_net_msg_received_node_list_reply fatal error. " "Unhandled node_list_type (algorithm call)"); exit(1); } if (res != 0) { qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_NODE_LIST_REPLY_ERR; return (-1); } else { qdevice_log(LOG_DEBUG, "Algorithm result vote is %s", tlv_vote_to_str(result_vote)); } if (qdevice_net_cast_vote_timer_update(instance, result_vote) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER; return (-1); } return (0); } static int qdevice_net_msg_received_ask_for_vote(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "ask for vote")); } static int qdevice_net_msg_received_ask_for_vote_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { enum tlv_vote result_vote; int ring_id_is_valid; if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) { qdevice_log(LOG_ERR, "Received unexpected ask for vote reply message. " "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; return (-1); } if (!msg->vote_set || !msg->seq_number_set || !msg->ring_id_set) { qdevice_log(LOG_ERR, "Received node list reply message without " "required options. Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } qdevice_log(LOG_DEBUG, "Received ask for vote reply"); qdevice_log(LOG_DEBUG, " seq = "UTILS_PRI_MSG_SEQ, msg->seq_number); qdevice_log(LOG_DEBUG, " vote = %s", tlv_vote_to_str(msg->vote)); qdevice_log(LOG_DEBUG, " ring id = ("UTILS_PRI_RING_ID")", msg->ring_id.node_id, msg->ring_id.seq); result_vote = msg->vote; if (!tlv_ring_id_eq(&msg->ring_id, &instance->last_sent_ring_id)) { ring_id_is_valid = 0; qdevice_log(LOG_DEBUG, "Received ask for vote reply with old ring id."); } else { ring_id_is_valid = 1; } if (qdevice_net_algorithm_ask_for_vote_reply_received(instance, msg->seq_number, &msg->ring_id, ring_id_is_valid, &result_vote) != 0) { qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_ASK_FOR_VOTE_REPLY_ERR; return (-1); } else { qdevice_log(LOG_DEBUG, "Algorithm result vote is %s", tlv_vote_to_str(result_vote)); } if (qdevice_net_cast_vote_timer_update(instance, result_vote) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER; return (-1); } return (0); } static int qdevice_net_msg_received_vote_info(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { struct send_buffer_list_entry *send_buffer; enum tlv_vote result_vote; int ring_id_is_valid; if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) { qdevice_log(LOG_ERR, "Received unexpected vote info message. " "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; return (-1); } if (!msg->vote_set || !msg->seq_number_set || !msg->ring_id_set) { qdevice_log(LOG_ERR, "Received node list reply message without " "required options. Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING; return (-1); } qdevice_log(LOG_DEBUG, "Received vote info"); qdevice_log(LOG_DEBUG, " seq = "UTILS_PRI_MSG_SEQ, msg->seq_number); qdevice_log(LOG_DEBUG, " vote = %s", tlv_vote_to_str(msg->vote)); qdevice_log(LOG_DEBUG, " ring id = ("UTILS_PRI_RING_ID")", msg->ring_id.node_id, msg->ring_id.seq); result_vote = msg->vote; if (!tlv_ring_id_eq(&msg->ring_id, &instance->last_sent_ring_id)) { ring_id_is_valid = 0; qdevice_log(LOG_DEBUG, "Received vote info with old ring id."); } else { ring_id_is_valid = 1; } if (qdevice_net_algorithm_vote_info_received(instance, msg->seq_number, &msg->ring_id, ring_id_is_valid, &result_vote) != 0) { qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting."); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_VOTE_INFO_ERR; return (-1); } else { qdevice_log(LOG_DEBUG, "Algorithm result vote is %s", tlv_vote_to_str(result_vote)); } if (qdevice_net_cast_vote_timer_update(instance, result_vote) != 0) { instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER; return (-1); } /* * Create reply message */ send_buffer = send_buffer_list_get_new(&instance->send_buffer_list); if (send_buffer == NULL) { qdevice_log(LOG_ERR, "Can't allocate send list buffer for " "vote info reply msg"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; return (-1); } if (msg_create_vote_info_reply(&send_buffer->buffer, msg->seq_number) == 0) { qdevice_log(LOG_ERR, "Can't allocate send buffer for " "vote info reply list msg"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER; send_buffer_list_discard_new(&instance->send_buffer_list, send_buffer); return (-1); } send_buffer_list_put(&instance->send_buffer_list, send_buffer); return (0); } static int qdevice_net_msg_received_vote_info_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg) { return (qdevice_net_msg_received_unexpected_msg(instance, msg, "vote info reply")); } int qdevice_net_msg_received(struct qdevice_net_instance *instance) { struct msg_decoded msg; int res; int ret_val; int msg_processed; msg_decoded_init(&msg); res = msg_decode(&instance->receive_buffer, &msg); if (res != 0) { /* * Error occurred. Disconnect. */ qdevice_net_msg_received_log_msg_decode_error(res); qdevice_log(LOG_ERR, "Disconnecting from server"); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_MSG_DECODE_ERROR; return (-1); } ret_val = 0; msg_processed = 0; switch (msg.type) { case MSG_TYPE_INIT: msg_processed = 1; ret_val = qdevice_net_msg_received_init(instance, &msg); break; case MSG_TYPE_PREINIT: msg_processed = 1; ret_val = qdevice_net_msg_received_preinit(instance, &msg); break; case MSG_TYPE_PREINIT_REPLY: msg_processed = 1; ret_val = qdevice_net_msg_received_preinit_reply(instance, &msg); break; case MSG_TYPE_STARTTLS: msg_processed = 1; ret_val = qdevice_net_msg_received_starttls(instance, &msg); break; case MSG_TYPE_SERVER_ERROR: msg_processed = 1; ret_val = qdevice_net_msg_received_server_error(instance, &msg); break; case MSG_TYPE_INIT_REPLY: msg_processed = 1; ret_val = qdevice_net_msg_received_init_reply(instance, &msg); break; case MSG_TYPE_SET_OPTION: msg_processed = 1; ret_val = qdevice_net_msg_received_set_option(instance, &msg); break; case MSG_TYPE_SET_OPTION_REPLY: msg_processed = 1; ret_val = qdevice_net_msg_received_set_option_reply(instance, &msg); break; case MSG_TYPE_ECHO_REQUEST: msg_processed = 1; ret_val = qdevice_net_msg_received_echo_request(instance, &msg); break; case MSG_TYPE_ECHO_REPLY: msg_processed = 1; ret_val = qdevice_net_msg_received_echo_reply(instance, &msg); break; case MSG_TYPE_NODE_LIST: msg_processed = 1; ret_val = qdevice_net_msg_received_node_list(instance, &msg); break; case MSG_TYPE_NODE_LIST_REPLY: msg_processed = 1; ret_val = qdevice_net_msg_received_node_list_reply(instance, &msg); break; case MSG_TYPE_ASK_FOR_VOTE: msg_processed = 1; ret_val = qdevice_net_msg_received_ask_for_vote(instance, &msg); break; case MSG_TYPE_ASK_FOR_VOTE_REPLY: msg_processed = 1; ret_val = qdevice_net_msg_received_ask_for_vote_reply(instance, &msg); break; case MSG_TYPE_VOTE_INFO: msg_processed = 1; ret_val = qdevice_net_msg_received_vote_info(instance, &msg); break; case MSG_TYPE_VOTE_INFO_REPLY: msg_processed = 1; ret_val = qdevice_net_msg_received_vote_info_reply(instance, &msg); break; /* * Default is not defined intentionally. Compiler shows warning when msg type is added */ } if (!msg_processed) { qdevice_log(LOG_ERR, "Received unsupported message %u. " "Disconnecting from server", msg.type); instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG; ret_val = -1; } msg_decoded_destroy(&msg); return (ret_val); } diff --git a/qdevices/qnetd-algo-ffsplit.c b/qdevices/qnetd-algo-ffsplit.c index 01e5f0c7..356bdbf8 100644 --- a/qdevices/qnetd-algo-ffsplit.c +++ b/qdevices/qnetd-algo-ffsplit.c @@ -1,813 +1,813 @@ /* * Copyright (c) 2015-2016 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "qnetd-algo-ffsplit.h" #include "qnetd-log.h" #include "qnetd-log-debug.h" #include "qnetd-cluster-list.h" #include "qnetd-cluster.h" #include "qnetd-client-send.h" enum qnetd_algo_ffsplit_cluster_state { QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE, QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP, QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS, QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS, }; struct qnetd_algo_ffsplit_cluster_data { enum qnetd_algo_ffsplit_cluster_state cluster_state; const struct node_list *quorate_partition_node_list; }; enum qnetd_algo_ffsplit_client_state { QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE, QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK, QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK, }; struct qnetd_algo_ffsplit_client_data { enum qnetd_algo_ffsplit_client_state client_state; uint32_t vote_info_expected_seq_num; }; enum tlv_reply_error_code qnetd_algo_ffsplit_client_init(struct qnetd_client *client) { struct qnetd_algo_ffsplit_cluster_data *cluster_data; struct qnetd_algo_ffsplit_client_data *client_data; if (qnetd_cluster_size(client->cluster) == 1) { cluster_data = malloc(sizeof(*cluster_data)); if (cluster_data == NULL) { qnetd_log(LOG_ERR, "ffsplit: Can't initialize cluster data for client %s", client->addr_str); return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR); } memset(cluster_data, 0, sizeof(*cluster_data)); cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE; cluster_data->quorate_partition_node_list = NULL; client->cluster->algorithm_data = cluster_data; } client_data = malloc(sizeof(*client_data)); if (client_data == NULL) { qnetd_log(LOG_ERR, "ffsplit: Can't initialize node data for client %s", client->addr_str); return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR); } memset(client_data, 0, sizeof(*client_data)); client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE; client->algorithm_data = client_data; return (TLV_REPLY_ERROR_CODE_NO_ERROR); } static int -qnetd_algo_ffsplit_is_prefered_partition(const struct qnetd_client *client, +qnetd_algo_ffsplit_is_preferred_partition(const struct qnetd_client *client, const struct node_list *config_node_list, const struct node_list *membership_node_list) { - uint32_t prefered_node_id; + uint32_t preferred_node_id; struct node_list_entry *node_entry; int case_processed; - prefered_node_id = 0; + preferred_node_id = 0; case_processed = 0; switch (client->tie_breaker.mode) { case TLV_TIE_BREAKER_MODE_LOWEST: node_entry = TAILQ_FIRST(config_node_list); - prefered_node_id = node_entry->node_id; + preferred_node_id = node_entry->node_id; TAILQ_FOREACH(node_entry, config_node_list, entries) { - if (node_entry->node_id < prefered_node_id) { - prefered_node_id = node_entry->node_id; + if (node_entry->node_id < preferred_node_id) { + preferred_node_id = node_entry->node_id; } } case_processed = 1; break; case TLV_TIE_BREAKER_MODE_HIGHEST: node_entry = TAILQ_FIRST(config_node_list); - prefered_node_id = node_entry->node_id; + preferred_node_id = node_entry->node_id; TAILQ_FOREACH(node_entry, config_node_list, entries) { - if (node_entry->node_id > prefered_node_id) { - prefered_node_id = node_entry->node_id; + if (node_entry->node_id > preferred_node_id) { + preferred_node_id = node_entry->node_id; } } case_processed = 1; break; case TLV_TIE_BREAKER_MODE_NODE_ID: - prefered_node_id = client->tie_breaker.node_id; + preferred_node_id = client->tie_breaker.node_id; case_processed = 1; break; } if (!case_processed) { - qnetd_log(LOG_CRIT, "qnetd_algo_ffsplit_is_prefered_partition unprocessed " + qnetd_log(LOG_CRIT, "qnetd_algo_ffsplit_is_preferred_partition unprocessed " "tie_breaker.mode"); exit(1); } - return (node_list_find_node_id(membership_node_list, prefered_node_id) != NULL); + return (node_list_find_node_id(membership_node_list, preferred_node_id) != NULL); } static int qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client *client, int client_leaving, const struct tlv_ring_id *ring_id, const struct node_list *config_node_list, const struct node_list *membership_node_list) { const struct qnetd_client *iter_client1, *iter_client2; const struct node_list *config_node_list1, *config_node_list2; const struct node_list *membership_node_list1, *membership_node_list2; const struct node_list_entry *iter_node1, *iter_node2; const struct node_list_entry *iter_node3, *iter_node4; const struct tlv_ring_id *ring_id1, *ring_id2; /* * Test if all active clients share same config list. */ TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) { TAILQ_FOREACH(iter_client2, &client->cluster->client_list, cluster_entries) { if (iter_client1 == iter_client2) { continue; } if (iter_client1->node_id == client->node_id) { if (client_leaving) { continue; } config_node_list1 = config_node_list; } else { config_node_list1 = &iter_client1->configuration_node_list; } if (iter_client2->node_id == client->node_id) { if (client_leaving) { continue; } config_node_list2 = config_node_list; } else { config_node_list2 = &iter_client2->configuration_node_list; } /* * Walk thru all node ids in given config node list... */ TAILQ_FOREACH(iter_node1, config_node_list1, entries) { /* * ... and try to find given node id in other list */ iter_node2 = node_list_find_node_id(config_node_list2, iter_node1->node_id); if (iter_node2 == NULL) { /* * Node with iter_node1->node_id was not found in * config_node_list2 -> lists doesn't match */ return (0); } } } } /* * Test if same partitions share same ring ids and membership node list */ TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) { if (iter_client1->node_id == client->node_id) { if (client_leaving) { continue; } membership_node_list1 = membership_node_list; ring_id1 = ring_id; } else { membership_node_list1 = &iter_client1->last_membership_node_list; ring_id1 = &iter_client1->last_ring_id; } /* * Walk thru all memberships nodes */ TAILQ_FOREACH(iter_node1, membership_node_list1, entries) { /* * try to find client with given node id */ iter_client2 = qnetd_cluster_find_client_by_node_id(client->cluster, iter_node1->node_id); if (iter_client2 == NULL) { /* * Client with given id is not connected */ continue; } if (iter_client2->node_id == client->node_id) { if (client_leaving) { continue; } membership_node_list2 = membership_node_list; ring_id2 = ring_id; } else { membership_node_list2 = &iter_client2->last_membership_node_list; ring_id2 = &iter_client2->last_ring_id; } /* * Compare ring ids */ if (!tlv_ring_id_eq(ring_id1, ring_id2)) { return (0); } /* * Now compare that membership node list equals, so walk thru all * members ... */ TAILQ_FOREACH(iter_node3, membership_node_list1, entries) { /* * ... and try to find given node id in other membership node list */ iter_node4 = node_list_find_node_id(membership_node_list2, iter_node3->node_id); if (iter_node4 == NULL) { /* * Node with iter_node3->node_id was not found in * membership_node_list2 -> lists doesn't match */ return (0); } } } } return (1); } static size_t qnetd_algo_ffsplit_no_active_clients_in_partition(const struct qnetd_client *client, const struct node_list *membership_node_list) { const struct node_list_entry *iter_node; const struct qnetd_client *iter_client; size_t res; res = 0; if (client == NULL || membership_node_list == NULL) { return (0); } TAILQ_FOREACH(iter_node, membership_node_list, entries) { iter_client = qnetd_cluster_find_client_by_node_id(client->cluster, iter_node->node_id); if (iter_client != NULL) { res++; } } return (res); } /* * Compares two partitions. Return 1 if client1, config_node_list1, membership_node_list1 is * "better" than client2, config_node_list2, membership_node_list2 */ static int qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1, const struct node_list *config_node_list1, const struct node_list *membership_node_list1, const struct qnetd_client *client2, const struct node_list *config_node_list2, const struct node_list *membership_node_list2) { size_t part1_active_clients, part2_active_clients; int res; res = -1; if (node_list_size(config_node_list1) % 2 != 0) { /* * Odd clusters never split into 50:50. */ if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) { res = 1; goto exit_res; } else { res = 0; goto exit_res; } } else { if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) { res = 1; goto exit_res; } else if (node_list_size(membership_node_list1) < node_list_size(config_node_list1) / 2) { res = 0; goto exit_res; } /* * 50:50 split */ /* * Check how many active clients are in partitions */ part1_active_clients = qnetd_algo_ffsplit_no_active_clients_in_partition( client1, membership_node_list1); part2_active_clients = qnetd_algo_ffsplit_no_active_clients_in_partition( client2, membership_node_list2); if (part1_active_clients > part2_active_clients) { res = 1; goto exit_res; } else if (part1_active_clients < part2_active_clients) { res = 0; goto exit_res; } /* * Number of active clients in both partitions equals. Use tie-breaker. */ - if (qnetd_algo_ffsplit_is_prefered_partition(client1, config_node_list1, + if (qnetd_algo_ffsplit_is_preferred_partition(client1, config_node_list1, membership_node_list1)) { res = 1; goto exit_res; } else { res = 0; goto exit_res; } } exit_res: if (res == -1) { qnetd_log(LOG_CRIT, "qnetd_algo_ffsplit_partition_cmp unhandled case"); exit(1); /* NOTREACHED */ } return (res); } /* * Select best partition for given client->cluster. * If there is no partition which could become quorate, NULL is returned */ static const struct node_list * qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving, const struct node_list *config_node_list, const struct node_list *membership_node_list) { const struct qnetd_client *iter_client; const struct qnetd_client *best_client; const struct node_list *best_config_node_list, *best_membership_node_list; const struct node_list *iter_config_node_list, *iter_membership_node_list; best_client = NULL; best_config_node_list = best_membership_node_list = NULL; /* * Get highest score */ TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) { if (iter_client->node_id == client->node_id) { if (client_leaving) { continue; } iter_config_node_list = config_node_list; iter_membership_node_list = membership_node_list; } else { iter_config_node_list = &iter_client->configuration_node_list; iter_membership_node_list = &iter_client->last_membership_node_list; } if (qnetd_algo_ffsplit_partition_cmp(iter_client, iter_config_node_list, iter_membership_node_list, best_client, best_config_node_list, best_membership_node_list) > 0) { best_client = iter_client; best_config_node_list = iter_config_node_list; best_membership_node_list = iter_membership_node_list; } } return (best_membership_node_list); } /* * Update state of all nodes to match quorate_partition_node_list */ static void qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client *client, int client_leaving, const struct node_list *quorate_partition_node_list) { const struct qnetd_client *iter_client; struct qnetd_algo_ffsplit_client_data *iter_client_data; TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) { iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data; if (iter_client->node_id == client->node_id && client_leaving) { iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE; continue; } if (quorate_partition_node_list == NULL || node_list_find_node_id(quorate_partition_node_list, iter_client->node_id) == NULL) { iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK; } else { iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK; } } } /* * Send vote info. If client_leaving is set, client is ignored. if send_acks * is set, only ACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state), * otherwise only NACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state) * * Returns number of send votes */ static size_t qnetd_algo_ffsplit_send_votes(struct qnetd_client *client, int client_leaving, const struct tlv_ring_id *ring_id, int send_acks) { size_t sent_votes; struct qnetd_client *iter_client; struct qnetd_algo_ffsplit_client_data *iter_client_data; const struct tlv_ring_id *ring_id_to_send; enum tlv_vote vote_to_send; sent_votes = 0; TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) { if (iter_client->node_id == client->node_id) { if (client_leaving) { continue; } ring_id_to_send = ring_id; } else { ring_id_to_send = &iter_client->last_ring_id; } iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data; vote_to_send = TLV_VOTE_UNDEFINED; if (send_acks) { if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) { vote_to_send = TLV_VOTE_ACK; } } else { if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) { vote_to_send = TLV_VOTE_NACK; } } if (vote_to_send != TLV_VOTE_UNDEFINED) { iter_client_data->vote_info_expected_seq_num++; sent_votes++; if (qnetd_client_send_vote_info(iter_client, iter_client_data->vote_info_expected_seq_num, ring_id_to_send, vote_to_send) == -1) { client->schedule_disconnect = 1; } } } return (sent_votes); } /* * Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is * set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is * not set */ static size_t qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int sending_acks) { size_t no_clients; struct qnetd_client *iter_client; struct qnetd_algo_ffsplit_client_data *iter_client_data; no_clients = 0; TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) { iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data; if (sending_acks && iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) { no_clients++; } if (!sending_acks && iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) { no_clients++; } } return (no_clients); } static enum tlv_vote qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving, const struct tlv_ring_id *ring_id, const struct node_list *config_node_list, const struct node_list *membership_node_list) { struct qnetd_algo_ffsplit_cluster_data *cluster_data; const struct node_list *quorate_partition_node_list; cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data; cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP; if (!qnetd_algo_ffsplit_is_membership_stable(client, client_leaving, ring_id, config_node_list, membership_node_list)) { /* * Wait until membership is stable */ qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name); return (TLV_VOTE_WAIT_FOR_REPLY); } qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name); quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving, config_node_list, membership_node_list); cluster_data->quorate_partition_node_list = quorate_partition_node_list; if (quorate_partition_node_list == NULL) { qnetd_log(LOG_DEBUG, "ffsplit: No quorate partition was selected"); } else { qnetd_log(LOG_DEBUG, "ffsplit: Quorate partition selected"); qnetd_log_debug_dump_node_list(client, quorate_partition_node_list); } qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list); cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS; if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 0) == 0) { qnetd_log(LOG_DEBUG, "ffsplit: No client gets NACK"); /* * No one gets nack -> send acks */ cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS; if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 1) == 0) { qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK"); /* * No one gets acks -> finished */ cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE; } } return (TLV_VOTE_NO_CHANGE); } enum tlv_reply_error_code qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client, uint32_t msg_seq_num, int config_version_set, uint64_t config_version, const struct node_list *nodes, int initial, enum tlv_vote *result_vote) { if (node_list_size(nodes) == 0) { /* * Empty node list shouldn't happen */ qnetd_log(LOG_ERR, "ffsplit: Received empty config node list for client %s", client->addr_str); return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST); } if (node_list_find_node_id(nodes, client->node_id) == NULL) { /* * Current node is not in node list */ qnetd_log(LOG_ERR, "ffsplit: Received config node list without client %s", client->addr_str); return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST); } if (initial || node_list_size(&client->last_membership_node_list) == 0) { /* * Initial node list -> membership is going to be send by client */ *result_vote = TLV_VOTE_ASK_LATER; } else { *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id, nodes, &client->last_membership_node_list); } return (TLV_REPLY_ERROR_CODE_NO_ERROR); } /* * Called after client sent membership node list. * All client fields are already set. Nodes is actual node list. * msg_seq_num is 32-bit number set by client. If client sent config file version, * config_version_set is set to 1 and config_version contains valid config file version. * ring_id and quorate are copied from client votequorum callback. * * Function has to return result_vote. This can be one of ack/nack, ask_later (client * should ask later for a vote) or wait_for_reply (client should wait for reply). * * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_* * on failure (error is send back to client) */ enum tlv_reply_error_code qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client, uint32_t msg_seq_num, const struct tlv_ring_id *ring_id, const struct node_list *nodes, enum tlv_vote *result_vote) { if (node_list_size(nodes) == 0) { /* * Empty node list shouldn't happen */ qnetd_log(LOG_ERR, "ffsplit: Received empty membership node list for client %s", client->addr_str); return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST); } if (node_list_find_node_id(nodes, client->node_id) == NULL) { /* * Current node is not in node list */ qnetd_log(LOG_ERR, "ffsplit: Received membership node list without client %s", client->addr_str); return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST); } if (node_list_size(&client->configuration_node_list) == 0) { /* * Config node list not received -> it's going to be sent later */ *result_vote = TLV_VOTE_ASK_LATER; } else { *result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id, &client->configuration_node_list, nodes); } return (TLV_REPLY_ERROR_CODE_NO_ERROR); } enum tlv_reply_error_code qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client, uint32_t msg_seq_num, enum tlv_quorate quorate, const struct node_list *nodes, enum tlv_vote *result_vote) { /* * Quorum node list is informative -> no change */ *result_vote = TLV_VOTE_NO_CHANGE; return (TLV_REPLY_ERROR_CODE_NO_ERROR); } void qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down) { (void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id, &client->configuration_node_list, &client->last_membership_node_list); free(client->algorithm_data); if (qnetd_cluster_size(client->cluster) == 1) { /* * Last client in the cluster */ free(client->cluster->algorithm_data); } } enum tlv_reply_error_code qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client *client, uint32_t msg_seq_num, enum tlv_vote *result_vote) { /* * Ask for vote is not supported in current algorithm */ return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE); } enum tlv_reply_error_code qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client *client, uint32_t msg_seq_num) { struct qnetd_algo_ffsplit_cluster_data *cluster_data; struct qnetd_algo_ffsplit_client_data *client_data; cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data; client_data = (struct qnetd_algo_ffsplit_client_data *)client->algorithm_data; if (client_data->vote_info_expected_seq_num != msg_seq_num) { qnetd_log(LOG_DEBUG, "ffsplit: Received old vote info reply from client %s", client->addr_str); return (TLV_REPLY_ERROR_CODE_NO_ERROR); } client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE; if (cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS && cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS) { return (TLV_REPLY_ERROR_CODE_NO_ERROR); } if (cluster_data->cluster_state == QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS) { if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 0) == 0) { qnetd_log(LOG_DEBUG, "ffsplit: All NACK votes sent for cluster %s", client->cluster_name); cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS; if (qnetd_algo_ffsplit_send_votes(client, 0, &client->last_ring_id, 1) == 0) { qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK"); /* * No one gets acks -> finished */ cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE; } } } else { if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 1) == 0) { qnetd_log(LOG_DEBUG, "ffsplit: All ACK votes sent for cluster %s", client->cluster_name); cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE; } } return (TLV_REPLY_ERROR_CODE_NO_ERROR); } enum tlv_reply_error_code qnetd_algo_ffsplit_timer_callback(struct qnetd_client *client, int *reschedule_timer, int *send_vote, enum tlv_vote *result_vote) { return (TLV_REPLY_ERROR_CODE_NO_ERROR); } static struct qnetd_algorithm qnetd_algo_ffsplit = { .init = qnetd_algo_ffsplit_client_init, .config_node_list_received = qnetd_algo_ffsplit_config_node_list_received, .membership_node_list_received = qnetd_algo_ffsplit_membership_node_list_received, .quorum_node_list_received = qnetd_algo_ffsplit_quorum_node_list_received, .client_disconnect = qnetd_algo_ffsplit_client_disconnect, .ask_for_vote_received = qnetd_algo_ffsplit_ask_for_vote_received, .vote_info_reply_received = qnetd_algo_ffsplit_vote_info_reply_received, .timer_callback = qnetd_algo_ffsplit_timer_callback, }; enum tlv_reply_error_code qnetd_algo_ffsplit_register() { return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT, &qnetd_algo_ffsplit)); } diff --git a/qdevices/qnetd-log.c b/qdevices/qnetd-log.c index 1d046455..6f225aea 100644 --- a/qdevices/qnetd-log.c +++ b/qdevices/qnetd-log.c @@ -1,183 +1,183 @@ /* * Copyright (c) 2015-2016 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "qnet-config.h" #include "qnetd-log.h" static int qnetd_log_config_target = 0; static int qnetd_log_config_debug = 0; static int qnetd_log_config_priority_bump = 0; static const char qnetd_log_month_str[][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; struct qnetd_log_syslog_prio_to_str_item { int priority; const char *priority_str; }; static struct qnetd_log_syslog_prio_to_str_item qnetd_syslog_prio_to_str_array[] = { {LOG_EMERG, "emerg"}, {LOG_ALERT, "alert"}, {LOG_CRIT, "crit"}, {LOG_ERR, "error"}, {LOG_WARNING, "warning"}, {LOG_NOTICE, "notice"}, {LOG_INFO, "info"}, {LOG_DEBUG, "debug"}, {-1, NULL}}; void qnetd_log_init(int target) { qnetd_log_config_target = target; if (qnetd_log_config_target & QNETD_LOG_TARGET_SYSLOG) { openlog(QNETD_PROGRAM_NAME, LOG_PID, LOG_DAEMON); } } static const char * qnetd_log_syslog_prio_to_str(int priority) { if (priority >= LOG_EMERG && priority <= LOG_DEBUG) { return (qnetd_syslog_prio_to_str_array[priority].priority_str); } else { return ("none"); } } void qnetd_log_vprintf(int priority, const char *format, va_list ap) { time_t current_time; struct tm tm_res; int final_priority; va_list ap_copy; if (priority != LOG_DEBUG || (qnetd_log_config_debug)) { if (qnetd_log_config_target & QNETD_LOG_TARGET_STDERR) { current_time = time(NULL); localtime_r(¤t_time, &tm_res); fprintf(stderr, "%s %02d %02d:%02d:%02d ", qnetd_log_month_str[tm_res.tm_mon], tm_res.tm_mday, tm_res.tm_hour, tm_res.tm_min, tm_res.tm_sec); fprintf(stderr, "%-7s ", qnetd_log_syslog_prio_to_str(priority)); va_copy(ap_copy, ap); vfprintf(stderr, format, ap_copy); va_end(ap_copy); fprintf(stderr, "\n"); } if (qnetd_log_config_target & QNETD_LOG_TARGET_SYSLOG) { final_priority = priority; if (qnetd_log_config_priority_bump && priority > LOG_INFO) { final_priority = LOG_INFO; } va_copy(ap_copy, ap); vsyslog(final_priority, format, ap); va_end(ap_copy); } } } void qnetd_log_printf(int priority, const char *format, ...) { va_list ap; va_start(ap, format); qnetd_log_vprintf(priority, format, ap); va_end(ap); } void qnetd_log_close(void) { if (qnetd_log_config_target & QNETD_LOG_TARGET_SYSLOG) { closelog(); } } void qnetd_log_set_debug(int enabled) { qnetd_log_config_debug = enabled; } void qnetd_log_set_priority_bump(int enabled) { qnetd_log_config_priority_bump = enabled; } void qnetd_log_msg_decode_error(int ret) { switch (ret) { case -1: qnetd_log(LOG_WARNING, "Received message with option with invalid length"); break; case -2: qnetd_log(LOG_CRIT, "Can't allocate memory"); break; case -3: qnetd_log(LOG_WARNING, "Received inconsistent msg (tlv len > msg size)"); break; case -4: qnetd_log(LOG_WARNING, "Received message with option with invalid value"); break; default: - qnetd_log(LOG_ERR, "Unknown error occured when decoding message"); + qnetd_log(LOG_ERR, "Unknown error occurred when decoding message"); break; } }