diff --git a/cts/agents/cpg_test_agent.c b/cts/agents/cpg_test_agent.c index a52f4f0a..f729ad2d 100644 --- a/cts/agents/cpg_test_agent.c +++ b/cts/agents/cpg_test_agent.c @@ -1,804 +1,804 @@ /* * Copyright (c) 2010 Red Hat, Inc. * * All rights reserved. * * Author: Angus Salkeld (asalkeld@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include "common_test_agent.h" #include #include typedef enum { MSG_OK, MSG_NODEID_ERR, MSG_PID_ERR, MSG_SEQ_ERR, MSG_SIZE_ERR, MSG_SHA1_ERR, } msg_status_t; typedef struct { uint32_t nodeid; pid_t pid; unsigned char sha1[20]; uint32_t seq; size_t size; unsigned char payload[0]; } msg_t; #define LOG_STR_SIZE 80 typedef struct { char log[LOG_STR_SIZE]; - struct list_head list; + struct qb_list_head list; } log_entry_t; static char big_and_buf[HOW_BIG_AND_BUF]; static int32_t record_config_events_g = 0; static int32_t record_messages_g = 0; static cpg_handle_t cpg_handle = 0; static corosync_cfg_handle_t cfg_handle = 0; static int32_t cpg_fd = -1; static int32_t cfg_fd = -1; -static struct list_head config_chg_log_head; -static struct list_head msg_log_head; +static struct qb_list_head config_chg_log_head; +static struct qb_list_head msg_log_head; static pid_t my_pid; static uint32_t my_nodeid; static int32_t my_seq; static int32_t use_zcb = QB_FALSE; static int32_t my_msgs_to_send; static int32_t my_msgs_sent; static int32_t total_stored_msgs = 0; static int32_t total_msgs_revd = 0; static int32_t in_cnchg = 0; static int32_t pcmk_test = 0; PK11Context* sha1_context; static void send_some_more_messages (void * unused); static char* err_status_string (char * buf, size_t buf_len, msg_status_t status) { switch (status) { case MSG_OK: strncpy (buf, "OK", buf_len); break; case MSG_NODEID_ERR: strncpy (buf, "NODEID_ERR", buf_len); break; case MSG_PID_ERR: strncpy (buf, "PID_ERR", buf_len); break; case MSG_SEQ_ERR: strncpy (buf, "SEQ_ERR", buf_len); break; case MSG_SIZE_ERR: strncpy (buf, "SIZE_ERR", buf_len); break; case MSG_SHA1_ERR: strncpy (buf, "SHA1_ERR", buf_len); break; default: strncpy (buf, "UNKNOWN_ERR", buf_len); break; } if (buf_len > 0) { buf[buf_len - 1] = '\0'; } return buf; } static void delivery_callback ( cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { log_entry_t *log_pt; msg_t *msg_pt = (msg_t*)msg; msg_status_t status = MSG_OK; char status_buf[20]; unsigned char sha1_compare[20]; unsigned int sha1_len; if (record_messages_g == 0) { return; } if (nodeid != msg_pt->nodeid) { status = MSG_NODEID_ERR; } if (pid != msg_pt->pid) { status = MSG_PID_ERR; } if (msg_len != msg_pt->size) { status = MSG_SIZE_ERR; } PK11_DigestBegin(sha1_context); PK11_DigestOp(sha1_context, msg_pt->payload, (msg_pt->size - sizeof (msg_t))); PK11_DigestFinal(sha1_context, sha1_compare, &sha1_len, sizeof(sha1_compare)); if (memcmp (sha1_compare, msg_pt->sha1, 20) != 0) { qb_log (LOG_ERR, "msg seq:%d; incorrect hash", msg_pt->seq); status = MSG_SHA1_ERR; } log_pt = malloc (sizeof(log_entry_t)); - list_init (&log_pt->list); + qb_list_init (&log_pt->list); snprintf (log_pt->log, LOG_STR_SIZE, "%u:%d:%d:%s;", msg_pt->nodeid, msg_pt->seq, my_seq, err_status_string (status_buf, 20, status)); - list_add_tail (&log_pt->list, &msg_log_head); + qb_list_add_tail (&log_pt->list, &msg_log_head); total_stored_msgs++; total_msgs_revd++; my_seq++; if ((total_msgs_revd % 1000) == 0) { qb_log (LOG_INFO, "rx %d", total_msgs_revd); } } static void config_change_callback ( cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t 
left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; log_entry_t *log_pt; /* group_name,ip,pid,join|leave */ if (record_config_events_g > 0) { qb_log (LOG_INFO, "got cpg event[recording] for group %s", groupName->value); } else { qb_log (LOG_INFO, "got cpg event[ignoring] for group %s", groupName->value); } for (i = 0; i < left_list_entries; i++) { if (record_config_events_g > 0) { log_pt = malloc (sizeof(log_entry_t)); - list_init (&log_pt->list); + qb_list_init (&log_pt->list); snprintf (log_pt->log, LOG_STR_SIZE, "%s,%u,%u,left", groupName->value, left_list[i].nodeid,left_list[i].pid); - list_add_tail(&log_pt->list, &config_chg_log_head); + qb_list_add_tail(&log_pt->list, &config_chg_log_head); qb_log (LOG_INFO, "cpg event %s", log_pt->log); } } for (i = 0; i < joined_list_entries; i++) { if (record_config_events_g > 0) { log_pt = malloc (sizeof(log_entry_t)); - list_init (&log_pt->list); + qb_list_init (&log_pt->list); snprintf (log_pt->log, LOG_STR_SIZE, "%s,%u,%u,join", groupName->value, joined_list[i].nodeid,joined_list[i].pid); - list_add_tail (&log_pt->list, &config_chg_log_head); + qb_list_add_tail (&log_pt->list, &config_chg_log_head); qb_log (LOG_INFO, "cpg event %s", log_pt->log); } } if (pcmk_test == 1) { in_cnchg = 1; send_some_more_messages (NULL); in_cnchg = 0; } } static void my_shutdown_callback (corosync_cfg_handle_t handle, corosync_cfg_shutdown_flags_t flags) { qb_log (LOG_CRIT, "flags:%d", flags); if (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST) { corosync_cfg_replyto_shutdown (cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_YES); } } static corosync_cfg_callbacks_t cfg_callbacks = { .corosync_cfg_shutdown_callback = my_shutdown_callback, }; static cpg_callbacks_t callbacks = { .cpg_deliver_fn = delivery_callback, .cpg_confchg_fn = config_change_callback, }; static void record_messages (void) { record_messages_g = 1; qb_log (LOG_INFO, "record:%d", record_messages_g); } static void record_config_events (int sock) { char response[100]; ssize_t rc; size_t send_len; record_config_events_g = 1; qb_log (LOG_INFO, "record:%d", record_config_events_g); snprintf (response, 100, "%s", OK_STR); send_len = strlen (response); rc = send (sock, response, send_len, 0); assert(rc == send_len); } static void read_config_event (int sock) { const char *empty = "None"; - struct list_head * list = config_chg_log_head.next; + struct qb_list_head * list = config_chg_log_head.next; log_entry_t *entry; ssize_t rc; size_t send_len; if (list != &config_chg_log_head) { - entry = list_entry (list, log_entry_t, list); + entry = qb_list_entry (list, log_entry_t, list); send_len = strlen (entry->log); rc = send (sock, entry->log, send_len, 0); - list_del (&entry->list); + qb_list_del (&entry->list); free (entry); } else { qb_log (LOG_DEBUG, "no events in list"); send_len = strlen (empty); rc = send (sock, empty, send_len, 0); } assert(rc == send_len); } static void read_messages (int sock, char* atmost_str) { - struct list_head * list; + struct qb_list_head * list; log_entry_t *entry; int atmost = atoi (atmost_str); int packed = 0; ssize_t rc; if (atmost == 0) atmost = 1; if (atmost > (HOW_BIG_AND_BUF / LOG_STR_SIZE)) atmost = (HOW_BIG_AND_BUF / LOG_STR_SIZE); big_and_buf[0] = '\0'; for (list = msg_log_head.next; - (!list_empty (&msg_log_head) && packed < atmost); ) { + (!qb_list_empty (&msg_log_head) && packed < atmost); ) { - entry = list_entry (list, log_entry_t, list); + entry = qb_list_entry (list, log_entry_t, list); strcat (big_and_buf, entry->log); packed++; 
list = list->next; - list_del (&entry->list); + qb_list_del (&entry->list); free (entry); total_stored_msgs--; } if (packed == 0) { strcpy (big_and_buf, "None"); } else { if ((total_stored_msgs % 1000) == 0) { qb_log(LOG_INFO, "sending %d; total_stored_msgs:%d; len:%d", packed, total_stored_msgs, (int)strlen (big_and_buf)); } } rc = send (sock, big_and_buf, strlen (big_and_buf), 0); assert(rc == strlen (big_and_buf)); } static qb_loop_timer_handle more_messages_timer_handle; static void send_some_more_messages_later (void) { cpg_dispatch (cpg_handle, CS_DISPATCH_ALL); qb_loop_timer_add ( ta_poll_handle_get(), QB_LOOP_MED, 300*QB_TIME_NS_IN_MSEC, NULL, send_some_more_messages, &more_messages_timer_handle); } static void send_some_more_messages_zcb (void) { msg_t *my_msg; int i; int send_now; size_t payload_size; size_t total_size; unsigned int sha1_len; cs_error_t res; cpg_flow_control_state_t fc_state; void *zcb_buffer; if (cpg_fd < 0) return; send_now = my_msgs_to_send; payload_size = (rand() % 100000); total_size = payload_size + sizeof (msg_t); cpg_zcb_alloc (cpg_handle, total_size, &zcb_buffer); my_msg = (msg_t*)zcb_buffer; qb_log(LOG_DEBUG, "send_now:%d", send_now); my_msg->pid = my_pid; my_msg->nodeid = my_nodeid; my_msg->size = sizeof (msg_t) + payload_size; my_msg->seq = my_msgs_sent; for (i = 0; i < payload_size; i++) { my_msg->payload[i] = i; } PK11_DigestBegin(sha1_context); PK11_DigestOp(sha1_context, my_msg->payload, payload_size); PK11_DigestFinal(sha1_context, my_msg->sha1, &sha1_len, sizeof(my_msg->sha1)); for (i = 0; i < send_now; i++) { res = cpg_flow_control_state_get (cpg_handle, &fc_state); if (res == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { /* lets do this later */ send_some_more_messages_later (); qb_log (LOG_INFO, "flow control enabled."); goto free_buffer; } res = cpg_zcb_mcast_joined (cpg_handle, CPG_TYPE_AGREED, zcb_buffer, total_size); if (res == CS_ERR_TRY_AGAIN) { /* lets do this later */ send_some_more_messages_later (); goto free_buffer; } else if (res != CS_OK) { qb_log (LOG_ERR, "cpg_mcast_joined error:%d, exiting.", res); exit (-2); } my_msgs_sent++; my_msgs_to_send--; } free_buffer: cpg_zcb_free (cpg_handle, zcb_buffer); } #define cs_repeat(counter, max, code) do { \ code; \ if (res == CS_ERR_TRY_AGAIN) { \ counter++; \ sleep(counter); \ } \ } while (res == CS_ERR_TRY_AGAIN && counter < max) static unsigned char buffer[200000]; static void send_some_more_messages_normal (void) { msg_t my_msg; struct iovec iov[2]; int i; int send_now; size_t payload_size; cs_error_t res; cpg_flow_control_state_t fc_state; int retries = 0; time_t before; unsigned int sha1_len; if (cpg_fd < 0) return; send_now = my_msgs_to_send; qb_log (LOG_TRACE, "send_now:%d", send_now); my_msg.pid = my_pid; my_msg.nodeid = my_nodeid; payload_size = (rand() % 10000); my_msg.size = sizeof (msg_t) + payload_size; my_msg.seq = my_msgs_sent; for (i = 0; i < payload_size; i++) { buffer[i] = i; } PK11_DigestBegin(sha1_context); PK11_DigestOp(sha1_context, buffer, payload_size); PK11_DigestFinal(sha1_context, my_msg.sha1, &sha1_len, sizeof(my_msg.sha1)); iov[0].iov_len = sizeof (msg_t); iov[0].iov_base = &my_msg; iov[1].iov_len = payload_size; iov[1].iov_base = buffer; for (i = 0; i < send_now; i++) { if (in_cnchg && pcmk_test) { retries = 0; before = time(NULL); cs_repeat(retries, 30, res = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 2)); if (retries > 20) { qb_log (LOG_ERR, "cs_repeat: blocked for :%lu secs.", (unsigned long)(time(NULL) - before)); } if (res != CS_OK) { qb_log 
(LOG_ERR, "cpg_mcast_joined error:%d.", res); return; } } else { res = cpg_flow_control_state_get (cpg_handle, &fc_state); if (res == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { /* lets do this later */ send_some_more_messages_later (); qb_log (LOG_INFO, "flow control enabled."); return; } res = cpg_mcast_joined (cpg_handle, CPG_TYPE_AGREED, iov, 2); if (res == CS_ERR_TRY_AGAIN) { /* lets do this later */ send_some_more_messages_later (); if (i > 0) { qb_log (LOG_INFO, "TRY_AGAIN %d to send.", my_msgs_to_send); } return; } else if (res != CS_OK) { qb_log (LOG_ERR, "cpg_mcast_joined error:%d, exiting.", res); exit (-2); } } my_msgs_sent++; my_msg.seq = my_msgs_sent; my_msgs_to_send--; } qb_log (LOG_TRACE, "sent %d; to send %d.", my_msgs_sent, my_msgs_to_send); } static void send_some_more_messages (void * unused) { if (use_zcb) { send_some_more_messages_zcb (); } else { send_some_more_messages_normal (); } } static void msg_blaster (int sock, char* num_to_send_str) { my_msgs_to_send = atoi (num_to_send_str); my_msgs_sent = 0; my_seq = 1; my_pid = getpid(); use_zcb = QB_FALSE; total_stored_msgs = 0; cpg_local_get (cpg_handle, &my_nodeid); /* control the limits */ if (my_msgs_to_send <= 0) my_msgs_to_send = 1; if (my_msgs_to_send > 10000) my_msgs_to_send = 10000; send_some_more_messages_normal (); } static void context_test (int sock) { char response[100]; char *cmp; ssize_t rc; size_t send_len; cpg_context_set (cpg_handle, response); cpg_context_get (cpg_handle, (void**)&cmp); if (response != cmp) { snprintf (response, 100, "%s", FAIL_STR); } else { snprintf (response, 100, "%s", OK_STR); } send_len = strlen (response); rc = send (sock, response, send_len, 0); assert(rc == send_len); } static void msg_blaster_zcb (int sock, char* num_to_send_str) { my_msgs_to_send = atoi (num_to_send_str); my_seq = 1; my_pid = getpid(); use_zcb = QB_TRUE; total_stored_msgs = 0; cpg_local_get (cpg_handle, &my_nodeid); /* control the limits */ if (my_msgs_to_send <= 0) my_msgs_to_send = 1; if (my_msgs_to_send > 10000) my_msgs_to_send = 10000; send_some_more_messages_zcb (); } static int cfg_dispatch_wrapper_fn ( int fd, int revents, void *data) { cs_error_t error; if (revents & POLLHUP || revents & POLLERR) { qb_log (LOG_ERR, "got POLLHUP disconnecting from CFG"); corosync_cfg_finalize(cfg_handle); cfg_handle = 0; return -1; } error = corosync_cfg_dispatch (cfg_handle, CS_DISPATCH_ALL); if (error == CS_ERR_LIBRARY) { qb_log (LOG_ERR, "got LIB error disconnecting from CFG."); corosync_cfg_finalize(cfg_handle); cfg_handle = 0; return -1; } return 0; } static int cpg_dispatch_wrapper_fn ( int fd, int revents, void *data) { cs_error_t error; if (revents & POLLHUP || revents & POLLERR) { qb_log (LOG_ERR, "got POLLHUP disconnecting from CPG"); cpg_finalize(cpg_handle); cpg_handle = 0; return -1; } error = cpg_dispatch (cpg_handle, CS_DISPATCH_ALL); if (error == CS_ERR_LIBRARY) { qb_log (LOG_ERR, "got LIB error disconnecting from CPG"); cpg_finalize(cpg_handle); cpg_handle = 0; return -1; } return 0; } static void do_command (int sock, char* func, char*args[], int num_args) { int result; char response[100]; struct cpg_name group_name; ssize_t rc; size_t send_len; qb_log (LOG_TRACE, "RPC:%s() called.", func); if (strcmp ("cpg_mcast_joined",func) == 0) { struct iovec iov[5]; int a; for (a = 0; a < num_args; a++) { iov[a].iov_base = args[a]; iov[a].iov_len = strlen(args[a])+1; } cpg_mcast_joined (cpg_handle, CPG_TYPE_AGREED, iov, num_args); } else if (strcmp ("cpg_join",func) == 0) { if (strlen(args[0]) >= 
CPG_MAX_NAME_LENGTH) { qb_log (LOG_ERR, "Invalid group name"); exit (1); } strcpy (group_name.value, args[0]); group_name.length = strlen(args[0]); result = cpg_join (cpg_handle, &group_name); if (result != CS_OK) { qb_log (LOG_ERR, "Could not join process group, error %d", result); exit (1); } qb_log (LOG_INFO, "called cpg_join(%s)!", group_name.value); } else if (strcmp ("cpg_leave",func) == 0) { strcpy (group_name.value, args[0]); group_name.length = strlen(args[0]); result = cpg_leave (cpg_handle, &group_name); if (result != CS_OK) { qb_log (LOG_ERR, "Could not leave process group, error %d", result); exit (1); } qb_log (LOG_INFO, "called cpg_leave(%s)!", group_name.value); } else if (strcmp ("cpg_initialize",func) == 0) { int retry_count = 0; result = cpg_initialize (&cpg_handle, &callbacks); while (result != CS_OK) { qb_log (LOG_ERR, "cpg_initialize error %d (attempt %d)", result, retry_count); if (retry_count >= 3) { exit (1); } sleep(1); retry_count++; result = cpg_initialize (&cpg_handle, &callbacks); } cpg_fd_get (cpg_handle, &cpg_fd); qb_loop_poll_add (ta_poll_handle_get(), QB_LOOP_MED, cpg_fd, POLLIN|POLLNVAL, NULL, cpg_dispatch_wrapper_fn); } else if (strcmp ("cpg_local_get", func) == 0) { unsigned int local_nodeid; cpg_local_get (cpg_handle, &local_nodeid); snprintf (response, 100, "%u",local_nodeid); send_len = strlen (response); rc = send (sock, response, send_len, 0); assert(rc == send_len); } else if (strcmp ("cpg_finalize", func) == 0) { if (cpg_handle > 0) { cpg_finalize (cpg_handle); cpg_handle = 0; } } else if (strcmp ("record_config_events", func) == 0) { record_config_events (sock); } else if (strcmp ("record_messages", func) == 0) { record_messages (); } else if (strcmp ("read_config_event", func) == 0) { read_config_event (sock); } else if (strcmp ("read_messages", func) == 0) { read_messages (sock, args[0]); } else if (strcmp ("msg_blaster_zcb", func) == 0) { msg_blaster_zcb (sock, args[0]); } else if (strcmp ("pcmk_test", func) == 0) { pcmk_test = 1; } else if (strcmp ("msg_blaster", func) == 0) { msg_blaster (sock, args[0]); } else if (strcmp ("context_test", func) == 0) { context_test (sock); } else if (strcmp ("are_you_ok_dude", func) == 0) { snprintf (response, 100, "%s", OK_STR); send_len = strlen (response); rc = send (sock, response, strlen (response), 0); assert(rc == send_len); } else if (strcmp ("cfg_shutdown", func) == 0) { qb_log (LOG_INFO, "calling %s() called!", func); result = corosync_cfg_try_shutdown (cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST); qb_log (LOG_INFO,"%s() returned %d!", func, result); } else if (strcmp ("cfg_initialize",func) == 0) { int retry_count = 0; qb_log (LOG_INFO,"%s() called!", func); result = corosync_cfg_initialize (&cfg_handle, &cfg_callbacks); while (result != CS_OK) { qb_log (LOG_ERR, "cfg_initialize error %d (attempt %d)", result, retry_count); if (retry_count >= 3) { exit (1); } sleep(1); retry_count++; result = corosync_cfg_initialize (&cfg_handle, &cfg_callbacks); } qb_log (LOG_INFO,"corosync_cfg_initialize() == %d", result); result = corosync_cfg_fd_get (cfg_handle, &cfg_fd); qb_log (LOG_INFO,"corosync_cfg_fd_get() == %d", result); qb_loop_poll_add (ta_poll_handle_get(), QB_LOOP_MED, cfg_fd, POLLIN|POLLNVAL, NULL, cfg_dispatch_wrapper_fn); } else { qb_log(LOG_ERR, "RPC:%s not supported!", func); } } static void my_pre_exit(void) { qb_log (LOG_INFO, "%s PRE EXIT", __FILE__); if (cpg_handle > 0) { cpg_finalize (cpg_handle); cpg_handle = 0; } if (cfg_handle > 0) { corosync_cfg_finalize (cfg_handle); cfg_handle = 
0; } PK11_DestroyContext(sha1_context, PR_TRUE); } int main(int argc, char *argv[]) { - list_init (&msg_log_head); - list_init (&config_chg_log_head); + qb_list_init (&msg_log_head); + qb_list_init (&config_chg_log_head); if (NSS_NoDB_Init(".") != SECSuccess) { qb_log(LOG_ERR, "Couldn't initialize nss"); exit (0); } if ((sha1_context = PK11_CreateDigestContext(SEC_OID_SHA1)) == NULL) { qb_log(LOG_ERR, "Couldn't initialize nss"); exit (0); } return test_agent_run ("cpg_test_agent", 9034, do_command, my_pre_exit); } diff --git a/exec/cfg.c b/exec/cfg.c index a5482ad2..05c9a3a5 100644 --- a/exec/cfg.c +++ b/exec/cfg.c @@ -1,1083 +1,1083 @@ /* * Copyright (c) 2005-2006 MontaVista Software, Inc. * Copyright (c) 2006-2013 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include #include "service.h" #include "main.h" LOGSYS_DECLARE_SUBSYS ("CFG"); enum cfg_message_req_types { MESSAGE_REQ_EXEC_CFG_RINGREENABLE = 0, MESSAGE_REQ_EXEC_CFG_KILLNODE = 1, MESSAGE_REQ_EXEC_CFG_SHUTDOWN = 2, MESSAGE_REQ_EXEC_CFG_RELOAD_CONFIG = 3 }; #define DEFAULT_SHUTDOWN_TIMEOUT 5 -static struct list_head trackers_list; +static struct qb_list_head trackers_list; /* * Variables controlling a requested shutdown */ static corosync_timer_handle_t shutdown_timer; static struct cfg_info *shutdown_con; static uint32_t shutdown_flags; static int shutdown_yes; static int shutdown_no; static int shutdown_expected; struct cfg_info { - struct list_head list; + struct qb_list_head list; void *conn; void *tracker_conn; enum {SHUTDOWN_REPLY_UNKNOWN, SHUTDOWN_REPLY_YES, SHUTDOWN_REPLY_NO} shutdown_reply; }; static void cfg_confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); static char *cfg_exec_init_fn (struct corosync_api_v1 *corosync_api_v1); static struct corosync_api_v1 *api; static int cfg_lib_init_fn (void *conn); static int cfg_lib_exit_fn (void *conn); static void message_handler_req_exec_cfg_ringreenable ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cfg_killnode ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cfg_shutdown ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cfg_reload_config ( const void *message, unsigned int nodeid); static void exec_cfg_killnode_endian_convert (void *msg); static void message_handler_req_lib_cfg_ringstatusget ( void *conn, const void *msg); static void message_handler_req_lib_cfg_ringreenable ( void *conn, const void *msg); static void message_handler_req_lib_cfg_killnode ( void *conn, const void *msg); static void message_handler_req_lib_cfg_tryshutdown ( void *conn, const void *msg); static void message_handler_req_lib_cfg_replytoshutdown ( void *conn, const void *msg); static void message_handler_req_lib_cfg_get_node_addrs ( void *conn, const void *msg); static void message_handler_req_lib_cfg_local_get ( void *conn, const void *msg); static void message_handler_req_lib_cfg_reload_config ( void *conn, const void *msg); /* * Service Handler Definition */ static struct corosync_lib_handler cfg_lib_engine[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_cfg_ringstatusget, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_cfg_ringreenable, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_cfg_killnode, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_cfg_tryshutdown, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 4 */ .lib_handler_fn = message_handler_req_lib_cfg_replytoshutdown, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 5 */ .lib_handler_fn = message_handler_req_lib_cfg_get_node_addrs, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 */ .lib_handler_fn = message_handler_req_lib_cfg_local_get, 
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 */ .lib_handler_fn = message_handler_req_lib_cfg_reload_config, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED } }; static struct corosync_exec_handler cfg_exec_engine[] = { { /* 0 */ .exec_handler_fn = message_handler_req_exec_cfg_ringreenable, }, { /* 1 */ .exec_handler_fn = message_handler_req_exec_cfg_killnode, .exec_endian_convert_fn = exec_cfg_killnode_endian_convert }, { /* 2 */ .exec_handler_fn = message_handler_req_exec_cfg_shutdown, }, { /* 3 */ .exec_handler_fn = message_handler_req_exec_cfg_reload_config, } }; /* * Exports the interface for the service */ struct corosync_service_engine cfg_service_engine = { .name = "corosync configuration service", .id = CFG_SERVICE, .priority = 1, .private_data_size = sizeof(struct cfg_info), .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = cfg_lib_init_fn, .lib_exit_fn = cfg_lib_exit_fn, .lib_engine = cfg_lib_engine, .lib_engine_count = sizeof (cfg_lib_engine) / sizeof (struct corosync_lib_handler), .exec_init_fn = cfg_exec_init_fn, .exec_engine = cfg_exec_engine, .exec_engine_count = sizeof (cfg_exec_engine) / sizeof (struct corosync_exec_handler), .confchg_fn = cfg_confchg_fn }; struct corosync_service_engine *cfg_get_service_engine_ver0 (void) { return (&cfg_service_engine); } struct req_exec_cfg_ringreenable { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); }; struct req_exec_cfg_reload_config { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); }; struct req_exec_cfg_killnode { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_uint32_t nodeid __attribute__((aligned(8))); mar_name_t reason __attribute__((aligned(8))); }; struct req_exec_cfg_shutdown { struct qb_ipc_request_header header __attribute__((aligned(8))); }; /* IMPL */ static char *cfg_exec_init_fn ( struct corosync_api_v1 *corosync_api_v1) { api = corosync_api_v1; - list_init(&trackers_list); + qb_list_init(&trackers_list); return (NULL); } static void cfg_confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { } /* * Tell other nodes we are shutting down */ static int send_shutdown(void) { struct req_exec_cfg_shutdown req_exec_cfg_shutdown; struct iovec iovec; ENTER(); req_exec_cfg_shutdown.header.size = sizeof (struct req_exec_cfg_shutdown); req_exec_cfg_shutdown.header.id = SERVICE_ID_MAKE (CFG_SERVICE, MESSAGE_REQ_EXEC_CFG_SHUTDOWN); iovec.iov_base = (char *)&req_exec_cfg_shutdown; iovec.iov_len = sizeof (struct req_exec_cfg_shutdown); assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0); LEAVE(); return 0; } static void send_test_shutdown(void *only_conn, void *exclude_conn, int status) { struct res_lib_cfg_testshutdown res_lib_cfg_testshutdown; - struct list_head *iter; + struct qb_list_head *iter; ENTER(); res_lib_cfg_testshutdown.header.size = sizeof(struct res_lib_cfg_testshutdown); res_lib_cfg_testshutdown.header.id = MESSAGE_RES_CFG_TESTSHUTDOWN; res_lib_cfg_testshutdown.header.error = status; res_lib_cfg_testshutdown.flags = shutdown_flags; if (only_conn) { TRACE1("sending testshutdown to only %p", only_conn); api->ipc_dispatch_send(only_conn, 
&res_lib_cfg_testshutdown, sizeof(res_lib_cfg_testshutdown)); } else { - for (iter = trackers_list.next; iter != &trackers_list; iter = iter->next) { - struct cfg_info *ci = list_entry(iter, struct cfg_info, list); + qb_list_for_each(iter, &trackers_list) { + struct cfg_info *ci = qb_list_entry(iter, struct cfg_info, list); if (ci->conn != exclude_conn) { TRACE1("sending testshutdown to %p", ci->tracker_conn); api->ipc_dispatch_send(ci->tracker_conn, &res_lib_cfg_testshutdown, sizeof(res_lib_cfg_testshutdown)); } } } LEAVE(); } static void check_shutdown_status(void) { ENTER(); /* * Shutdown client might have gone away */ if (!shutdown_con) { LEAVE(); return; } /* * All replies safely gathered in ? */ if (shutdown_yes + shutdown_no >= shutdown_expected) { struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; api->timer_delete(shutdown_timer); if (shutdown_yes >= shutdown_expected || shutdown_flags == CFG_SHUTDOWN_FLAG_REGARDLESS) { TRACE1("shutdown confirmed"); res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; res_lib_cfg_tryshutdown.header.error = CS_OK; /* * Tell originator that shutdown was confirmed */ api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown, sizeof(res_lib_cfg_tryshutdown)); shutdown_con = NULL; /* * Tell other nodes we are going down */ send_shutdown(); } else { TRACE1("shutdown cancelled"); res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; res_lib_cfg_tryshutdown.header.error = CS_ERR_BUSY; /* * Tell originator that shutdown was cancelled */ api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown, sizeof(res_lib_cfg_tryshutdown)); shutdown_con = NULL; } log_printf(LOGSYS_LEVEL_DEBUG, "shutdown decision is: (yes count: %d, no count: %d) flags=%x", shutdown_yes, shutdown_no, shutdown_flags); } LEAVE(); } /* * Not all nodes responded to the shutdown (in time) */ static void shutdown_timer_fn(void *arg) { ENTER(); /* * Mark undecideds as "NO" */ shutdown_no = shutdown_expected; check_shutdown_status(); send_test_shutdown(NULL, NULL, CS_ERR_TIMEOUT); LEAVE(); } static void remove_ci_from_shutdown(struct cfg_info *ci) { ENTER(); /* * If the controlling shutdown process has quit, then cancel the * shutdown session */ if (ci == shutdown_con) { shutdown_con = NULL; api->timer_delete(shutdown_timer); } - if (!list_empty(&ci->list)) { - list_del(&ci->list); - list_init(&ci->list); + if (!qb_list_empty(&ci->list)) { + qb_list_del(&ci->list); + qb_list_init(&ci->list); /* * Remove our option */ if (shutdown_con) { if (ci->shutdown_reply == SHUTDOWN_REPLY_YES) shutdown_yes--; if (ci->shutdown_reply == SHUTDOWN_REPLY_NO) shutdown_no--; } /* * If we are leaving, then that's an implicit YES to shutdown */ ci->shutdown_reply = SHUTDOWN_REPLY_YES; shutdown_yes++; check_shutdown_status(); } LEAVE(); } int cfg_lib_exit_fn (void *conn) { struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); ENTER(); remove_ci_from_shutdown(ci); LEAVE(); return (0); } static int cfg_lib_init_fn (void *conn) { struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); ENTER(); - list_init(&ci->list); + qb_list_init(&ci->list); LEAVE(); return (0); } /* * Executive message handlers */ static void message_handler_req_exec_cfg_ringreenable ( const void *message, unsigned int nodeid) { const struct req_exec_cfg_ringreenable *req_exec_cfg_ringreenable = message; 
struct res_lib_cfg_ringreenable res_lib_cfg_ringreenable; ENTER(); api->totem_ring_reenable (); if (api->ipc_source_is_local(&req_exec_cfg_ringreenable->source)) { res_lib_cfg_ringreenable.header.id = MESSAGE_RES_CFG_RINGREENABLE; res_lib_cfg_ringreenable.header.size = sizeof (struct res_lib_cfg_ringreenable); res_lib_cfg_ringreenable.header.error = CS_OK; api->ipc_response_send ( req_exec_cfg_ringreenable->source.conn, &res_lib_cfg_ringreenable, sizeof (struct res_lib_cfg_ringreenable)); api->ipc_refcnt_dec(req_exec_cfg_ringreenable->source.conn); } LEAVE(); } static void exec_cfg_killnode_endian_convert (void *msg) { struct req_exec_cfg_killnode *req_exec_cfg_killnode = (struct req_exec_cfg_killnode *)msg; ENTER(); swab_mar_name_t(&req_exec_cfg_killnode->reason); LEAVE(); } static void message_handler_req_exec_cfg_killnode ( const void *message, unsigned int nodeid) { const struct req_exec_cfg_killnode *req_exec_cfg_killnode = message; cs_name_t reason; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "request to kill node %d(us=%d)", req_exec_cfg_killnode->nodeid, api->totem_nodeid_get()); if (req_exec_cfg_killnode->nodeid == api->totem_nodeid_get()) { marshall_from_mar_name_t(&reason, &req_exec_cfg_killnode->reason); log_printf(LOGSYS_LEVEL_NOTICE, "Killed by node %d: %s", nodeid, reason.value); corosync_fatal_error(COROSYNC_FATAL_ERROR_EXIT); } LEAVE(); } /* * Self shutdown */ static void message_handler_req_exec_cfg_shutdown ( const void *message, unsigned int nodeid) { ENTER(); log_printf(LOGSYS_LEVEL_NOTICE, "Node %d was shut down by sysadmin", nodeid); if (nodeid == api->totem_nodeid_get()) { api->shutdown_request(); } LEAVE(); } /* strcmp replacement that can handle NULLs */ static int nullcheck_strcmp(const char* left, const char *right) { if (!left && right) return -1; if (left && !right) return 1; if (!left && !right) return 0; return strcmp(left, right); } /* * If a key has changed value in the new file, then warn the user and remove it from the temp_map */ static void delete_and_notify_if_changed(icmap_map_t temp_map, const char *key_name) { if (!(icmap_key_value_eq(temp_map, key_name, icmap_get_global_map(), key_name))) { if (icmap_delete_r(temp_map, key_name) == CS_OK) { log_printf(LOGSYS_LEVEL_NOTICE, "Modified entry '%s' in corosync.conf cannot be changed at run-time", key_name); } } } /* * Remove any keys from the new config file that in the new corosync.conf but that * cannot be changed at run time. A log message will be issued for each * entry that the user wants to change but they cannot. * * Add more here as needed. 
*/ static void remove_ro_entries(icmap_map_t temp_map) { delete_and_notify_if_changed(temp_map, "totem.secauth"); delete_and_notify_if_changed(temp_map, "totem.crypto_hash"); delete_and_notify_if_changed(temp_map, "totem.crypto_cipher"); delete_and_notify_if_changed(temp_map, "totem.version"); delete_and_notify_if_changed(temp_map, "totem.threads"); delete_and_notify_if_changed(temp_map, "totem.ip_version"); delete_and_notify_if_changed(temp_map, "totem.rrp_mode"); delete_and_notify_if_changed(temp_map, "totem.netmtu"); delete_and_notify_if_changed(temp_map, "totem.interface.ringnumber"); delete_and_notify_if_changed(temp_map, "totem.interface.bindnetaddr"); delete_and_notify_if_changed(temp_map, "totem.interface.mcastaddr"); delete_and_notify_if_changed(temp_map, "totem.interface.broadcast"); delete_and_notify_if_changed(temp_map, "totem.interface.mcastport"); delete_and_notify_if_changed(temp_map, "totem.interface.ttl"); delete_and_notify_if_changed(temp_map, "totem.vsftype"); delete_and_notify_if_changed(temp_map, "totem.transport"); delete_and_notify_if_changed(temp_map, "totem.cluster_name"); delete_and_notify_if_changed(temp_map, "quorum.provider"); delete_and_notify_if_changed(temp_map, "qb.ipc_type"); } /* * Remove entries that exist in the global map, but not in the temp_map, this will * cause delete notifications to be sent to any listeners. * * NOTE: This routine depends entirely on the keys returned by the iterators * being in alpha-sorted order. */ static void remove_deleted_entries(icmap_map_t temp_map, const char *prefix) { icmap_iter_t old_iter; icmap_iter_t new_iter; const char *old_key, *new_key; int ret; old_iter = icmap_iter_init(prefix); new_iter = icmap_iter_init_r(temp_map, prefix); old_key = icmap_iter_next(old_iter, NULL, NULL); new_key = icmap_iter_next(new_iter, NULL, NULL); while (old_key || new_key) { ret = nullcheck_strcmp(old_key, new_key); if ((ret < 0 && old_key) || !new_key) { /* * new_key is greater, a line (or more) has been deleted * Continue until old is >= new */ do { /* Remove it from icmap & send notifications */ icmap_delete(old_key); old_key = icmap_iter_next(old_iter, NULL, NULL); ret = nullcheck_strcmp(old_key, new_key); } while (ret < 0 && old_key); } else if ((ret > 0 && new_key) || !old_key) { /* * old_key is greater, a line (or more) has been added * Continue until new is >= old * * we don't need to do anything special with this like tell * icmap. That will happen when we copy the values over */ do { new_key = icmap_iter_next(new_iter, NULL, NULL); ret = nullcheck_strcmp(old_key, new_key); } while (ret > 0 && new_key); } if (ret == 0) { new_key = icmap_iter_next(new_iter, NULL, NULL); old_key = icmap_iter_next(old_iter, NULL, NULL); } } icmap_iter_finalize(new_iter); icmap_iter_finalize(old_iter); } /* * Reload configuration file */ static void message_handler_req_exec_cfg_reload_config ( const void *message, unsigned int nodeid) { const struct req_exec_cfg_reload_config *req_exec_cfg_reload_config = message; struct res_lib_cfg_reload_config res_lib_cfg_reload_config; icmap_map_t temp_map; const char *error_string; int res = CS_OK; ENTER(); log_printf(LOGSYS_LEVEL_NOTICE, "Config reload requested by node %d", nodeid); /* * Set up a new hashtable as a staging area. */ if ((res = icmap_init_r(&temp_map)) != CS_OK) { log_printf(LOGSYS_LEVEL_ERROR, "Unable to create temporary icmap. 
config file reload cancelled\n"); goto reload_fini; } /* * Load new config into the temporary map */ res = coroparse_configparse(temp_map, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Unable to reload config file: %s", error_string); res = CS_ERR_LIBRARY; goto reload_return; } /* Tell interested listeners that we have started a reload */ icmap_set_uint8("config.reload_in_progress", 1); /* Detect deleted entries and remove them from the main icmap hashtable */ remove_deleted_entries(temp_map, "logging."); remove_deleted_entries(temp_map, "totem."); remove_deleted_entries(temp_map, "nodelist."); remove_deleted_entries(temp_map, "quorum."); remove_deleted_entries(temp_map, "uidgid.config."); /* Remove entries that cannot be changed */ remove_ro_entries(temp_map); /* * Copy new keys into live config. * If this fails we will have a partially loaded config because some keys (above) might * have been reset to defaults - I'm not sure what to do here, we might have to quit. */ if ( (res = icmap_copy_map(icmap_get_global_map(), temp_map)) != CS_OK) { log_printf (LOGSYS_LEVEL_ERROR, "Error making new config live. cmap database may be inconsistent\n"); } /* All done - let clients know */ icmap_set_uint8("config.reload_in_progress", 0); reload_fini: /* Finished with the temporary storage */ icmap_fini_r(temp_map); reload_return: /* All done, return result to the caller if it was on this system */ if (nodeid == api->totem_nodeid_get()) { res_lib_cfg_reload_config.header.size = sizeof(res_lib_cfg_reload_config); res_lib_cfg_reload_config.header.id = MESSAGE_RES_CFG_RELOAD_CONFIG; res_lib_cfg_reload_config.header.error = res; api->ipc_response_send(req_exec_cfg_reload_config->source.conn, &res_lib_cfg_reload_config, sizeof(res_lib_cfg_reload_config)); api->ipc_refcnt_dec(req_exec_cfg_reload_config->source.conn);; } LEAVE(); } /* * Library Interface Implementation */ static void message_handler_req_lib_cfg_ringstatusget ( void *conn, const void *msg) { struct res_lib_cfg_ringstatusget res_lib_cfg_ringstatusget; struct totem_ip_address interfaces[INTERFACE_MAX]; unsigned int iface_count; char **status; const char *totem_ip_string; unsigned int i; cs_error_t res = CS_OK; ENTER(); res_lib_cfg_ringstatusget.header.id = MESSAGE_RES_CFG_RINGSTATUSGET; res_lib_cfg_ringstatusget.header.size = sizeof (struct res_lib_cfg_ringstatusget); api->totem_ifaces_get ( api->totem_nodeid_get(), interfaces, INTERFACE_MAX, &status, &iface_count); assert(iface_count <= CFG_MAX_INTERFACES); res_lib_cfg_ringstatusget.interface_count = iface_count; for (i = 0; i < iface_count; i++) { totem_ip_string = (const char *)api->totem_ip_print (&interfaces[i]); if (strlen(totem_ip_string) >= CFG_INTERFACE_NAME_MAX_LEN) { log_printf(LOGSYS_LEVEL_ERROR, "String representation of interface %u is too long", i); res = CS_ERR_NAME_TOO_LONG; goto send_response; } if (strlen(status[i]) >= CFG_INTERFACE_STATUS_MAX_LEN) { log_printf(LOGSYS_LEVEL_ERROR, "Status string for interface %u is too long", i); res = CS_ERR_NAME_TOO_LONG; goto send_response; } strcpy ((char *)&res_lib_cfg_ringstatusget.interface_status[i], status[i]); strcpy ((char *)&res_lib_cfg_ringstatusget.interface_name[i], totem_ip_string); } send_response: res_lib_cfg_ringstatusget.header.error = res; api->ipc_response_send ( conn, &res_lib_cfg_ringstatusget, sizeof (struct res_lib_cfg_ringstatusget)); LEAVE(); } static void message_handler_req_lib_cfg_ringreenable ( void *conn, const void *msg) { struct req_exec_cfg_ringreenable req_exec_cfg_ringreenable; struct 
iovec iovec; ENTER(); req_exec_cfg_ringreenable.header.size = sizeof (struct req_exec_cfg_ringreenable); req_exec_cfg_ringreenable.header.id = SERVICE_ID_MAKE (CFG_SERVICE, MESSAGE_REQ_EXEC_CFG_RINGREENABLE); api->ipc_source_set (&req_exec_cfg_ringreenable.source, conn); api->ipc_refcnt_inc(conn); iovec.iov_base = (char *)&req_exec_cfg_ringreenable; iovec.iov_len = sizeof (struct req_exec_cfg_ringreenable); assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0); LEAVE(); } static void message_handler_req_lib_cfg_killnode ( void *conn, const void *msg) { const struct req_lib_cfg_killnode *req_lib_cfg_killnode = msg; struct res_lib_cfg_killnode res_lib_cfg_killnode; struct req_exec_cfg_killnode req_exec_cfg_killnode; struct iovec iovec; ENTER(); req_exec_cfg_killnode.header.size = sizeof (struct req_exec_cfg_killnode); req_exec_cfg_killnode.header.id = SERVICE_ID_MAKE (CFG_SERVICE, MESSAGE_REQ_EXEC_CFG_KILLNODE); req_exec_cfg_killnode.nodeid = req_lib_cfg_killnode->nodeid; marshall_to_mar_name_t(&req_exec_cfg_killnode.reason, &req_lib_cfg_killnode->reason); iovec.iov_base = (char *)&req_exec_cfg_killnode; iovec.iov_len = sizeof (struct req_exec_cfg_killnode); (void)api->totem_mcast (&iovec, 1, TOTEM_SAFE); res_lib_cfg_killnode.header.size = sizeof(struct res_lib_cfg_killnode); res_lib_cfg_killnode.header.id = MESSAGE_RES_CFG_KILLNODE; res_lib_cfg_killnode.header.error = CS_OK; api->ipc_response_send(conn, &res_lib_cfg_killnode, sizeof(res_lib_cfg_killnode)); LEAVE(); } static void message_handler_req_lib_cfg_tryshutdown ( void *conn, const void *msg) { struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); const struct req_lib_cfg_tryshutdown *req_lib_cfg_tryshutdown = msg; - struct list_head *iter; + struct qb_list_head *iter; ENTER(); if (req_lib_cfg_tryshutdown->flags == CFG_SHUTDOWN_FLAG_IMMEDIATE) { struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; /* * Tell other nodes */ send_shutdown(); res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; res_lib_cfg_tryshutdown.header.error = CS_OK; api->ipc_response_send(conn, &res_lib_cfg_tryshutdown, sizeof(res_lib_cfg_tryshutdown)); LEAVE(); return; } /* * Shutdown in progress, return an error */ if (shutdown_con) { struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; res_lib_cfg_tryshutdown.header.error = CS_ERR_EXIST; api->ipc_response_send(conn, &res_lib_cfg_tryshutdown, sizeof(res_lib_cfg_tryshutdown)); LEAVE(); return; } ci->conn = conn; shutdown_con = (struct cfg_info *)api->ipc_private_data_get (conn); shutdown_flags = req_lib_cfg_tryshutdown->flags; shutdown_yes = 0; shutdown_no = 0; /* * Count the number of listeners */ shutdown_expected = 0; - for (iter = trackers_list.next; iter != &trackers_list; iter = iter->next) { - struct cfg_info *testci = list_entry(iter, struct cfg_info, list); + qb_list_for_each(iter, &trackers_list) { + struct cfg_info *testci = qb_list_entry(iter, struct cfg_info, list); /* * It is assumed that we will allow shutdown */ if (testci != ci) { testci->shutdown_reply = SHUTDOWN_REPLY_UNKNOWN; shutdown_expected++; } } /* * If no-one is listening for events then we can just go down now */ if (shutdown_expected == 0) { struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); 
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; res_lib_cfg_tryshutdown.header.error = CS_OK; /* * Tell originator that shutdown was confirmed */ api->ipc_response_send(conn, &res_lib_cfg_tryshutdown, sizeof(res_lib_cfg_tryshutdown)); send_shutdown(); LEAVE(); return; } else { unsigned int shutdown_timeout = DEFAULT_SHUTDOWN_TIMEOUT; /* * Look for a shutdown timeout in configuration map */ icmap_get_uint32("cfg.shutdown_timeout", &shutdown_timeout); /* * Start the timer. If we don't get a full set of replies before this goes * off we'll cancel the shutdown */ api->timer_add_duration((unsigned long long)shutdown_timeout*1000000000, NULL, shutdown_timer_fn, &shutdown_timer); /* * Tell the users we would like to shut down */ send_test_shutdown(NULL, conn, CS_OK); } /* * We don't sent a reply to the caller here. * We send it when we know if we can shut down or not */ LEAVE(); } static void message_handler_req_lib_cfg_replytoshutdown ( void *conn, const void *msg) { struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); const struct req_lib_cfg_replytoshutdown *req_lib_cfg_replytoshutdown = msg; struct res_lib_cfg_replytoshutdown res_lib_cfg_replytoshutdown; int status = CS_OK; ENTER(); if (!shutdown_con) { status = CS_ERR_ACCESS; goto exit_fn; } if (req_lib_cfg_replytoshutdown->response) { shutdown_yes++; ci->shutdown_reply = SHUTDOWN_REPLY_YES; } else { shutdown_no++; ci->shutdown_reply = SHUTDOWN_REPLY_NO; } check_shutdown_status(); exit_fn: res_lib_cfg_replytoshutdown.header.error = status; res_lib_cfg_replytoshutdown.header.id = MESSAGE_RES_CFG_REPLYTOSHUTDOWN; res_lib_cfg_replytoshutdown.header.size = sizeof(res_lib_cfg_replytoshutdown); api->ipc_response_send(conn, &res_lib_cfg_replytoshutdown, sizeof(res_lib_cfg_replytoshutdown)); LEAVE(); } static void message_handler_req_lib_cfg_get_node_addrs (void *conn, const void *msg) { struct totem_ip_address node_ifs[INTERFACE_MAX]; char buf[PIPE_BUF]; char **status; unsigned int num_interfaces = 0; int ret = CS_OK; int i; const struct req_lib_cfg_get_node_addrs *req_lib_cfg_get_node_addrs = msg; struct res_lib_cfg_get_node_addrs *res_lib_cfg_get_node_addrs = (struct res_lib_cfg_get_node_addrs *)buf; unsigned int nodeid = req_lib_cfg_get_node_addrs->nodeid; char *addr_buf; if (nodeid == 0) nodeid = api->totem_nodeid_get(); api->totem_ifaces_get(nodeid, node_ifs, INTERFACE_MAX, &status, &num_interfaces); res_lib_cfg_get_node_addrs->header.size = sizeof(struct res_lib_cfg_get_node_addrs) + (num_interfaces * TOTEMIP_ADDRLEN); res_lib_cfg_get_node_addrs->header.id = MESSAGE_RES_CFG_GET_NODE_ADDRS; res_lib_cfg_get_node_addrs->header.error = ret; res_lib_cfg_get_node_addrs->num_addrs = num_interfaces; if (num_interfaces) { res_lib_cfg_get_node_addrs->family = node_ifs[0].family; for (i = 0, addr_buf = (char *)res_lib_cfg_get_node_addrs->addrs; i < num_interfaces; i++, addr_buf += TOTEMIP_ADDRLEN) { memcpy(addr_buf, node_ifs[i].addr, TOTEMIP_ADDRLEN); } } else { res_lib_cfg_get_node_addrs->header.error = CS_ERR_NOT_EXIST; } api->ipc_response_send(conn, res_lib_cfg_get_node_addrs, res_lib_cfg_get_node_addrs->header.size); } static void message_handler_req_lib_cfg_local_get (void *conn, const void *msg) { struct res_lib_cfg_local_get res_lib_cfg_local_get; res_lib_cfg_local_get.header.size = sizeof(res_lib_cfg_local_get); res_lib_cfg_local_get.header.id = MESSAGE_RES_CFG_LOCAL_GET; res_lib_cfg_local_get.header.error = CS_OK; res_lib_cfg_local_get.local_nodeid = api->totem_nodeid_get (); api->ipc_response_send(conn, 
&res_lib_cfg_local_get, sizeof(res_lib_cfg_local_get)); } static void message_handler_req_lib_cfg_reload_config (void *conn, const void *msg) { struct req_exec_cfg_reload_config req_exec_cfg_reload_config; struct iovec iovec; ENTER(); req_exec_cfg_reload_config.header.size = sizeof (struct req_exec_cfg_reload_config); req_exec_cfg_reload_config.header.id = SERVICE_ID_MAKE (CFG_SERVICE, MESSAGE_REQ_EXEC_CFG_RELOAD_CONFIG); api->ipc_source_set (&req_exec_cfg_reload_config.source, conn); api->ipc_refcnt_inc(conn); iovec.iov_base = (char *)&req_exec_cfg_reload_config; iovec.iov_len = sizeof (struct req_exec_cfg_reload_config); assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0); LEAVE(); } diff --git a/exec/cmap.c b/exec/cmap.c index ba4c7987..161abc5d 100644 --- a/exec/cmap.c +++ b/exec/cmap.c @@ -1,1029 +1,1028 @@ /* * Copyright (c) 2011-2012 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include "service.h" LOGSYS_DECLARE_SUBSYS ("CMAP"); #define MAX_REQ_EXEC_CMAP_MCAST_ITEMS 32 #define ICMAP_VALUETYPE_NOT_EXIST 0 struct cmap_conn_info { struct hdb_handle_database iter_db; struct hdb_handle_database track_db; }; typedef uint64_t cmap_iter_handle_t; typedef uint64_t cmap_track_handle_t; struct cmap_track_user_data { void *conn; cmap_track_handle_t track_handle; uint64_t track_inst_handle; }; enum cmap_message_req_types { MESSAGE_REQ_EXEC_CMAP_MCAST = 0, }; enum cmap_mcast_reason { CMAP_MCAST_REASON_SYNC = 0, CMAP_MCAST_REASON_NEW_CONFIG_VERSION = 1, }; static struct corosync_api_v1 *api; static char *cmap_exec_init_fn (struct corosync_api_v1 *corosync_api); static int cmap_exec_exit_fn(void); static int cmap_lib_init_fn (void *conn); static int cmap_lib_exit_fn (void *conn); static void message_handler_req_lib_cmap_set(void *conn, const void *message); static void message_handler_req_lib_cmap_delete(void *conn, const void *message); static void message_handler_req_lib_cmap_get(void *conn, const void *message); static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message); static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message); static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message); static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message); static void message_handler_req_lib_cmap_track_add(void *conn, const void *message); static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message); static void cmap_notify_fn(int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data); static void message_handler_req_exec_cmap_mcast( const void *message, unsigned int nodeid); static void exec_cmap_mcast_endian_convert(void *message); /* * Reson is subtype of message. argc is number of items in argv array. Argv is array * of strings (key names) which will be send to wire. There can be maximum * MAX_REQ_EXEC_CMAP_MCAST_ITEMS items (for more items, CS_ERR_TOO_MANY_GROUPS * error is returned). If key is not found, item has type ICMAP_VALUETYPE_NOT_EXIST * and length zero. 
*/ static cs_error_t cmap_mcast_send(enum cmap_mcast_reason reason, int argc, char *argv[]); static void cmap_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int cmap_sync_process (void); static void cmap_sync_activate (void); static void cmap_sync_abort (void); static void cmap_config_version_track_cb( int32_t event, const char *key_name, struct icmap_notify_value new_value, struct icmap_notify_value old_value, void *user_data); /* * Library Handler Definition */ static struct corosync_lib_handler cmap_lib_engine[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_cmap_set, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_cmap_delete, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_cmap_get, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_cmap_adjust_int, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 */ .lib_handler_fn = message_handler_req_lib_cmap_iter_init, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 */ .lib_handler_fn = message_handler_req_lib_cmap_iter_next, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 */ .lib_handler_fn = message_handler_req_lib_cmap_iter_finalize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 */ .lib_handler_fn = message_handler_req_lib_cmap_track_add, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 */ .lib_handler_fn = message_handler_req_lib_cmap_track_delete, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, }; static struct corosync_exec_handler cmap_exec_engine[] = { { /* 0 - MESSAGE_REQ_EXEC_CMAP_MCAST */ .exec_handler_fn = message_handler_req_exec_cmap_mcast, .exec_endian_convert_fn = exec_cmap_mcast_endian_convert }, }; struct corosync_service_engine cmap_service_engine = { .name = "corosync configuration map access", .id = CMAP_SERVICE, .priority = 1, .private_data_size = sizeof(struct cmap_conn_info), .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = cmap_lib_init_fn, .lib_exit_fn = cmap_lib_exit_fn, .lib_engine = cmap_lib_engine, .lib_engine_count = sizeof (cmap_lib_engine) / sizeof (struct corosync_lib_handler), .exec_init_fn = cmap_exec_init_fn, .exec_exit_fn = cmap_exec_exit_fn, .exec_engine = cmap_exec_engine, .exec_engine_count = sizeof (cmap_exec_engine) / sizeof (struct corosync_exec_handler), .sync_init = cmap_sync_init, .sync_process = cmap_sync_process, .sync_activate = cmap_sync_activate, .sync_abort = cmap_sync_abort }; struct corosync_service_engine *cmap_get_service_engine_ver0 (void) { return (&cmap_service_engine); } struct req_exec_cmap_mcast_item { mar_name_t key_name __attribute__((aligned(8))); mar_uint8_t value_type __attribute__((aligned(8))); mar_size_t value_len __attribute__((aligned(8))); uint8_t value[] __attribute__((aligned(8))); }; struct req_exec_cmap_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_uint8_t reason __attribute__((aligned(8))); mar_uint8_t no_items __attribute__((aligned(8))); mar_uint8_t reserved1 __attribute__((aligned(8))); mar_uint8_t reserver2 __attribute__((aligned(8))); /* * Following are array of req_exec_cmap_mcast_item alligned to 8 bytes */ }; static size_t cmap_sync_trans_list_entries = 0; static size_t cmap_sync_member_list_entries = 0; static 
uint64_t cmap_highest_config_version_received = 0; static uint64_t cmap_my_config_version = 0; static int cmap_first_sync = 1; static icmap_track_t cmap_config_version_track; static void cmap_config_version_track_cb( int32_t event, const char *key_name, struct icmap_notify_value new_value, struct icmap_notify_value old_value, void *user_data) { const char *key = "totem.config_version"; cs_error_t ret; ENTER(); if (icmap_get_uint64("totem.config_version", &cmap_my_config_version) != CS_OK) { cmap_my_config_version = 0; } ret = cmap_mcast_send(CMAP_MCAST_REASON_NEW_CONFIG_VERSION, 1, (char **)&key); if (ret != CS_OK) { log_printf(LOGSYS_LEVEL_ERROR, "Can't inform other nodes about new config version"); } LEAVE(); } static int cmap_exec_exit_fn(void) { if (icmap_track_delete(cmap_config_version_track) != CS_OK) { log_printf(LOGSYS_LEVEL_ERROR, "Can't delete config_version icmap tracker"); } return 0; } static char *cmap_exec_init_fn ( struct corosync_api_v1 *corosync_api) { cs_error_t ret; api = corosync_api; ret = icmap_track_add("totem.config_version", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, cmap_config_version_track_cb, NULL, &cmap_config_version_track); if (ret != CS_OK) { return ((char *)"Can't add config_version icmap tracker"); } return (NULL); } static int cmap_lib_init_fn (void *conn) { struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p", conn); api->ipc_refcnt_inc(conn); memset(conn_info, 0, sizeof(*conn_info)); hdb_create(&conn_info->iter_db); hdb_create(&conn_info->track_db); return (0); } static int cmap_lib_exit_fn (void *conn) { struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); hdb_handle_t iter_handle = 0; icmap_iter_t *iter; hdb_handle_t track_handle = 0; icmap_track_t *track; log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p", conn); hdb_iterator_reset(&conn_info->iter_db); while (hdb_iterator_next(&conn_info->iter_db, (void*)&iter, &iter_handle) == 0) { icmap_iter_finalize(*iter); (void)hdb_handle_put (&conn_info->iter_db, iter_handle); } hdb_destroy(&conn_info->iter_db); hdb_iterator_reset(&conn_info->track_db); while (hdb_iterator_next(&conn_info->track_db, (void*)&track, &track_handle) == 0) { free(icmap_track_get_user_data(*track)); icmap_track_delete(*track); (void)hdb_handle_put (&conn_info->track_db, track_handle); } hdb_destroy(&conn_info->track_db); api->ipc_refcnt_dec(conn); return (0); } static void cmap_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { cmap_sync_trans_list_entries = trans_list_entries; cmap_sync_member_list_entries = member_list_entries; cmap_highest_config_version_received = 0; if (icmap_get_uint64("totem.config_version", &cmap_my_config_version) != CS_OK) { cmap_my_config_version = 0; } } static int cmap_sync_process (void) { const char *key = "totem.config_version"; cs_error_t ret; ret = cmap_mcast_send(CMAP_MCAST_REASON_SYNC, 1, (char **)&key); return (ret == CS_OK ? 
0 : -1); } static void cmap_sync_activate (void) { if (cmap_sync_trans_list_entries == 0) { log_printf(LOGSYS_LEVEL_DEBUG, "Single node sync -> no action"); return ; } if (cmap_first_sync == 1) { cmap_first_sync = 0; } else { log_printf(LOGSYS_LEVEL_DEBUG, "Not first sync -> no action"); return ; } if (cmap_my_config_version == 0) { log_printf(LOGSYS_LEVEL_DEBUG, "My config version is 0 -> no action"); return ; } if (cmap_highest_config_version_received == 0) { log_printf(LOGSYS_LEVEL_DEBUG, "Other nodes version is 0 -> no action"); return ; } if (cmap_highest_config_version_received != cmap_my_config_version) { log_printf(LOGSYS_LEVEL_ERROR, "Received config version (%"PRIu64") is different than my config version (%"PRIu64")! Exiting", cmap_highest_config_version_received, cmap_my_config_version); api->shutdown_request(); return ; } } static void cmap_sync_abort (void) { } static void message_handler_req_lib_cmap_set(void *conn, const void *message) { const struct req_lib_cmap_set *req_lib_cmap_set = message; struct res_lib_cmap_set res_lib_cmap_set; cs_error_t ret; if (icmap_is_key_ro((char *)req_lib_cmap_set->key_name.value)) { ret = CS_ERR_ACCESS; } else { ret = icmap_set((char *)req_lib_cmap_set->key_name.value, &req_lib_cmap_set->value, req_lib_cmap_set->value_len, req_lib_cmap_set->type); } memset(&res_lib_cmap_set, 0, sizeof(res_lib_cmap_set)); res_lib_cmap_set.header.size = sizeof(res_lib_cmap_set); res_lib_cmap_set.header.id = MESSAGE_RES_CMAP_SET; res_lib_cmap_set.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_set, sizeof(res_lib_cmap_set)); } static void message_handler_req_lib_cmap_delete(void *conn, const void *message) { const struct req_lib_cmap_set *req_lib_cmap_set = message; struct res_lib_cmap_delete res_lib_cmap_delete; cs_error_t ret; if (icmap_is_key_ro((char *)req_lib_cmap_set->key_name.value)) { ret = CS_ERR_ACCESS; } else { ret = icmap_delete((char *)req_lib_cmap_set->key_name.value); } memset(&res_lib_cmap_delete, 0, sizeof(res_lib_cmap_delete)); res_lib_cmap_delete.header.size = sizeof(res_lib_cmap_delete); res_lib_cmap_delete.header.id = MESSAGE_RES_CMAP_DELETE; res_lib_cmap_delete.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_delete, sizeof(res_lib_cmap_delete)); } static void message_handler_req_lib_cmap_get(void *conn, const void *message) { const struct req_lib_cmap_get *req_lib_cmap_get = message; struct res_lib_cmap_get *res_lib_cmap_get; struct res_lib_cmap_get error_res_lib_cmap_get; cs_error_t ret; size_t value_len; size_t res_lib_cmap_get_size; icmap_value_types_t type; void *value; value_len = req_lib_cmap_get->value_len; res_lib_cmap_get_size = sizeof(*res_lib_cmap_get) + value_len; res_lib_cmap_get = malloc(res_lib_cmap_get_size); if (res_lib_cmap_get == NULL) { ret = CS_ERR_NO_MEMORY; goto error_exit; } memset(res_lib_cmap_get, 0, res_lib_cmap_get_size); if (value_len > 0) { value = res_lib_cmap_get->value; } else { value = NULL; } ret = icmap_get((char *)req_lib_cmap_get->key_name.value, value, &value_len, &type); if (ret != CS_OK) { free(res_lib_cmap_get); goto error_exit; } res_lib_cmap_get->header.size = res_lib_cmap_get_size; res_lib_cmap_get->header.id = MESSAGE_RES_CMAP_GET; res_lib_cmap_get->header.error = ret; res_lib_cmap_get->type = type; res_lib_cmap_get->value_len = value_len; api->ipc_response_send(conn, res_lib_cmap_get, res_lib_cmap_get_size); free(res_lib_cmap_get); return ; error_exit: memset(&error_res_lib_cmap_get, 0, sizeof(error_res_lib_cmap_get)); error_res_lib_cmap_get.header.size = 
sizeof(error_res_lib_cmap_get); error_res_lib_cmap_get.header.id = MESSAGE_RES_CMAP_GET; error_res_lib_cmap_get.header.error = ret; api->ipc_response_send(conn, &error_res_lib_cmap_get, sizeof(error_res_lib_cmap_get)); } static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message) { const struct req_lib_cmap_adjust_int *req_lib_cmap_adjust_int = message; struct res_lib_cmap_adjust_int res_lib_cmap_adjust_int; cs_error_t ret; if (icmap_is_key_ro((char *)req_lib_cmap_adjust_int->key_name.value)) { ret = CS_ERR_ACCESS; } else { ret = icmap_adjust_int((char *)req_lib_cmap_adjust_int->key_name.value, req_lib_cmap_adjust_int->step); } memset(&res_lib_cmap_adjust_int, 0, sizeof(res_lib_cmap_adjust_int)); res_lib_cmap_adjust_int.header.size = sizeof(res_lib_cmap_adjust_int); res_lib_cmap_adjust_int.header.id = MESSAGE_RES_CMAP_ADJUST_INT; res_lib_cmap_adjust_int.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_adjust_int, sizeof(res_lib_cmap_adjust_int)); } static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message) { const struct req_lib_cmap_iter_init *req_lib_cmap_iter_init = message; struct res_lib_cmap_iter_init res_lib_cmap_iter_init; cs_error_t ret; icmap_iter_t iter; icmap_iter_t *hdb_iter; cmap_iter_handle_t handle = 0ULL; const char *prefix; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); if (req_lib_cmap_iter_init->prefix.length > 0) { prefix = (char *)req_lib_cmap_iter_init->prefix.value; } else { prefix = NULL; } iter = icmap_iter_init(prefix); if (iter == NULL) { ret = CS_ERR_NO_SECTIONS; goto reply_send; } ret = hdb_error_to_cs(hdb_handle_create(&conn_info->iter_db, sizeof(iter), &handle)); if (ret != CS_OK) { goto reply_send; } ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, handle, (void *)&hdb_iter)); if (ret != CS_OK) { goto reply_send; } *hdb_iter = iter; (void)hdb_handle_put (&conn_info->iter_db, handle); reply_send: memset(&res_lib_cmap_iter_init, 0, sizeof(res_lib_cmap_iter_init)); res_lib_cmap_iter_init.header.size = sizeof(res_lib_cmap_iter_init); res_lib_cmap_iter_init.header.id = MESSAGE_RES_CMAP_ITER_INIT; res_lib_cmap_iter_init.header.error = ret; res_lib_cmap_iter_init.iter_handle = handle; api->ipc_response_send(conn, &res_lib_cmap_iter_init, sizeof(res_lib_cmap_iter_init)); } static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message) { const struct req_lib_cmap_iter_next *req_lib_cmap_iter_next = message; struct res_lib_cmap_iter_next res_lib_cmap_iter_next; cs_error_t ret; icmap_iter_t *iter; size_t value_len = 0; icmap_value_types_t type = 0; const char *res = NULL; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, req_lib_cmap_iter_next->iter_handle, (void *)&iter)); if (ret != CS_OK) { goto reply_send; } res = icmap_iter_next(*iter, &value_len, &type); if (res == NULL) { ret = CS_ERR_NO_SECTIONS; } (void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_next->iter_handle); reply_send: memset(&res_lib_cmap_iter_next, 0, sizeof(res_lib_cmap_iter_next)); res_lib_cmap_iter_next.header.size = sizeof(res_lib_cmap_iter_next); res_lib_cmap_iter_next.header.id = MESSAGE_RES_CMAP_ITER_NEXT; res_lib_cmap_iter_next.header.error = ret; if (res != NULL) { res_lib_cmap_iter_next.value_len = value_len; res_lib_cmap_iter_next.type = type; memcpy(res_lib_cmap_iter_next.key_name.value, res, strlen(res)); 
res_lib_cmap_iter_next.key_name.length = strlen(res); } api->ipc_response_send(conn, &res_lib_cmap_iter_next, sizeof(res_lib_cmap_iter_next)); } static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message) { const struct req_lib_cmap_iter_finalize *req_lib_cmap_iter_finalize = message; struct res_lib_cmap_iter_finalize res_lib_cmap_iter_finalize; cs_error_t ret; icmap_iter_t *iter; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle, (void *)&iter)); if (ret != CS_OK) { goto reply_send; } icmap_iter_finalize(*iter); (void)hdb_handle_destroy(&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle); (void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle); reply_send: memset(&res_lib_cmap_iter_finalize, 0, sizeof(res_lib_cmap_iter_finalize)); res_lib_cmap_iter_finalize.header.size = sizeof(res_lib_cmap_iter_finalize); res_lib_cmap_iter_finalize.header.id = MESSAGE_RES_CMAP_ITER_FINALIZE; res_lib_cmap_iter_finalize.header.error = ret; api->ipc_response_send(conn, &res_lib_cmap_iter_finalize, sizeof(res_lib_cmap_iter_finalize)); } static void cmap_notify_fn(int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { struct cmap_track_user_data *cmap_track_user_data = (struct cmap_track_user_data *)user_data; struct res_lib_cmap_notify_callback res_lib_cmap_notify_callback; struct iovec iov[3]; memset(&res_lib_cmap_notify_callback, 0, sizeof(res_lib_cmap_notify_callback)); res_lib_cmap_notify_callback.header.size = sizeof(res_lib_cmap_notify_callback) + new_val.len + old_val.len; res_lib_cmap_notify_callback.header.id = MESSAGE_RES_CMAP_NOTIFY_CALLBACK; res_lib_cmap_notify_callback.header.error = CS_OK; res_lib_cmap_notify_callback.new_value_type = new_val.type; res_lib_cmap_notify_callback.old_value_type = old_val.type; res_lib_cmap_notify_callback.new_value_len = new_val.len; res_lib_cmap_notify_callback.old_value_len = old_val.len; res_lib_cmap_notify_callback.event = event; res_lib_cmap_notify_callback.key_name.length = strlen(key_name); res_lib_cmap_notify_callback.track_inst_handle = cmap_track_user_data->track_inst_handle; memcpy(res_lib_cmap_notify_callback.key_name.value, key_name, strlen(key_name)); iov[0].iov_base = (char *)&res_lib_cmap_notify_callback; iov[0].iov_len = sizeof(res_lib_cmap_notify_callback); iov[1].iov_base = (char *)new_val.data; iov[1].iov_len = new_val.len; iov[2].iov_base = (char *)old_val.data; iov[2].iov_len = old_val.len; api->ipc_dispatch_iov_send(cmap_track_user_data->conn, iov, 3); } static void message_handler_req_lib_cmap_track_add(void *conn, const void *message) { const struct req_lib_cmap_track_add *req_lib_cmap_track_add = message; struct res_lib_cmap_track_add res_lib_cmap_track_add; cs_error_t ret; cmap_track_handle_t handle = 0; icmap_track_t track = NULL; icmap_track_t *hdb_track; struct cmap_track_user_data *cmap_track_user_data; const char *key_name; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); cmap_track_user_data = malloc(sizeof(*cmap_track_user_data)); if (cmap_track_user_data == NULL) { ret = CS_ERR_NO_MEMORY; goto reply_send; } memset(cmap_track_user_data, 0, sizeof(*cmap_track_user_data)); if (req_lib_cmap_track_add->key_name.length > 0) { key_name = (char *)req_lib_cmap_track_add->key_name.value; } else { key_name = NULL; } ret = 
icmap_track_add(key_name, req_lib_cmap_track_add->track_type, cmap_notify_fn, cmap_track_user_data, &track); if (ret != CS_OK) { free(cmap_track_user_data); goto reply_send; } ret = hdb_error_to_cs(hdb_handle_create(&conn_info->track_db, sizeof(track), &handle)); if (ret != CS_OK) { free(cmap_track_user_data); goto reply_send; } ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db, handle, (void *)&hdb_track)); if (ret != CS_OK) { free(cmap_track_user_data); goto reply_send; } *hdb_track = track; cmap_track_user_data->conn = conn; cmap_track_user_data->track_handle = handle; cmap_track_user_data->track_inst_handle = req_lib_cmap_track_add->track_inst_handle; (void)hdb_handle_put (&conn_info->track_db, handle); reply_send: memset(&res_lib_cmap_track_add, 0, sizeof(res_lib_cmap_track_add)); res_lib_cmap_track_add.header.size = sizeof(res_lib_cmap_track_add); res_lib_cmap_track_add.header.id = MESSAGE_RES_CMAP_TRACK_ADD; res_lib_cmap_track_add.header.error = ret; res_lib_cmap_track_add.track_handle = handle; api->ipc_response_send(conn, &res_lib_cmap_track_add, sizeof(res_lib_cmap_track_add)); } static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message) { const struct req_lib_cmap_track_delete *req_lib_cmap_track_delete = message; struct res_lib_cmap_track_delete res_lib_cmap_track_delete; cs_error_t ret; icmap_track_t *track; struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); uint64_t track_inst_handle = 0; ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db, req_lib_cmap_track_delete->track_handle, (void *)&track)); if (ret != CS_OK) { goto reply_send; } track_inst_handle = ((struct cmap_track_user_data *)icmap_track_get_user_data(*track))->track_inst_handle; free(icmap_track_get_user_data(*track)); ret = icmap_track_delete(*track); (void)hdb_handle_put (&conn_info->track_db, req_lib_cmap_track_delete->track_handle); (void)hdb_handle_destroy(&conn_info->track_db, req_lib_cmap_track_delete->track_handle); reply_send: memset(&res_lib_cmap_track_delete, 0, sizeof(res_lib_cmap_track_delete)); res_lib_cmap_track_delete.header.size = sizeof(res_lib_cmap_track_delete); res_lib_cmap_track_delete.header.id = MESSAGE_RES_CMAP_TRACK_DELETE; res_lib_cmap_track_delete.header.error = ret; res_lib_cmap_track_delete.track_inst_handle = track_inst_handle; api->ipc_response_send(conn, &res_lib_cmap_track_delete, sizeof(res_lib_cmap_track_delete)); } static cs_error_t cmap_mcast_send(enum cmap_mcast_reason reason, int argc, char *argv[]) { int i; size_t value_len; icmap_value_types_t value_type; cs_error_t err; size_t item_len; size_t msg_len = 0; struct req_exec_cmap_mcast req_exec_cmap_mcast; struct req_exec_cmap_mcast_item *item = NULL; struct iovec req_exec_cmap_iovec[MAX_REQ_EXEC_CMAP_MCAST_ITEMS + 1]; ENTER(); if (argc > MAX_REQ_EXEC_CMAP_MCAST_ITEMS) { return (CS_ERR_TOO_MANY_GROUPS); } memset(req_exec_cmap_iovec, 0, sizeof(req_exec_cmap_iovec)); for (i = 0; i < argc; i++) { err = icmap_get(argv[i], NULL, &value_len, &value_type); if (err != CS_OK && err != CS_ERR_NOT_EXIST) { goto free_mem; } if (err == CS_ERR_NOT_EXIST) { value_type = ICMAP_VALUETYPE_NOT_EXIST; value_len = 0; } item_len = MAR_ALIGN_UP(sizeof(*item) + value_len, 8); item = malloc(item_len); if (item == NULL) { goto free_mem; } memset(item, 0, item_len); item->value_type = value_type; item->value_len = value_len; item->key_name.length = strlen(argv[i]); strcpy((char *)item->key_name.value, argv[i]); if (value_type != ICMAP_VALUETYPE_NOT_EXIST) { err = 
icmap_get(argv[i], item->value, &value_len, &value_type); if (err != CS_OK) { goto free_mem; } } req_exec_cmap_iovec[i + 1].iov_base = item; req_exec_cmap_iovec[i + 1].iov_len = item_len; msg_len += item_len; qb_log(LOG_TRACE, "Item %u - type %u, len %zu", i, item->value_type, item->value_len); item = NULL; } memset(&req_exec_cmap_mcast, 0, sizeof(req_exec_cmap_mcast)); req_exec_cmap_mcast.header.size = sizeof(req_exec_cmap_mcast) + msg_len; req_exec_cmap_mcast.reason = reason; req_exec_cmap_mcast.no_items = argc; req_exec_cmap_iovec[0].iov_base = &req_exec_cmap_mcast; req_exec_cmap_iovec[0].iov_len = sizeof(req_exec_cmap_mcast); qb_log(LOG_TRACE, "Sending %u items (%u iovec) for reason %u", argc, argc + 1, reason); err = (api->totem_mcast(req_exec_cmap_iovec, argc + 1, TOTEM_AGREED) == 0 ? CS_OK : CS_ERR_MESSAGE_ERROR); free_mem: for (i = 0; i < argc; i++) { free(req_exec_cmap_iovec[i + 1].iov_base); } free(item); LEAVE(); return (err); } static struct req_exec_cmap_mcast_item *cmap_mcast_item_find( const void *message, char *key) { const struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; int i; const char *p; struct req_exec_cmap_mcast_item *item; mar_uint16_t key_name_len; p = (const char *)message + sizeof(*req_exec_cmap_mcast); for (i = 0; i < req_exec_cmap_mcast->no_items; i++) { item = (struct req_exec_cmap_mcast_item *)p; key_name_len = item->key_name.length; if (strlen(key) == key_name_len && strcmp((char *)item->key_name.value, key) == 0) { return (item); } p += MAR_ALIGN_UP(sizeof(*item) + item->value_len, 8); } return (NULL); } static void message_handler_req_exec_cmap_mcast_reason_sync_nv( enum cmap_mcast_reason reason, const void *message, unsigned int nodeid) { char member_config_version[ICMAP_KEYNAME_MAXLEN]; uint64_t config_version = 0; struct req_exec_cmap_mcast_item *item; mar_size_t value_len; ENTER(); item = cmap_mcast_item_find(message, (char *)"totem.config_version"); if (item != NULL) { value_len = item->value_len; if (item->value_type == ICMAP_VALUETYPE_NOT_EXIST) { config_version = 0; } if (item->value_type == ICMAP_VALUETYPE_UINT64) { memcpy(&config_version, item->value, value_len); } } qb_log(LOG_TRACE, "Received config version %"PRIu64" from node %x", config_version, nodeid); if (nodeid != api->totem_nodeid_get() && config_version > cmap_highest_config_version_received) { cmap_highest_config_version_received = config_version; } snprintf(member_config_version, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.config_version", nodeid); icmap_set_uint64(member_config_version, config_version); LEAVE(); } static void message_handler_req_exec_cmap_mcast( const void *message, unsigned int nodeid) { const struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; ENTER(); switch (req_exec_cmap_mcast->reason) { case CMAP_MCAST_REASON_SYNC: message_handler_req_exec_cmap_mcast_reason_sync_nv(req_exec_cmap_mcast->reason, message, nodeid); break; case CMAP_MCAST_REASON_NEW_CONFIG_VERSION: message_handler_req_exec_cmap_mcast_reason_sync_nv(req_exec_cmap_mcast->reason, message, nodeid); break; default: qb_log(LOG_TRACE, "Received mcast with unknown reason %u", req_exec_cmap_mcast->reason); }; LEAVE(); } static void exec_cmap_mcast_endian_convert(void *message) { struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; const char *p; int i; struct req_exec_cmap_mcast_item *item; uint16_t u16; uint32_t u32; uint64_t u64; float flt; double dbl; swab_coroipc_request_header_t(&req_exec_cmap_mcast->header); p = (const char *)message + 
sizeof(*req_exec_cmap_mcast); for (i = 0; i < req_exec_cmap_mcast->no_items; i++) { item = (struct req_exec_cmap_mcast_item *)p; swab_mar_uint16_t(&item->key_name.length); swab_mar_size_t(&item->value_len); switch (item->value_type) { case ICMAP_VALUETYPE_INT16: case ICMAP_VALUETYPE_UINT16: memcpy(&u16, item->value, sizeof(u16)); u16 = swab16(u16); memcpy(item->value, &u16, sizeof(u16)); break; case ICMAP_VALUETYPE_INT32: case ICMAP_VALUETYPE_UINT32: memcpy(&u32, item->value, sizeof(u32)); u32 = swab32(u32); memcpy(item->value, &u32, sizeof(u32)); break; case ICMAP_VALUETYPE_INT64: case ICMAP_VALUETYPE_UINT64: memcpy(&u64, item->value, sizeof(u64)); u64 = swab64(u64); memcpy(item->value, &u64, sizeof(u64)); break; case ICMAP_VALUETYPE_FLOAT: memcpy(&flt, item->value, sizeof(flt)); swabflt(&flt); memcpy(item->value, &flt, sizeof(flt)); break; case ICMAP_VALUETYPE_DOUBLE: memcpy(&dbl, item->value, sizeof(dbl)); swabdbl(&dbl); memcpy(item->value, &dbl, sizeof(dbl)); break; } p += MAR_ALIGN_UP(sizeof(*item) + item->value_len, 8); } } diff --git a/exec/coroparse.c b/exec/coroparse.c index 16e2d148..3318a399 100644 --- a/exec/coroparse.c +++ b/exec/coroparse.c @@ -1,1400 +1,1392 @@ /* * Copyright (c) 2006-2013 Red Hat, Inc. * * All rights reserved. * * Author: Patrick Caulfield (pcaulfie@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
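Both cmap_mcast_item_find() and exec_cmap_mcast_endian_convert() in exec/cmap.c above walk the multicast payload the same way: the fixed req_exec_cmap_mcast header is followed by no_items req_exec_cmap_mcast_item records, each occupying sizeof(*item) + value_len bytes rounded up to 8 with MAR_ALIGN_UP. A short sketch of that walk, using a hypothetical visitor callback:

/* Sketch (hypothetical helper): visit every item of a received
 * req_exec_cmap_mcast message, using the same layout assumptions as the
 * two walkers above. */
static void cmap_mcast_foreach_item_example(const void *message,
	void (*visit)(const struct req_exec_cmap_mcast_item *item))
{
	const struct req_exec_cmap_mcast *req = message;
	const char *p = (const char *)message + sizeof(*req);
	int i;

	for (i = 0; i < req->no_items; i++) {
		const struct req_exec_cmap_mcast_item *item =
			(const struct req_exec_cmap_mcast_item *)p;

		visit(item);
		/* Advance past the item plus its value, rounded up to 8 bytes,
		 * exactly as cmap_mcast_item_find() does. */
		p += MAR_ALIGN_UP(sizeof(*item) + item->value_len, 8);
	}
}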
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #define LOGSYS_UTILS_ONLY 1 #include #include #include "main.h" #include "util.h" enum parser_cb_type { PARSER_CB_START, PARSER_CB_END, PARSER_CB_SECTION_START, PARSER_CB_SECTION_END, PARSER_CB_ITEM, }; enum main_cp_cb_data_state { MAIN_CP_CB_DATA_STATE_NORMAL, MAIN_CP_CB_DATA_STATE_TOTEM, MAIN_CP_CB_DATA_STATE_INTERFACE, MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS, MAIN_CP_CB_DATA_STATE_UIDGID, MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON, MAIN_CP_CB_DATA_STATE_MEMBER, MAIN_CP_CB_DATA_STATE_QUORUM, MAIN_CP_CB_DATA_STATE_QDEVICE, MAIN_CP_CB_DATA_STATE_NODELIST, MAIN_CP_CB_DATA_STATE_NODELIST_NODE, MAIN_CP_CB_DATA_STATE_PLOAD, MAIN_CP_CB_DATA_STATE_QB, MAIN_CP_CB_DATA_STATE_RESOURCES, MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM, MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS, MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED, MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED }; typedef int (*parser_cb_f)(const char *path, char *key, char *value, enum main_cp_cb_data_state *state, enum parser_cb_type type, const char **error_string, icmap_map_t config_map, void *user_data); struct key_value_list_item { char *key; char *value; - struct list_head list; + struct qb_list_head list; }; struct main_cp_cb_data { int linknumber; char *bindnetaddr; char *mcastaddr; char *broadcast; int mcastport; int ttl; int knet_link_priority; int knet_ping_interval; int knet_ping_timeout; int knet_ping_precision; - struct list_head logger_subsys_items_head; + struct qb_list_head logger_subsys_items_head; char *subsys; char *logging_daemon_name; - struct list_head member_items_head; + struct qb_list_head member_items_head; int node_number; int ring0_addr_added; }; static int read_config_file_into_icmap( const char **error_string, icmap_map_t config_map); static char error_string_response[512]; static int uid_determine (const char *req_user) { int pw_uid = 0; struct passwd passwd; struct passwd* pwdptr = &passwd; struct passwd* temp_pwd_pt; char *pwdbuffer; int pwdlinelen, rc; long int id; char *ep; id = strtol(req_user, &ep, 10); if (*ep == '\0' && id >= 0 && id <= UINT_MAX) { return (id); } pwdlinelen = sysconf (_SC_GETPW_R_SIZE_MAX); if (pwdlinelen == -1) { pwdlinelen = 256; } pwdbuffer = malloc (pwdlinelen); while ((rc = getpwnam_r (req_user, pwdptr, pwdbuffer, pwdlinelen, &temp_pwd_pt)) == ERANGE) { char *n; pwdlinelen *= 2; if (pwdlinelen <= 32678) { n = realloc (pwdbuffer, pwdlinelen); if (n != NULL) { pwdbuffer = n; continue; } } } if (rc != 0) { free (pwdbuffer); sprintf (error_string_response, "getpwnam_r(): %s", strerror(rc)); return (-1); } if (temp_pwd_pt == NULL) { free (pwdbuffer); sprintf (error_string_response, "The '%s' user is not found in /etc/passwd, please read the documentation.", req_user); return (-1); } pw_uid = passwd.pw_uid; free (pwdbuffer); return pw_uid; } static int gid_determine (const char *req_group) { int corosync_gid = 0; struct group group; struct group * grpptr = &group; struct group * temp_grp_pt; char *grpbuffer; int grplinelen, rc; long int id; char *ep; id = strtol(req_group, &ep, 10); if (*ep == '\0' && id >= 0 && id <= UINT_MAX) { return (id); } grplinelen = sysconf (_SC_GETGR_R_SIZE_MAX); if (grplinelen == -1) { grplinelen = 256; } grpbuffer = malloc (grplinelen); while ((rc = getgrnam_r (req_group, grpptr, grpbuffer, grplinelen, &temp_grp_pt)) == ERANGE) { char *n; grplinelen *= 2; if (grplinelen <= 
32678) { n = realloc (grpbuffer, grplinelen); if (n != NULL) { grpbuffer = n; continue; } } } if (rc != 0) { free (grpbuffer); sprintf (error_string_response, "getgrnam_r(): %s", strerror(rc)); return (-1); } if (temp_grp_pt == NULL) { free (grpbuffer); sprintf (error_string_response, "The '%s' group is not found in /etc/group, please read the documentation.", req_group); return (-1); } corosync_gid = group.gr_gid; free (grpbuffer); return corosync_gid; } static char *strchr_rs (const char *haystack, int byte) { const char *end_address = strchr (haystack, byte); if (end_address) { end_address += 1; /* skip past { or = */ while (*end_address == ' ' || *end_address == '\t') end_address++; } return ((char *) end_address); } int coroparse_configparse (icmap_map_t config_map, const char **error_string) { if (read_config_file_into_icmap(error_string, config_map)) { return -1; } return 0; } static char *remove_whitespace(char *string, int remove_colon_and_brace) { char *start; char *end; start = string; while (*start == ' ' || *start == '\t') start++; end = start+(strlen(start))-1; while ((*end == ' ' || *end == '\t' || (remove_colon_and_brace && (*end == ':' || *end == '{'))) && end > start) end--; if (end != start) *(end+1) = '\0'; return start; } static int parse_section(FILE *fp, char *path, const char **error_string, int depth, enum main_cp_cb_data_state state, parser_cb_f parser_cb, icmap_map_t config_map, void *user_data) { char line[512]; int i; char *loc; int ignore_line; char new_keyname[ICMAP_KEYNAME_MAXLEN]; if (strcmp(path, "") == 0) { parser_cb("", NULL, NULL, &state, PARSER_CB_START, error_string, config_map, user_data); } while (fgets (line, sizeof (line), fp)) { if (strlen(line) > 0) { if (line[strlen(line) - 1] == '\n') line[strlen(line) - 1] = '\0'; if (strlen (line) > 0 && line[strlen(line) - 1] == '\r') line[strlen(line) - 1] = '\0'; } /* * Clear out white space and tabs */ for (i = strlen (line) - 1; i > -1; i--) { if (line[i] == '\t' || line[i] == ' ') { line[i] = '\0'; } else { break; } } ignore_line = 1; for (i = 0; i < strlen (line); i++) { if (line[i] != '\t' && line[i] != ' ') { if (line[i] != '#') ignore_line = 0; break; } } /* * Clear out comments and empty lines */ if (ignore_line) { continue; } /* New section ? 
*/ if ((loc = strchr_rs (line, '{'))) { char *section = remove_whitespace(line, 1); enum main_cp_cb_data_state newstate; loc--; *loc = '\0'; if (strlen(path) + strlen(section) + 1 >= ICMAP_KEYNAME_MAXLEN) { *error_string = "parser error: Start of section makes total cmap path too long"; return -1; } strcpy(new_keyname, path); if (strcmp(path, "") != 0) { strcat(new_keyname, "."); } strcat(new_keyname, section); /* Only use the new state for items further down the stack */ newstate = state; if (!parser_cb(new_keyname, NULL, NULL, &newstate, PARSER_CB_SECTION_START, error_string, config_map, user_data)) { return -1; } if (parse_section(fp, new_keyname, error_string, depth + 1, newstate, parser_cb, config_map, user_data)) return -1; continue ; } /* New key/value */ if ((loc = strchr_rs (line, ':'))) { char *key; char *value; *(loc-1) = '\0'; key = remove_whitespace(line, 1); value = remove_whitespace(loc, 0); if (strlen(path) + strlen(key) + 1 >= ICMAP_KEYNAME_MAXLEN) { *error_string = "parser error: New key makes total cmap path too long"; return -1; } strcpy(new_keyname, path); if (strcmp(path, "") != 0) { strcat(new_keyname, "."); } strcat(new_keyname, key); if (!parser_cb(new_keyname, key, value, &state, PARSER_CB_ITEM, error_string, config_map, user_data)) { return -1; } continue ; } if (strchr_rs (line, '}')) { if (depth == 0) { *error_string = "parser error: Unexpected closing brace"; return -1; } if (!parser_cb(path, NULL, NULL, &state, PARSER_CB_SECTION_END, error_string, config_map, user_data)) { return -1; } return 0; } } if (strcmp(path, "") != 0) { *error_string = "parser error: Missing closing brace"; return -1; } if (strcmp(path, "") == 0) { parser_cb("", NULL, NULL, &state, PARSER_CB_END, error_string, config_map, user_data); } return 0; } static int safe_atoq_range(icmap_value_types_t value_type, long long int *min_val, long long int *max_val) { switch (value_type) { case ICMAP_VALUETYPE_INT8: *min_val = INT8_MIN; *max_val = INT8_MAX; break; case ICMAP_VALUETYPE_UINT8: *min_val = 0; *max_val = UINT8_MAX; break; case ICMAP_VALUETYPE_INT16: *min_val = INT16_MIN; *max_val = INT16_MAX; break; case ICMAP_VALUETYPE_UINT16: *min_val = 0; *max_val = UINT16_MAX; break; case ICMAP_VALUETYPE_INT32: *min_val = INT32_MIN; *max_val = INT32_MAX; break; case ICMAP_VALUETYPE_UINT32: *min_val = 0; *max_val = UINT32_MAX; break; default: return (-1); } return (0); } /* * Convert string str to long long int res. Type of result is target_type and currently only * ICMAP_VALUETYPE_[U]INT[8|16|32] is supported. * Return 0 on success, -1 on failure. 
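The comment above sums up safe_atoq(): parse with strtoll(), reject ERANGE, empty input and trailing garbage, then check the result against the min/max that safe_atoq_range() derives from the target icmap type. A standalone sketch of the same pattern, covering only the ICMAP_VALUETYPE_UINT8 case (the function name is hypothetical):

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

/* Parse-and-range-check pattern used by safe_atoq(), reduced to uint8. */
static int parse_u8_example(const char *str, uint8_t *res)
{
	char *endptr;
	long long int val;

	errno = 0;
	val = strtoll(str, &endptr, 10);
	if (errno == ERANGE || endptr == str || *endptr != '\0') {
		return (-1);	/* overflow, empty string or trailing garbage */
	}
	if (val < 0 || val > UINT8_MAX) {
		return (-1);	/* outside the range of the target type */
	}
	*res = (uint8_t)val;
	return (0);
}

main_config_parser_cb() routes most numeric corosync.conf keys through this pattern, so a bad value is rejected with the expected range reported in the error string instead of being silently truncated.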
*/ static int safe_atoq(const char *str, long long int *res, icmap_value_types_t target_type) { long long int val; long long int min_val, max_val; char *endptr; errno = 0; val = strtoll(str, &endptr, 10); if (errno == ERANGE) { return (-1); } if (endptr == str) { return (-1); } if (*endptr != '\0') { return (-1); } if (safe_atoq_range(target_type, &min_val, &max_val) != 0) { return (-1); } if (val < min_val || val > max_val) { return (-1); } *res = val; return (0); } static int str_to_ull(const char *str, unsigned long long int *res) { unsigned long long int val; char *endptr; errno = 0; val = strtoull(str, &endptr, 10); if (errno == ERANGE) { return (-1); } if (endptr == str) { return (-1); } if (*endptr != '\0') { return (-1); } *res = val; return (0); } static int main_config_parser_cb(const char *path, char *key, char *value, enum main_cp_cb_data_state *state, enum parser_cb_type type, const char **error_string, icmap_map_t config_map, void *user_data) { int ii; long long int val; long long int min_val, max_val; icmap_value_types_t val_type = ICMAP_VALUETYPE_BINARY; unsigned long long int ull; int add_as_string; char key_name[ICMAP_KEYNAME_MAXLEN]; static char formated_err[256]; struct main_cp_cb_data *data = (struct main_cp_cb_data *)user_data; struct key_value_list_item *kv_item; - struct list_head *iter, *iter_next; + struct qb_list_head *iter; int uid, gid; switch (type) { case PARSER_CB_START: memset(data, 0, sizeof(struct main_cp_cb_data)); *state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case PARSER_CB_END: break; case PARSER_CB_ITEM: add_as_string = 1; switch (*state) { case MAIN_CP_CB_DATA_STATE_NORMAL: break; case MAIN_CP_CB_DATA_STATE_PLOAD: if ((strcmp(path, "pload.count") == 0) || (strcmp(path, "pload.size") == 0)) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint32_r(config_map, path, val); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_QUORUM: if ((strcmp(path, "quorum.expected_votes") == 0) || (strcmp(path, "quorum.votes") == 0) || (strcmp(path, "quorum.last_man_standing_window") == 0) || (strcmp(path, "quorum.leaving_timeout") == 0)) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint32_r(config_map, path, val); add_as_string = 0; } if ((strcmp(path, "quorum.two_node") == 0) || (strcmp(path, "quorum.expected_votes_tracking") == 0) || (strcmp(path, "quorum.allow_downscale") == 0) || (strcmp(path, "quorum.wait_for_all") == 0) || (strcmp(path, "quorum.auto_tie_breaker") == 0) || (strcmp(path, "quorum.last_man_standing") == 0)) { val_type = ICMAP_VALUETYPE_UINT8; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint8_r(config_map, path, val); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_QDEVICE: if ((strcmp(path, "quorum.device.timeout") == 0) || (strcmp(path, "quorum.device.sync_timeout") == 0) || (strcmp(path, "quorum.device.votes") == 0)) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint32_r(config_map, path, val); add_as_string = 0; } if ((strcmp(path, "quorum.device.master_wins") == 0)) { val_type = ICMAP_VALUETYPE_UINT8; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint8_r(config_map, path, val); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_TOTEM: if ((strcmp(path, "totem.version") == 0) || (strcmp(path, "totem.nodeid") == 0) || (strcmp(path, "totem.threads") == 0) || (strcmp(path, "totem.token") 
== 0) || (strcmp(path, "totem.token_coefficient") == 0) || (strcmp(path, "totem.token_retransmit") == 0) || (strcmp(path, "totem.hold") == 0) || (strcmp(path, "totem.token_retransmits_before_loss_const") == 0) || (strcmp(path, "totem.join") == 0) || (strcmp(path, "totem.send_join") == 0) || (strcmp(path, "totem.consensus") == 0) || (strcmp(path, "totem.merge") == 0) || (strcmp(path, "totem.downcheck") == 0) || (strcmp(path, "totem.fail_recv_const") == 0) || (strcmp(path, "totem.seqno_unchanged_const") == 0) || (strcmp(path, "totem.rrp_token_expired_timeout") == 0) || (strcmp(path, "totem.rrp_problem_count_timeout") == 0) || (strcmp(path, "totem.rrp_problem_count_threshold") == 0) || (strcmp(path, "totem.rrp_problem_count_mcast_threshold") == 0) || (strcmp(path, "totem.rrp_autorecovery_check_timeout") == 0) || (strcmp(path, "totem.heartbeat_failures_allowed") == 0) || (strcmp(path, "totem.max_network_delay") == 0) || (strcmp(path, "totem.window_size") == 0) || (strcmp(path, "totem.max_messages") == 0) || (strcmp(path, "totem.miss_count_const") == 0) || (strcmp(path, "totem.netmtu") == 0)) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint32_r(config_map,path, val); add_as_string = 0; } if (strcmp(path, "totem.config_version") == 0) { if (str_to_ull(value, &ull) != 0) { goto atoi_error; } icmap_set_uint64_r(config_map, path, ull); add_as_string = 0; } if (strcmp(path, "totem.ip_version") == 0) { if ((strcmp(value, "ipv4") != 0) && (strcmp(value, "ipv6") != 0)) { *error_string = "Invalid ip_version type"; return (0); } } if (strcmp(path, "totem.crypto_type") == 0) { if ((strcmp(value, "nss") != 0) && (strcmp(value, "aes256") != 0) && (strcmp(value, "aes192") != 0) && (strcmp(value, "aes128") != 0) && (strcmp(value, "3des") != 0)) { *error_string = "Invalid crypto type"; return (0); } } if (strcmp(path, "totem.crypto_cipher") == 0) { if ((strcmp(value, "none") != 0) && (strcmp(value, "aes256") != 0) && (strcmp(value, "aes192") != 0) && (strcmp(value, "aes128") != 0) && (strcmp(value, "3des") != 0)) { *error_string = "Invalid cipher type"; return (0); } } if (strcmp(path, "totem.crypto_hash") == 0) { if ((strcmp(value, "none") != 0) && (strcmp(value, "md5") != 0) && (strcmp(value, "sha1") != 0) && (strcmp(value, "sha256") != 0) && (strcmp(value, "sha384") != 0) && (strcmp(value, "sha512") != 0)) { *error_string = "Invalid hash type"; return (0); } } break; case MAIN_CP_CB_DATA_STATE_QB: if (strcmp(path, "qb.ipc_type") == 0) { if ((strcmp(value, "native") != 0) && (strcmp(value, "shm") != 0) && (strcmp(value, "socket") != 0)) { *error_string = "Invalid qb ipc_type"; return (0); } } break; case MAIN_CP_CB_DATA_STATE_INTERFACE: if (strcmp(path, "totem.interface.linknumber") == 0) { val_type = ICMAP_VALUETYPE_UINT8; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } data->linknumber = val; add_as_string = 0; } if (strcmp(path, "totem.interface.bindnetaddr") == 0) { data->bindnetaddr = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.mcastaddr") == 0) { data->mcastaddr = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.broadcast") == 0) { data->broadcast = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.mcastport") == 0) { val_type = ICMAP_VALUETYPE_UINT16; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } data->mcastport = val; add_as_string = 0; } if (strcmp(path, "totem.interface.ttl") == 0) { val_type = ICMAP_VALUETYPE_UINT8; if 
(safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } data->ttl = val; add_as_string = 0; } if (strcmp(path, "totem.interface.knet_link_priority") == 0) { val_type = ICMAP_VALUETYPE_UINT8; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } data->knet_link_priority = val; add_as_string = 0; } if (strcmp(path, "totem.interface.knet_ping_interval") == 0) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } data->knet_ping_interval = val; add_as_string = 0; } if (strcmp(path, "totem.interface.knet_ping_timeout") == 0) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } data->knet_ping_timeout = val; add_as_string = 0; } if (strcmp(path, "totem.interface.knet_ping_precision") == 0) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } data->knet_ping_precision = val; add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: if (strcmp(key, "subsys") == 0) { data->subsys = strdup(value); if (data->subsys == NULL) { *error_string = "Can't alloc memory"; return (0); } } else { kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } - list_init(&kv_item->list); - list_add(&kv_item->list, &data->logger_subsys_items_head); + qb_list_init(&kv_item->list); + qb_list_add(&kv_item->list, &data->logger_subsys_items_head); } add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: if (strcmp(key, "subsys") == 0) { data->subsys = strdup(value); if (data->subsys == NULL) { *error_string = "Can't alloc memory"; return (0); } } else if (strcmp(key, "name") == 0) { data->logging_daemon_name = strdup(value); if (data->logging_daemon_name == NULL) { *error_string = "Can't alloc memory"; return (0); } } else { kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } - list_init(&kv_item->list); - list_add(&kv_item->list, &data->logger_subsys_items_head); + qb_list_init(&kv_item->list); + qb_list_add(&kv_item->list, &data->logger_subsys_items_head); } add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_UIDGID: if (strcmp(key, "uid") == 0) { uid = uid_determine(value); if (uid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u", uid); icmap_set_uint8_r(config_map, key_name, 1); add_as_string = 0; } else if (strcmp(key, "gid") == 0) { gid = gid_determine(value); if (gid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u", gid); icmap_set_uint8_r(config_map, key_name, 1); add_as_string = 0; } else { *error_string = "uidgid: Only uid and gid are allowed items"; return (0); } break; case MAIN_CP_CB_DATA_STATE_MEMBER: if (strcmp(key, "memberaddr") != 0) { *error_string = "Only memberaddr is allowed in member section"; return (0); } kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } 
memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } - list_init(&kv_item->list); - list_add(&kv_item->list, &data->member_items_head); + qb_list_init(&kv_item->list); + qb_list_add(&kv_item->list, &data->member_items_head); add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_NODELIST: break; case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.%s", data->node_number, key); if ((strcmp(key, "nodeid") == 0) || (strcmp(key, "quorum_votes") == 0)) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint32_r(config_map, key_name, val); add_as_string = 0; } if (strcmp(key, "ring0_addr") == 0) { data->ring0_addr_added = 1; } if (add_as_string) { icmap_set_string_r(config_map, key_name, value); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_RESOURCES: if (strcmp(key, "watchdog_timeout") == 0) { val_type = ICMAP_VALUETYPE_UINT32; if (safe_atoq(value, &val, val_type) != 0) { goto atoi_error; } icmap_set_uint32_r(config_map,path, val); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM: case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED: if (strcmp(key, "poll_period") == 0) { if (str_to_ull(value, &ull) != 0) { goto atoi_error; } icmap_set_uint64_r(config_map,path, ull); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS: case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED: if (strcmp(key, "poll_period") == 0) { if (str_to_ull(value, &ull) != 0) { goto atoi_error; } icmap_set_uint64_r(config_map,path, ull); add_as_string = 0; } break; } if (add_as_string) { icmap_set_string_r(config_map, path, value); } break; case PARSER_CB_SECTION_START: if (strcmp(path, "totem.interface") == 0) { *state = MAIN_CP_CB_DATA_STATE_INTERFACE; data->linknumber = 0; data->mcastport = -1; data->ttl = -1; data->knet_link_priority = -1; data->knet_ping_interval = -1; data->knet_ping_timeout = -1; data->knet_ping_precision = -1; - list_init(&data->member_items_head); + qb_list_init(&data->member_items_head); }; if (strcmp(path, "totem") == 0) { *state = MAIN_CP_CB_DATA_STATE_TOTEM; }; if (strcmp(path, "qb") == 0) { *state = MAIN_CP_CB_DATA_STATE_QB; } if (strcmp(path, "logging.logger_subsys") == 0) { *state = MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS; - list_init(&data->logger_subsys_items_head); + qb_list_init(&data->logger_subsys_items_head); data->subsys = NULL; } if (strcmp(path, "logging.logging_daemon") == 0) { *state = MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON; - list_init(&data->logger_subsys_items_head); + qb_list_init(&data->logger_subsys_items_head); data->subsys = NULL; data->logging_daemon_name = NULL; } if (strcmp(path, "uidgid") == 0) { *state = MAIN_CP_CB_DATA_STATE_UIDGID; } if (strcmp(path, "totem.interface.member") == 0) { *state = MAIN_CP_CB_DATA_STATE_MEMBER; } if (strcmp(path, "quorum") == 0) { *state = MAIN_CP_CB_DATA_STATE_QUORUM; } if (strcmp(path, "quorum.device") == 0) { *state = MAIN_CP_CB_DATA_STATE_QDEVICE; } if (strcmp(path, "nodelist") == 0) { *state = MAIN_CP_CB_DATA_STATE_NODELIST; data->node_number = 0; } if (strcmp(path, "nodelist.node") == 0) { *state = MAIN_CP_CB_DATA_STATE_NODELIST_NODE; data->ring0_addr_added = 0; } if (strcmp(path, "resources") == 0) { *state = MAIN_CP_CB_DATA_STATE_RESOURCES; } if (strcmp(path, "resources.system") == 0) { *state = 
MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM; } if (strcmp(path, "resources.system.memory_used") == 0) { *state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED; } if (strcmp(path, "resources.process") == 0) { *state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS; } if (strcmp(path, "resources.process.memory_used") == 0) { *state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED; } break; case PARSER_CB_SECTION_END: switch (*state) { case MAIN_CP_CB_DATA_STATE_INTERFACE: /* * Create new interface section */ if (data->bindnetaddr != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", data->linknumber); icmap_set_string_r(config_map, key_name, data->bindnetaddr); free(data->bindnetaddr); data->bindnetaddr = NULL; } if (data->mcastaddr != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", data->linknumber); icmap_set_string_r(config_map, key_name, data->mcastaddr); free(data->mcastaddr); data->mcastaddr = NULL; } if (data->broadcast != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", data->linknumber); icmap_set_string_r(config_map, key_name, data->broadcast); free(data->broadcast); data->broadcast = NULL; } if (data->mcastport > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", data->linknumber); icmap_set_uint16_r(config_map, key_name, data->mcastport); } if (data->ttl > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", data->linknumber); icmap_set_uint8_r(config_map, key_name, data->ttl); } if (data->knet_link_priority > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_link_priority", data->linknumber); icmap_set_uint8_r(config_map, key_name, data->knet_link_priority); } if (data->knet_ping_interval > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_interval", data->linknumber); icmap_set_uint32_r(config_map, key_name, data->knet_ping_interval); } if (data->knet_ping_timeout > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_timeout", data->linknumber); icmap_set_uint32_r(config_map, key_name, data->knet_ping_timeout); } if (data->knet_ping_precision > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_precision", data->linknumber); icmap_set_uint32_r(config_map, key_name, data->knet_ping_precision); } ii = 0; - for (iter = data->member_items_head.next; - iter != &data->member_items_head; iter = iter_next) { - kv_item = list_entry(iter, struct key_value_list_item, list); + + qb_list_for_each(iter, &(data->member_items_head)) { + kv_item = qb_list_entry(iter, struct key_value_list_item, list); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.%u", data->linknumber, ii); icmap_set_string_r(config_map, key_name, kv_item->value); - iter_next = iter->next; - free(kv_item->value); free(kv_item->key); free(kv_item); ii++; } break; case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: if (data->subsys == NULL) { *error_string = "No subsys key in logger_subsys directive"; return (0); } - for (iter = data->logger_subsys_items_head.next; - iter != &data->logger_subsys_items_head; iter = iter_next) { - kv_item = list_entry(iter, struct key_value_list_item, list); + qb_list_for_each(iter, &(data->logger_subsys_items_head)) { + kv_item = qb_list_entry(iter, struct key_value_list_item, list); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s", data->subsys, kv_item->key); - icmap_set_string_r(config_map, key_name, kv_item->value); - - iter_next 
= iter->next; + icmap_set_string_r(config_map, key_name, kv_item->value); free(kv_item->value); free(kv_item->key); free(kv_item); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", data->subsys); icmap_set_string_r(config_map, key_name, data->subsys); free(data->subsys); break; case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: if (data->logging_daemon_name == NULL) { *error_string = "No name key in logging_daemon directive"; return (0); } - for (iter = data->logger_subsys_items_head.next; - iter != &data->logger_subsys_items_head; iter = iter_next) { - kv_item = list_entry(iter, struct key_value_list_item, list); + qb_list_for_each(iter, &(data->logger_subsys_items_head)) { + kv_item = qb_list_entry(iter, struct key_value_list_item, list); if (data->subsys == NULL) { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.%s", kv_item->key); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s", data->logging_daemon_name, kv_item->key); } } else { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s", data->subsys, kv_item->key); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.%s", data->logging_daemon_name, data->subsys, kv_item->key); } } icmap_set_string_r(config_map, key_name, kv_item->value); - iter_next = iter->next; - free(kv_item->value); free(kv_item->key); free(kv_item); } if (data->subsys == NULL) { if (strcmp(data->logging_daemon_name, "corosync") != 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.name", data->logging_daemon_name); icmap_set_string_r(config_map, key_name, data->logging_daemon_name); } } else { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", data->subsys); icmap_set_string_r(config_map, key_name, data->subsys); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.subsys", data->logging_daemon_name, data->subsys); icmap_set_string_r(config_map, key_name, data->subsys); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.name", data->logging_daemon_name, data->subsys); icmap_set_string_r(config_map, key_name, data->logging_daemon_name); } } free(data->subsys); free(data->logging_daemon_name); break; case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: if (!data->ring0_addr_added) { *error_string = "No ring0_addr specified for node"; return (0); } data->node_number++; break; case MAIN_CP_CB_DATA_STATE_NORMAL: case MAIN_CP_CB_DATA_STATE_PLOAD: case MAIN_CP_CB_DATA_STATE_UIDGID: case MAIN_CP_CB_DATA_STATE_MEMBER: case MAIN_CP_CB_DATA_STATE_QUORUM: case MAIN_CP_CB_DATA_STATE_QDEVICE: case MAIN_CP_CB_DATA_STATE_NODELIST: case MAIN_CP_CB_DATA_STATE_TOTEM: case MAIN_CP_CB_DATA_STATE_QB: break; case MAIN_CP_CB_DATA_STATE_RESOURCES: *state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM: *state = MAIN_CP_CB_DATA_STATE_RESOURCES; break; case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED: *state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM; break; case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS: *state = MAIN_CP_CB_DATA_STATE_RESOURCES; break; case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED: *state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS; break; } break; } return (1); atoi_error: min_val = max_val = 0; /* * This is really assert, because developer ether doesn't set val_type correctly or * 
we've got here after some nasty memory overwrite */ assert(safe_atoq_range(val_type, &min_val, &max_val) == 0); snprintf(formated_err, sizeof(formated_err), "Value of key \"%s\" is expected to be integer in range (%lld..%lld), but \"%s\" was given", key, min_val, max_val, value); *error_string = formated_err; return (0); } static int uidgid_config_parser_cb(const char *path, char *key, char *value, enum main_cp_cb_data_state *state, enum parser_cb_type type, const char **error_string, icmap_map_t config_map, void *user_data) { char key_name[ICMAP_KEYNAME_MAXLEN]; int uid, gid; switch (type) { case PARSER_CB_START: break; case PARSER_CB_END: break; case PARSER_CB_ITEM: if (strcmp(path, "uidgid.uid") == 0) { uid = uid_determine(value); if (uid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u", uid); icmap_set_uint8_r(config_map, key_name, 1); } else if (strcmp(path, "uidgid.gid") == 0) { gid = gid_determine(value); if (gid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u", gid); icmap_set_uint8_r(config_map, key_name, 1); } else { *error_string = "uidgid: Only uid and gid are allowed items"; return (0); } break; case PARSER_CB_SECTION_START: if (strcmp(path, "uidgid") != 0) { *error_string = "uidgid: Can't add subsection different than uidgid"; return (0); }; break; case PARSER_CB_SECTION_END: break; } return (1); } static int read_uidgid_files_into_icmap( const char **error_string, icmap_map_t config_map) { FILE *fp; const char *dirname; DIR *dp; struct dirent *dirent; struct dirent *entry; char filename[PATH_MAX + FILENAME_MAX + 1]; int res = 0; size_t len; int return_code; struct stat stat_buf; enum main_cp_cb_data_state state = MAIN_CP_CB_DATA_STATE_NORMAL; char key_name[ICMAP_KEYNAME_MAXLEN]; dirname = COROSYSCONFDIR "/uidgid.d"; dp = opendir (dirname); if (dp == NULL) return 0; len = offsetof(struct dirent, d_name) + FILENAME_MAX + 1; entry = malloc(len); if (entry == NULL) { res = 0; goto error_exit; } for (return_code = readdir_r(dp, entry, &dirent); dirent != NULL && return_code == 0; return_code = readdir_r(dp, entry, &dirent)) { snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name); res = stat (filename, &stat_buf); if (res == 0 && S_ISREG(stat_buf.st_mode)) { fp = fopen (filename, "r"); if (fp == NULL) continue; key_name[0] = 0; res = parse_section(fp, key_name, error_string, 0, state, uidgid_config_parser_cb, config_map, NULL); fclose (fp); if (res != 0) { goto error_exit; } } } error_exit: free (entry); closedir(dp); return res; } /* Read config file and load into icmap */ static int read_config_file_into_icmap( const char **error_string, icmap_map_t config_map) { FILE *fp; const char *filename; char *error_reason = error_string_response; int res; char key_name[ICMAP_KEYNAME_MAXLEN]; struct main_cp_cb_data data; enum main_cp_cb_data_state state = MAIN_CP_CB_DATA_STATE_NORMAL; filename = getenv ("COROSYNC_MAIN_CONFIG_FILE"); if (!filename) filename = COROSYSCONFDIR "/corosync.conf"; fp = fopen (filename, "r"); if (fp == NULL) { char error_str[100]; const char *error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_reason, sizeof(error_string_response), "Can't read file %s reason = (%s)", filename, error_ptr); *error_string = error_reason; return -1; } key_name[0] = 0; res = parse_section(fp, key_name, error_string, 0, state, main_config_parser_cb, config_map, &data); fclose(fp); if (res 
== 0) { res = read_uidgid_files_into_icmap(error_string, config_map); } if (res == 0) { snprintf (error_reason, sizeof(error_string_response), "Successfully read main configuration file '%s'.", filename); *error_string = error_reason; } return res; } diff --git a/exec/cpg.c b/exec/cpg.c index 5bd830fd..52086881 100644 --- a/exec/cpg.c +++ b/exec/cpg.c @@ -1,2438 +1,2385 @@ /* * Copyright (c) 2006-2015 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfie@redhat.com) * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include + +#include #include #include #include #include -#include #include #include #include #include #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif #include "service.h" LOGSYS_DECLARE_SUBSYS ("CPG"); #define GROUP_HASH_SIZE 32 enum cpg_message_req_types { MESSAGE_REQ_EXEC_CPG_PROCJOIN = 0, MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1, MESSAGE_REQ_EXEC_CPG_JOINLIST = 2, MESSAGE_REQ_EXEC_CPG_MCAST = 3, MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4, MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5, MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST = 6, }; struct zcb_mapped { - struct list_head list; + struct qb_list_head list; void *addr; size_t size; }; /* * state` exec deliver * match group name, pid -> if matched deliver for YES: * XXX indicates impossible state * * join leave mcast * UNJOINED XXX XXX NO * LEAVE_STARTED XXX YES(unjoined_enter) YES * JOIN_STARTED YES(join_started_enter) XXX NO * JOIN_COMPLETED XXX NO YES * * join_started_enter * set JOIN_COMPLETED * add entry to process_info list * unjoined_enter * set UNJOINED * delete entry from process_info list * * * library accept join error codes * UNJOINED YES(CS_OK) set JOIN_STARTED * LEAVE_STARTED NO(CS_ERR_BUSY) * JOIN_STARTED NO(CS_ERR_EXIST) * JOIN_COMPlETED NO(CS_ERR_EXIST) * * library accept leave error codes * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED NO(CS_ERR_BUSY) * JOIN_COMPLETED YES(CS_OK) set LEAVE_STARTED * * library accept mcast * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED YES(CS_OK) * JOIN_COMPLETED YES(CS_OK) */ enum cpd_state { CPD_STATE_UNJOINED, CPD_STATE_LEAVE_STARTED, CPD_STATE_JOIN_STARTED, CPD_STATE_JOIN_COMPLETED }; enum cpg_sync_state { CPGSYNC_DOWNLIST, CPGSYNC_JOINLIST }; enum cpg_downlist_state_e { CPG_DOWNLIST_NONE, CPG_DOWNLIST_WAITING_FOR_MESSAGES, CPG_DOWNLIST_APPLYING, }; static enum cpg_downlist_state_e downlist_state; -static struct list_head downlist_messages_head; -static struct list_head joinlist_messages_head; +static struct qb_list_head downlist_messages_head; +static struct qb_list_head joinlist_messages_head; struct cpg_pd { void *conn; mar_cpg_name_t group_name; uint32_t pid; enum cpd_state cpd_state; unsigned int flags; int initial_totem_conf_sent; uint64_t transition_counter; /* These two are used when sending fragmented messages */ uint64_t initial_transition_counter; - struct list_head list; - struct list_head iteration_instance_list_head; - struct list_head zcb_mapped_list_head; + struct qb_list_head list; + struct qb_list_head iteration_instance_list_head; + struct qb_list_head zcb_mapped_list_head; }; struct cpg_iteration_instance { hdb_handle_t handle; - struct list_head list; - struct list_head items_list_head; /* List of process_info */ - struct list_head *current_pointer; + struct qb_list_head list; + struct qb_list_head items_list_head; /* List of process_info */ + struct qb_list_head *current_pointer; }; DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL); -DECLARE_LIST_INIT(cpg_pd_list_head); +QB_LIST_DECLARE (cpg_pd_list_head); static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_member_list_entries; static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_old_member_list_entries = 0; static struct corosync_api_v1 *api = NULL; static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST; 
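The hunks above swap corosync's private list primitives (DECLARE_LIST_INIT, list_init, list_add_tail, list_entry) for the libqb equivalents from <qb/qblist.h> (QB_LIST_DECLARE, qb_list_init, qb_list_add_tail, qb_list_entry), and the loops later in this file are rewritten from open-coded next-pointer walks to qb_list_for_each(). Below is a minimal sketch of the two traversal idioms involved, assuming only the libqb macros; struct item, item_list_head and walk_and_prune are invented names for illustration and are not part of this patch. Note that libqb also provides qb_list_for_each_safe() with a lookahead pointer for loops that unlink and free the current entry, which is worth keeping in mind when reviewing the converted delete loops further down.

#include <stdlib.h>
#include <qb/qblist.h>

struct item {
	int value;
	struct qb_list_head list;	/* embedded node, like the 'list' member of struct cpg_pd */
};

QB_LIST_DECLARE(item_list_head);	/* defines and initializes the list head in one step */

static void walk_and_prune(void)
{
	struct qb_list_head *iter;
	struct qb_list_head *tmp;

	/* Read-only traversal: the idiom the notify/joinlist loops are converted to. */
	qb_list_for_each(iter, &item_list_head) {
		struct item *it = qb_list_entry(iter, struct item, list);
		(void)it->value;
	}

	/*
	 * Traversal that removes entries: qb_list_for_each_safe() keeps a
	 * lookahead pointer, so qb_list_del() plus free() of the current
	 * entry cannot invalidate the iterator.
	 */
	qb_list_for_each_safe(iter, tmp, &item_list_head) {
		struct item *it = qb_list_entry(iter, struct item, list);
		qb_list_del(&it->list);
		free(it);
	}
}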
static mar_cpg_ring_id_t last_sync_ring_id; struct process_info { unsigned int nodeid; uint32_t pid; mar_cpg_name_t group; - struct list_head list; /* on the group_info members list */ + struct qb_list_head list; /* on the group_info members list */ }; -DECLARE_LIST_INIT(process_info_list_head); +QB_LIST_DECLARE (process_info_list_head); struct join_list_entry { uint32_t pid; mar_cpg_name_t group_name; }; /* * Service Interfaces required by service_message_handler struct */ static char *cpg_exec_init_fn (struct corosync_api_v1 *); static int cpg_lib_init_fn (void *conn); static int cpg_lib_exit_fn (void *conn); static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_joinlist ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_partial_mcast ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist ( const void *message, unsigned int nodeid); static void exec_cpg_procjoin_endian_convert (void *msg); static void exec_cpg_joinlist_endian_convert (void *msg); static void exec_cpg_mcast_endian_convert (void *msg); static void exec_cpg_partial_mcast_endian_convert (void *msg); static void exec_cpg_downlist_endian_convert_old (void *msg); static void exec_cpg_downlist_endian_convert (void *msg); static void message_handler_req_lib_cpg_join (void *conn, const void *message); static void message_handler_req_lib_cpg_leave (void *conn, const void *message); static void message_handler_req_lib_cpg_finalize (void *conn, const void *message); static void message_handler_req_lib_cpg_mcast (void *conn, const void *message); static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message); static void message_handler_req_lib_cpg_membership (void *conn, const void *message); static void message_handler_req_lib_cpg_local_get (void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_initialize ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_next ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_finalize ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_alloc ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message); static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason); static int cpg_exec_send_downlist(void); static int cpg_exec_send_joinlist(void); static void downlist_messages_delete (void); static void downlist_master_choose_and_send (void); static void joinlist_inform_clients (void); static void joinlist_messages_delete (void); static void cpg_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int cpg_sync_process (void); static void cpg_sync_activate (void); static void cpg_sync_abort (void); static void do_proc_join( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason); static void 
do_proc_leave( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason); static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list); static inline int zcb_all_free ( struct cpg_pd *cpd); static char *cpg_print_group_name ( const mar_cpg_name_t *group); /* * Library Handler Definition */ static struct corosync_lib_handler cpg_lib_engine[] = { { /* 0 - MESSAGE_REQ_CPG_JOIN */ .lib_handler_fn = message_handler_req_lib_cpg_join, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 1 - MESSAGE_REQ_CPG_LEAVE */ .lib_handler_fn = message_handler_req_lib_cpg_leave, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 2 - MESSAGE_REQ_CPG_MCAST */ .lib_handler_fn = message_handler_req_lib_cpg_mcast, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 3 - MESSAGE_REQ_CPG_MEMBERSHIP */ .lib_handler_fn = message_handler_req_lib_cpg_membership, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 - MESSAGE_REQ_CPG_LOCAL_GET */ .lib_handler_fn = message_handler_req_lib_cpg_local_get, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 - MESSAGE_REQ_CPG_ITERATIONINITIALIZE */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_initialize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 - MESSAGE_REQ_CPG_ITERATIONNEXT */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_next, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 - MESSAGE_REQ_CPG_ITERATIONFINALIZE */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_finalize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 - MESSAGE_REQ_CPG_FINALIZE */ .lib_handler_fn = message_handler_req_lib_cpg_finalize, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 9 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_alloc, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 10 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_free, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 11 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_execute, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 12 */ .lib_handler_fn = message_handler_req_lib_cpg_partial_mcast, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, }; static struct corosync_exec_handler cpg_exec_engine[] = { { /* 0 - MESSAGE_REQ_EXEC_CPG_PROCJOIN */ .exec_handler_fn = message_handler_req_exec_cpg_procjoin, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 1 - MESSAGE_REQ_EXEC_CPG_PROCLEAVE */ .exec_handler_fn = message_handler_req_exec_cpg_procleave, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 2 - MESSAGE_REQ_EXEC_CPG_JOINLIST */ .exec_handler_fn = message_handler_req_exec_cpg_joinlist, .exec_endian_convert_fn = exec_cpg_joinlist_endian_convert }, { /* 3 - MESSAGE_REQ_EXEC_CPG_MCAST */ .exec_handler_fn = message_handler_req_exec_cpg_mcast, .exec_endian_convert_fn = exec_cpg_mcast_endian_convert }, { /* 4 - MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD */ .exec_handler_fn = message_handler_req_exec_cpg_downlist_old, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old }, { /* 5 - MESSAGE_REQ_EXEC_CPG_DOWNLIST */ .exec_handler_fn = message_handler_req_exec_cpg_downlist, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert }, { /* 6 - MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST */ .exec_handler_fn = message_handler_req_exec_cpg_partial_mcast, .exec_endian_convert_fn = exec_cpg_partial_mcast_endian_convert }, }; struct corosync_service_engine cpg_service_engine = { .name = "corosync cluster closed process group service v1.01", .id = 
CPG_SERVICE, .priority = 1, .private_data_size = sizeof (struct cpg_pd), .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = cpg_lib_init_fn, .lib_exit_fn = cpg_lib_exit_fn, .lib_engine = cpg_lib_engine, .lib_engine_count = sizeof (cpg_lib_engine) / sizeof (struct corosync_lib_handler), .exec_init_fn = cpg_exec_init_fn, .exec_dump_fn = NULL, .exec_engine = cpg_exec_engine, .exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler), .sync_init = cpg_sync_init, .sync_process = cpg_sync_process, .sync_activate = cpg_sync_activate, .sync_abort = cpg_sync_abort }; struct corosync_service_engine *cpg_get_service_engine_ver0 (void) { return (&cpg_service_engine); } struct req_exec_cpg_procjoin { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_uint32_t reason __attribute__((aligned(8))); }; struct req_exec_cpg_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t msglen __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); mar_uint8_t message[] __attribute__((aligned(8))); }; struct req_exec_cpg_partial_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t msglen __attribute__((aligned(8))); mar_uint32_t fraglen __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_uint32_t type __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); mar_uint8_t message[] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist_old { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist { struct qb_ipc_request_header header __attribute__((aligned(8))); /* merge decisions */ mar_uint32_t old_members __attribute__((aligned(8))); /* downlist below */ mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct downlist_msg { mar_uint32_t sender_nodeid; mar_uint32_t old_members __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); - struct list_head list; + struct qb_list_head list; }; struct joinlist_msg { mar_uint32_t sender_nodeid; uint32_t pid; mar_cpg_name_t group_name; - struct list_head list; + struct qb_list_head list; }; static struct req_exec_cpg_downlist g_req_exec_cpg_downlist; /* * Function print group name. 
It's not reentrant */ static char *cpg_print_group_name(const mar_cpg_name_t *group) { static char res[CPG_MAX_NAME_LENGTH * 4 + 1]; int dest_pos = 0; char c; int i; for (i = 0; i < group->length; i++) { c = group->value[i]; if (c >= ' ' && c < 0x7f && c != '\\') { res[dest_pos++] = c; } else { if (c == '\\') { res[dest_pos++] = '\\'; res[dest_pos++] = '\\'; } else { snprintf(res + dest_pos, sizeof(res) - dest_pos, "\\x%02X", c); dest_pos += 4; } } } res[dest_pos] = 0; return (res); } static void cpg_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { int entries; int i, j; int found; my_sync_state = CPGSYNC_DOWNLIST; memcpy (my_member_list, member_list, member_list_entries * sizeof (unsigned int)); my_member_list_entries = member_list_entries; last_sync_ring_id.nodeid = ring_id->rep.nodeid; last_sync_ring_id.seq = ring_id->seq; downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES; entries = 0; /* * Determine list of nodeids for downlist message */ for (i = 0; i < my_old_member_list_entries; i++) { found = 0; for (j = 0; j < trans_list_entries; j++) { if (my_old_member_list[i] == trans_list[j]) { found = 1; break; } } if (found == 0) { g_req_exec_cpg_downlist.nodeids[entries++] = my_old_member_list[i]; } } g_req_exec_cpg_downlist.left_nodes = entries; } static int cpg_sync_process (void) { int res = -1; if (my_sync_state == CPGSYNC_DOWNLIST) { res = cpg_exec_send_downlist(); if (res == -1) { return (-1); } my_sync_state = CPGSYNC_JOINLIST; } if (my_sync_state == CPGSYNC_JOINLIST) { res = cpg_exec_send_joinlist(); } return (res); } static void cpg_sync_activate (void) { memcpy (my_old_member_list, my_member_list, my_member_list_entries * sizeof (unsigned int)); my_old_member_list_entries = my_member_list_entries; if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) { downlist_master_choose_and_send (); } joinlist_inform_clients (); downlist_messages_delete (); downlist_state = CPG_DOWNLIST_NONE; joinlist_messages_delete (); notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); } static void cpg_sync_abort (void) { downlist_state = CPG_DOWNLIST_NONE; downlist_messages_delete (); joinlist_messages_delete (); } static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list) { - struct list_head *iter; + struct qb_list_head *iter; char *buf; int size; struct res_lib_cpg_totem_confchg_callback *res; size = sizeof(struct res_lib_cpg_totem_confchg_callback) + sizeof(mar_uint32_t) * (member_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_totem_confchg_callback *)buf; res->member_list_entries = member_list_entries; res->header.size = size; res->header.id = MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK; res->header.error = CS_OK; memcpy (&res->ring_id, &last_sync_ring_id, sizeof (mar_cpg_ring_id_t)); memcpy (res->member_list, member_list, res->member_list_entries * sizeof (mar_uint32_t)); if (conn == NULL) { - for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { - struct cpg_pd *cpg_pd = list_entry (iter, struct cpg_pd, list); + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpg_pd = qb_list_entry (iter, struct cpg_pd, list); api->ipc_dispatch_send (cpg_pd->conn, buf, size); } } else { api->ipc_dispatch_send (conn, buf, size); } return CS_OK; } static int notify_lib_joinlist( const mar_cpg_name_t *group_name, void *conn, int 
joined_list_entries, mar_cpg_address_t *joined_list, int left_list_entries, mar_cpg_address_t *left_list, int id) { int size; char *buf; - struct list_head *iter; + struct qb_list_head *iter; int count; struct res_lib_cpg_confchg_callback *res; mar_cpg_address_t *retgi; count = 0; - for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { - struct process_info *pi = list_entry (iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0; i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { founded++; } } if (!founded) count++; } } size = sizeof(struct res_lib_cpg_confchg_callback) + sizeof(mar_cpg_address_t) * (count + left_list_entries + joined_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_confchg_callback *)buf; res->joined_list_entries = joined_list_entries; res->left_list_entries = left_list_entries; res->member_list_entries = count; retgi = res->member_list; res->header.size = size; res->header.id = id; res->header.error = CS_OK; memcpy(&res->group_name, group_name, sizeof(mar_cpg_name_t)); - for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { - struct process_info *pi=list_entry (iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi=qb_list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0;i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { founded++; } } if (!founded) { retgi->nodeid = pi->nodeid; retgi->pid = pi->pid; retgi++; } } } if (left_list_entries) { memcpy (retgi, left_list, left_list_entries * sizeof(mar_cpg_address_t)); retgi += left_list_entries; } if (joined_list_entries) { memcpy (retgi, joined_list, joined_list_entries * sizeof(mar_cpg_address_t)); retgi += joined_list_entries; } if (conn) { api->ipc_dispatch_send (conn, buf, size); } else { - for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { - struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd = qb_list_entry (iter, struct cpg_pd, list); if (mar_name_compare (&cpd->group_name, group_name) == 0) { assert (joined_list_entries <= 1); if (joined_list_entries) { if (joined_list[0].pid == cpd->pid && joined_list[0].nodeid == api->totem_nodeid_get()) { cpd->cpd_state = CPD_STATE_JOIN_COMPLETED; } } if (cpd->cpd_state == CPD_STATE_JOIN_COMPLETED || cpd->cpd_state == CPD_STATE_LEAVE_STARTED) { api->ipc_dispatch_send (cpd->conn, buf, size); cpd->transition_counter++; } if (left_list_entries) { if (left_list[0].pid == cpd->pid && left_list[0].nodeid == api->totem_nodeid_get() && left_list[0].reason == CONFCHG_CPG_REASON_LEAVE) { cpd->pid = 0; memset (&cpd->group_name, 0, sizeof(cpd->group_name)); cpd->cpd_state = CPD_STATE_UNJOINED; } } } } } /* * Traverse thru cpds and send totem membership for cpd, where it is not send yet */ - for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { - struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd = qb_list_entry (iter, struct 
cpg_pd, list); if ((cpd->flags & CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF) && (cpd->initial_totem_conf_sent == 0)) { cpd->initial_totem_conf_sent = 1; notify_lib_totem_membership (cpd->conn, my_old_member_list_entries, my_old_member_list); } } return CS_OK; } static void downlist_log(const char *msg, struct downlist_msg* dl) { log_printf (LOG_DEBUG, "%s: sender %s; members(old:%d left:%d)", msg, api->totem_ifaces_print(dl->sender_nodeid), dl->old_members, dl->left_nodes); } static struct downlist_msg* downlist_master_choose (void) { struct downlist_msg *cmp; struct downlist_msg *best = NULL; - struct list_head *iter; + struct qb_list_head *iter; uint32_t cmp_members; uint32_t best_members; uint32_t i; int ignore_msg; - for (iter = downlist_messages_head.next; - iter != &downlist_messages_head; - iter = iter->next) { - - cmp = list_entry(iter, struct downlist_msg, list); + qb_list_for_each(iter, &downlist_messages_head) { + cmp = qb_list_entry(iter, struct downlist_msg, list); downlist_log("comparing", cmp); ignore_msg = 0; for (i = 0; i < cmp->left_nodes; i++) { if (cmp->nodeids[i] == api->totem_nodeid_get()) { log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n"); ignore_msg = 1; break; } } if (ignore_msg) { continue ; } if (best == NULL) { best = cmp; continue; } best_members = best->old_members - best->left_nodes; cmp_members = cmp->old_members - cmp->left_nodes; if (cmp_members > best_members) { best = cmp; } else if (cmp_members == best_members) { if (cmp->old_members > best->old_members) { best = cmp; } else if (cmp->old_members == best->old_members) { if (cmp->sender_nodeid < best->sender_nodeid) { best = cmp; } } } } assert (best != NULL); return best; } static void downlist_master_choose_and_send (void) { struct downlist_msg *stored_msg; - struct list_head *iter; + struct qb_list_head *iter; struct process_info *left_pi; qb_map_t *group_map; struct cpg_name cpg_group; mar_cpg_name_t group; struct confchg_data{ struct cpg_name cpg_group; mar_cpg_address_t left_list[CPG_MEMBERS_MAX]; int left_list_entries; - struct list_head list; + struct qb_list_head list; } *pcd; qb_map_iter_t *miter; int i, size; downlist_state = CPG_DOWNLIST_APPLYING; stored_msg = downlist_master_choose (); if (!stored_msg) { log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist"); return; } downlist_log("chosen downlist", stored_msg); group_map = qb_skiplist_create(); /* * only the cpg groups included in left nodes should receive * confchg event, so we will collect these cpg groups and * relative left_lists here. 
*/ - for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { - struct process_info *pi = list_entry(iter, struct process_info, list); - iter = iter->next; + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry(iter, struct process_info, list); left_pi = NULL; for (i = 0; i < stored_msg->left_nodes; i++) { if (pi->nodeid == stored_msg->nodeids[i]) { left_pi = pi; break; } } if (left_pi) { marshall_from_mar_cpg_name_t(&cpg_group, &left_pi->group); cpg_group.value[cpg_group.length] = 0; pcd = (struct confchg_data *)qb_map_get(group_map, cpg_group.value); if (pcd == NULL) { pcd = (struct confchg_data *)calloc(1, sizeof(struct confchg_data)); memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name)); qb_map_put(group_map, pcd->cpg_group.value, pcd); } size = pcd->left_list_entries; pcd->left_list[size].nodeid = left_pi->nodeid; pcd->left_list[size].pid = left_pi->pid; pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN; pcd->left_list_entries++; - list_del (&left_pi->list); + qb_list_del (&left_pi->list); free (left_pi); } } /* send only one confchg event per cpg group */ miter = qb_map_iter_create(group_map); while (qb_map_iter_next(miter, (void **)&pcd)) { marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group); log_printf (LOG_DEBUG, "left_list_entries:%d", pcd->left_list_entries); for (i=0; i<pcd->left_list_entries; i++) { log_printf (LOG_DEBUG, "left_list[%d] group:%s, ip:%s, pid:%d", i, cpg_print_group_name(&group), (char*)api->totem_ifaces_print(pcd->left_list[i].nodeid), pcd->left_list[i].pid); } /* send confchg event */ notify_lib_joinlist(&group, NULL, 0, NULL, pcd->left_list_entries, pcd->left_list, MESSAGE_RES_CPG_CONFCHG_CALLBACK); free(pcd); } qb_map_iter_free(miter); qb_map_destroy(group_map); } /* * Remove processes that might have left the group while we were suspended.
*/ static void joinlist_remove_zombie_pi_entries (void) { - struct list_head *pi_iter; - struct list_head *jl_iter; + struct qb_list_head *pi_iter; + struct qb_list_head *jl_iter; struct process_info *pi; struct joinlist_msg *stored_msg; int found; - for (pi_iter = process_info_list_head.next; pi_iter != &process_info_list_head; ) { - pi = list_entry (pi_iter, struct process_info, list); - pi_iter = pi_iter->next; + qb_list_for_each(pi_iter, &process_info_list_head) { + pi = qb_list_entry (pi_iter, struct process_info, list); /* * Ignore local node */ if (pi->nodeid == api->totem_nodeid_get()) { continue ; } /* * Try to find message in joinlist messages */ found = 0; - for (jl_iter = joinlist_messages_head.next; - jl_iter != &joinlist_messages_head; - jl_iter = jl_iter->next) { - - stored_msg = list_entry(jl_iter, struct joinlist_msg, list); + qb_list_for_each(jl_iter, &joinlist_messages_head) { + stored_msg = qb_list_entry(jl_iter, struct joinlist_msg, list); if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { continue ; } if (pi->nodeid == stored_msg->sender_nodeid && pi->pid == stored_msg->pid && mar_name_compare (&pi->group, &stored_msg->group_name) == 0) { found = 1; break ; } } if (!found) { do_proc_leave(&pi->group, pi->pid, pi->nodeid, CONFCHG_CPG_REASON_PROCDOWN); } } } static void joinlist_inform_clients (void) { struct joinlist_msg *stored_msg; - struct list_head *iter; + struct qb_list_head *iter; unsigned int i; i = 0; - for (iter = joinlist_messages_head.next; - iter != &joinlist_messages_head; - iter = iter->next) { - - stored_msg = list_entry(iter, struct joinlist_msg, list); + qb_list_for_each(iter, &joinlist_messages_head) { + stored_msg = qb_list_entry(iter, struct joinlist_msg, list); log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%s, ip:%s, pid:%d", i++, cpg_print_group_name(&stored_msg->group_name), (char*)api->totem_ifaces_print(stored_msg->sender_nodeid), stored_msg->pid); /* Ignore our own messages */ if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { continue ; } do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid, CONFCHG_CPG_REASON_NODEUP); } joinlist_remove_zombie_pi_entries (); } static void downlist_messages_delete (void) { struct downlist_msg *stored_msg; - struct list_head *iter, *iter_next; - - for (iter = downlist_messages_head.next; - iter != &downlist_messages_head; - iter = iter_next) { - - iter_next = iter->next; + struct qb_list_head *iter; - stored_msg = list_entry(iter, struct downlist_msg, list); - list_del (&stored_msg->list); + qb_list_for_each(iter, &downlist_messages_head) { + stored_msg = qb_list_entry(iter, struct downlist_msg, list); + qb_list_del (&stored_msg->list); free (stored_msg); } } static void joinlist_messages_delete (void) { struct joinlist_msg *stored_msg; - struct list_head *iter, *iter_next; + struct qb_list_head *iter; - for (iter = joinlist_messages_head.next; - iter != &joinlist_messages_head; - iter = iter_next) { - - iter_next = iter->next; - - stored_msg = list_entry(iter, struct joinlist_msg, list); - list_del (&stored_msg->list); + qb_list_for_each(iter, &joinlist_messages_head) { + stored_msg = qb_list_entry(iter, struct joinlist_msg, list); + qb_list_del (&stored_msg->list); free (stored_msg); } - list_init (&joinlist_messages_head); + qb_list_init (&joinlist_messages_head); } static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) { - list_init (&downlist_messages_head); - list_init (&joinlist_messages_head); + qb_list_init (&downlist_messages_head); + 
qb_list_init (&joinlist_messages_head); api = corosync_api; return (NULL); } static void cpg_iteration_instance_finalize (struct cpg_iteration_instance *cpg_iteration_instance) { - struct list_head *iter, *iter_next; + struct qb_list_head *iter; struct process_info *pi; - for (iter = cpg_iteration_instance->items_list_head.next; - iter != &cpg_iteration_instance->items_list_head; - iter = iter_next) { - - iter_next = iter->next; - - pi = list_entry (iter, struct process_info, list); - list_del (&pi->list); + qb_list_for_each(iter, &(cpg_iteration_instance->items_list_head)) { + pi = qb_list_entry (iter, struct process_info, list); + qb_list_del (&pi->list); free (pi); } - list_del (&cpg_iteration_instance->list); + qb_list_del (&cpg_iteration_instance->list); hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); } static void cpg_pd_finalize (struct cpg_pd *cpd) { - struct list_head *iter, *iter_next; + struct qb_list_head *iter; struct cpg_iteration_instance *cpii; - zcb_all_free(cpd); - for (iter = cpd->iteration_instance_list_head.next; - iter != &cpd->iteration_instance_list_head; - iter = iter_next) { - - iter_next = iter->next; - - cpii = list_entry (iter, struct cpg_iteration_instance, list); + zcb_all_free(cpd); + qb_list_for_each(iter, &(cpd->iteration_instance_list_head)) { + cpii = qb_list_entry (iter, struct cpg_iteration_instance, list); cpg_iteration_instance_finalize (cpii); } - list_del (&cpd->list); + qb_list_del (&cpd->list); } static int cpg_lib_exit_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p", conn); if (cpd->group_name.length > 0 && cpd->cpd_state != CPD_STATE_LEAVE_STARTED) { cpg_node_joinleave_send (cpd->pid, &cpd->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN); } cpg_pd_finalize (cpd); api->ipc_refcnt_dec (conn); return (0); } static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason) { struct req_exec_cpg_procjoin req_exec_cpg_procjoin; struct iovec req_exec_cpg_iovec; int result; memcpy(&req_exec_cpg_procjoin.group_name, group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_procjoin.pid = pid; req_exec_cpg_procjoin.reason = reason; req_exec_cpg_procjoin.header.size = sizeof(req_exec_cpg_procjoin); req_exec_cpg_procjoin.header.id = SERVICE_ID_MAKE(CPG_SERVICE, fn); req_exec_cpg_iovec.iov_base = (char *)&req_exec_cpg_procjoin; req_exec_cpg_iovec.iov_len = sizeof(req_exec_cpg_procjoin); result = api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED); return (result); } /* Can byteswap join & leave messages */ static void exec_cpg_procjoin_endian_convert (void *msg) { struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = msg; req_exec_cpg_procjoin->pid = swab32(req_exec_cpg_procjoin->pid); swab_mar_cpg_name_t (&req_exec_cpg_procjoin->group_name); req_exec_cpg_procjoin->reason = swab32(req_exec_cpg_procjoin->reason); } static void exec_cpg_joinlist_endian_convert (void *msg_v) { char *msg = msg_v; struct qb_ipc_response_header *res = (struct qb_ipc_response_header *)msg; struct join_list_entry *jle = (struct join_list_entry *)(msg + sizeof(struct qb_ipc_response_header)); swab_mar_int32_t (&res->size); while ((const char*)jle < msg + res->size) { jle->pid = swab32(jle->pid); swab_mar_cpg_name_t (&jle->group_name); jle++; } } static void exec_cpg_downlist_endian_convert_old (void *msg) { } static void exec_cpg_downlist_endian_convert (void *msg) { struct req_exec_cpg_downlist 
*req_exec_cpg_downlist = msg; unsigned int i; req_exec_cpg_downlist->left_nodes = swab32(req_exec_cpg_downlist->left_nodes); req_exec_cpg_downlist->old_members = swab32(req_exec_cpg_downlist->old_members); for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) { req_exec_cpg_downlist->nodeids[i] = swab32(req_exec_cpg_downlist->nodeids[i]); } } static void exec_cpg_mcast_endian_convert (void *msg) { struct req_exec_cpg_mcast *req_exec_cpg_mcast = msg; swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); swab_mar_message_source_t (&req_exec_cpg_mcast->source); } static void exec_cpg_partial_mcast_endian_convert (void *msg) { struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = msg; swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); req_exec_cpg_mcast->fraglen = swab32(req_exec_cpg_mcast->fraglen); req_exec_cpg_mcast->type = swab32(req_exec_cpg_mcast->type); swab_mar_message_source_t (&req_exec_cpg_mcast->source); } static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) { - struct list_head *iter; - - for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { - struct process_info *pi = list_entry (iter, struct process_info, list); - iter = iter->next; + struct qb_list_head *iter; + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); if (pi->pid == pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, group_name) == 0) { return pi; } } return NULL; } static void do_proc_join( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason) { struct process_info *pi; struct process_info *pi_entry; mar_cpg_address_t notify_info; - struct list_head *list; - struct list_head *list_to_add = NULL; + struct qb_list_head *list; + struct qb_list_head *list_to_add = NULL; if (process_info_find (name, pid, nodeid) != NULL) { return ; } pi = malloc (sizeof (struct process_info)); if (!pi) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); return; } pi->nodeid = nodeid; pi->pid = pid; memcpy(&pi->group, name, sizeof(*name)); - list_init(&pi->list); + qb_list_init(&pi->list); /* * Insert new process in sorted order so synchronization works properly */ list_to_add = &process_info_list_head; - for (list = process_info_list_head.next; list != &process_info_list_head; list = list->next) { - - pi_entry = list_entry(list, struct process_info, list); + qb_list_for_each(list, &process_info_list_head) { + pi_entry = qb_list_entry(list, struct process_info, list); if (pi_entry->nodeid > pi->nodeid || (pi_entry->nodeid == pi->nodeid && pi_entry->pid > pi->pid)) { break; } list_to_add = list; } - list_add (&pi->list, list_to_add); + qb_list_add (&pi->list, list_to_add); notify_info.pid = pi->pid; notify_info.nodeid = nodeid; notify_info.reason = reason; notify_lib_joinlist(&pi->group, NULL, 1, ¬ify_info, 0, NULL, MESSAGE_RES_CPG_CONFCHG_CALLBACK); } static void do_proc_leave( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason) { struct process_info *pi; - struct list_head *iter; + struct qb_list_head *iter; mar_cpg_address_t 
notify_info; notify_info.pid = pid; notify_info.nodeid = nodeid; notify_info.reason = reason; notify_lib_joinlist(name, NULL, 0, NULL, 1, ¬ify_info, MESSAGE_RES_CPG_CONFCHG_CALLBACK); - for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { - pi = list_entry(iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + pi = qb_list_entry(iter, struct process_info, list); iter = iter->next; if (pi->pid == pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, name)==0) { - list_del (&pi->list); + qb_list_del (&pi->list); free (pi); } } } static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid) { log_printf (LOGSYS_LEVEL_WARNING, "downlist OLD from node 0x%x", nodeid); } static void message_handler_req_exec_cpg_downlist( const void *message, unsigned int nodeid) { const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message; int i; - struct list_head *iter; + struct qb_list_head *iter; struct downlist_msg *stored_msg; int found; if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) { log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d", req_exec_cpg_downlist->left_nodes, downlist_state); return; } stored_msg = malloc (sizeof (struct downlist_msg)); stored_msg->sender_nodeid = nodeid; stored_msg->old_members = req_exec_cpg_downlist->old_members; stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes; memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids, req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t)); - list_init (&stored_msg->list); - list_add (&stored_msg->list, &downlist_messages_head); + qb_list_init (&stored_msg->list); + qb_list_add (&stored_msg->list, &downlist_messages_head); for (i = 0; i < my_member_list_entries; i++) { found = 0; - for (iter = downlist_messages_head.next; - iter != &downlist_messages_head; - iter = iter->next) { + qb_list_for_each(iter, &downlist_messages_head) { - stored_msg = list_entry(iter, struct downlist_msg, list); + stored_msg = qb_list_entry(iter, struct downlist_msg, list); if (my_member_list[i] == stored_msg->sender_nodeid) { found = 1; } } if (!found) { return; } } downlist_master_choose_and_send (); } static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; log_printf(LOGSYS_LEVEL_DEBUG, "got procjoin message from cluster node 0x%x (%s) for pid %u", nodeid, api->totem_ifaces_print(nodeid), (unsigned int)req_exec_cpg_procjoin->pid); do_proc_join (&req_exec_cpg_procjoin->group_name, req_exec_cpg_procjoin->pid, nodeid, CONFCHG_CPG_REASON_JOIN); } static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; log_printf(LOGSYS_LEVEL_DEBUG, "got procleave message from cluster node 0x%x (%s) for pid %u", nodeid, api->totem_ifaces_print(nodeid), (unsigned int)req_exec_cpg_procjoin->pid); do_proc_leave (&req_exec_cpg_procjoin->group_name, req_exec_cpg_procjoin->pid, nodeid, req_exec_cpg_procjoin->reason); } /* Got a proclist from another node */ static void message_handler_req_exec_cpg_joinlist ( const void *message_v, unsigned int nodeid) { const char *message = message_v; const struct qb_ipc_response_header *res = (const struct qb_ipc_response_header *)message; const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header)); struct joinlist_msg 
*stored_msg; log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node 0x%x", nodeid); while ((const char*)jle < message + res->size) { stored_msg = malloc (sizeof (struct joinlist_msg)); memset(stored_msg, 0, sizeof (struct joinlist_msg)); stored_msg->sender_nodeid = nodeid; stored_msg->pid = jle->pid; memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t)); - list_init (&stored_msg->list); - list_add (&stored_msg->list, &joinlist_messages_head); + qb_list_init (&stored_msg->list); + qb_list_add (&stored_msg->list, &joinlist_messages_head); jle++; } } static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_mcast *req_exec_cpg_mcast = message; struct res_lib_cpg_deliver_callback res_lib_cpg_mcast; int msglen = req_exec_cpg_mcast->msglen; - struct list_head *iter, *pi_iter; + struct qb_list_head *iter, *pi_iter; struct cpg_pd *cpd; struct iovec iovec[2]; int known_node = 0; res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_DELIVER_CALLBACK; res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; res_lib_cpg_mcast.msglen = msglen; res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; res_lib_cpg_mcast.nodeid = nodeid; memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, sizeof(mar_cpg_name_t)); iovec[0].iov_base = (void *)&res_lib_cpg_mcast; iovec[0].iov_len = sizeof (res_lib_cpg_mcast); iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); iovec[1].iov_len = msglen; - for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) { - cpd = list_entry(iter, struct cpg_pd, list); - iter = iter->next; - + qb_list_for_each(iter, &cpg_pd_list_head) { + cpd = qb_list_entry(iter, struct cpg_pd, list); if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { if (!known_node) { /* Try to find, if we know the node */ - for (pi_iter = process_info_list_head.next; - pi_iter != &process_info_list_head; pi_iter = pi_iter->next) { - - struct process_info *pi = list_entry (pi_iter, struct process_info, list); + qb_list_for_each(pi_iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (pi_iter, struct process_info, list); if (pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { known_node = 1; break; } } } if (!known_node) { log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); return ; } api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); } } } static void message_handler_req_exec_cpg_partial_mcast ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = message; struct res_lib_cpg_partial_deliver_callback res_lib_cpg_mcast; int msglen = req_exec_cpg_mcast->fraglen; - struct list_head *iter, *pi_iter; + struct qb_list_head *iter, *pi_iter; struct cpg_pd *cpd; struct iovec iovec[2]; int known_node = 0; log_printf(LOGSYS_LEVEL_DEBUG, "Got fragmented message from node %d, size = %d bytes\n", nodeid, msglen); res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK; res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; res_lib_cpg_mcast.fraglen = msglen; res_lib_cpg_mcast.msglen = req_exec_cpg_mcast->msglen; res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; res_lib_cpg_mcast.type = req_exec_cpg_mcast->type; res_lib_cpg_mcast.nodeid = nodeid; memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, 
sizeof(mar_cpg_name_t)); iovec[0].iov_base = (void *)&res_lib_cpg_mcast; iovec[0].iov_len = sizeof (res_lib_cpg_mcast); iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); iovec[1].iov_len = msglen; - for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) { - cpd = list_entry(iter, struct cpg_pd, list); - iter = iter->next; - + qb_list_for_each(iter, &cpg_pd_list_head) { + cpd = qb_list_entry(iter, struct cpg_pd, list); if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { if (!known_node) { /* Try to find, if we know the node */ - for (pi_iter = process_info_list_head.next; - pi_iter != &process_info_list_head; pi_iter = pi_iter->next) { - - struct process_info *pi = list_entry (pi_iter, struct process_info, list); + qb_list_for_each(pi_iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (pi_iter, struct process_info, list); if (pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { known_node = 1; break; } } } if (!known_node) { log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); return ; } api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); } } } static int cpg_exec_send_downlist(void) { struct iovec iov; g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST); g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist); g_req_exec_cpg_downlist.old_members = my_old_member_list_entries; iov.iov_base = (void *)&g_req_exec_cpg_downlist; iov.iov_len = g_req_exec_cpg_downlist.header.size; return (api->totem_mcast (&iov, 1, TOTEM_AGREED)); } static int cpg_exec_send_joinlist(void) { int count = 0; - struct list_head *iter; + struct qb_list_head *iter; struct qb_ipc_response_header *res; char *buf; struct join_list_entry *jle; struct iovec req_exec_cpg_iovec; - for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { - struct process_info *pi = list_entry (iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { count++; } } /* Nothing to send */ if (!count) return 0; buf = alloca(sizeof(struct qb_ipc_response_header) + sizeof(struct join_list_entry) * count); if (!buf) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate joinlist buffer"); return -1; } jle = (struct join_list_entry *)(buf + sizeof(struct qb_ipc_response_header)); res = (struct qb_ipc_response_header *)buf; - for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { - struct process_info *pi = list_entry (iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t)); jle->pid = pi->pid; jle++; } } res->id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_JOINLIST); res->size = sizeof(struct qb_ipc_response_header)+sizeof(struct join_list_entry) * count; req_exec_cpg_iovec.iov_base = buf; req_exec_cpg_iovec.iov_len = res->size; return (api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED)); } static int cpg_lib_init_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); memset 
(cpd, 0, sizeof(struct cpg_pd)); cpd->conn = conn; - list_add (&cpd->list, &cpg_pd_list_head); + qb_list_add (&cpd->list, &cpg_pd_list_head); - list_init (&cpd->iteration_instance_list_head); - list_init (&cpd->zcb_mapped_list_head); + qb_list_init (&cpd->iteration_instance_list_head); + qb_list_init (&cpd->zcb_mapped_list_head); api->ipc_refcnt_inc (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p, cpd=%p", conn, cpd); return (0); } /* Join message from the library */ static void message_handler_req_lib_cpg_join (void *conn, const void *message) { const struct req_lib_cpg_join *req_lib_cpg_join = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_join res_lib_cpg_join; cs_error_t error = CS_OK; - struct list_head *iter; + struct qb_list_head *iter; /* Test, if we don't have same pid and group name joined */ - for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { - struct cpg_pd *cpd_item = list_entry (iter, struct cpg_pd, list); + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd_item = qb_list_entry (iter, struct cpg_pd, list); if (cpd_item->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &cpd_item->group_name) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_EXIST; goto response_send; } } /* * Same check must be done in process info list, because there may be not yet delivered * leave of client. */ - for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { - struct process_info *pi = list_entry (iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get () && pi->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &pi->group) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_TRY_AGAIN; goto response_send; } } if (req_lib_cpg_join->group_name.length > CPG_MAX_NAME_LENGTH) { error = CS_ERR_NAME_TOO_LONG; goto response_send; } switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_OK; cpd->cpd_state = CPD_STATE_JOIN_STARTED; cpd->pid = req_lib_cpg_join->pid; cpd->flags = req_lib_cpg_join->flags; memcpy (&cpd->group_name, &req_lib_cpg_join->group_name, sizeof (cpd->group_name)); cpg_node_joinleave_send (req_lib_cpg_join->pid, &req_lib_cpg_join->group_name, MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN); break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_STARTED: error = CS_ERR_EXIST; break; case CPD_STATE_JOIN_COMPLETED: error = CS_ERR_EXIST; break; } response_send: res_lib_cpg_join.header.size = sizeof(res_lib_cpg_join); res_lib_cpg_join.header.id = MESSAGE_RES_CPG_JOIN; res_lib_cpg_join.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_join, sizeof(res_lib_cpg_join)); } /* Leave message from the library */ static void message_handler_req_lib_cpg_leave (void *conn, const void *message) { struct res_lib_cpg_leave res_lib_cpg_leave; cs_error_t error = CS_OK; struct req_lib_cpg_leave *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; 
case CPD_STATE_JOIN_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; cpd->cpd_state = CPD_STATE_LEAVE_STARTED; cpg_node_joinleave_send (req_lib_cpg_leave->pid, &req_lib_cpg_leave->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_LEAVE); break; } /* send return */ res_lib_cpg_leave.header.size = sizeof(res_lib_cpg_leave); res_lib_cpg_leave.header.id = MESSAGE_RES_CPG_LEAVE; res_lib_cpg_leave.header.error = error; api->ipc_response_send(conn, &res_lib_cpg_leave, sizeof(res_lib_cpg_leave)); } /* Finalize message from library */ static void message_handler_req_lib_cpg_finalize ( void *conn, const void *message) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_finalize res_lib_cpg_finalize; cs_error_t error = CS_OK; log_printf (LOGSYS_LEVEL_DEBUG, "cpg finalize for conn=%p", conn); /* * We will just remove cpd from list. After this call, connection will be * closed on lib side, and cpg_lib_exit_fn will be called */ - list_del (&cpd->list); - list_init (&cpd->list); + qb_list_del (&cpd->list); + qb_list_init (&cpd->list); res_lib_cpg_finalize.header.size = sizeof (res_lib_cpg_finalize); res_lib_cpg_finalize.header.id = MESSAGE_RES_CPG_FINALIZE; res_lib_cpg_finalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_finalize, sizeof (res_lib_cpg_finalize)); } static int memory_map ( const char *path, size_t bytes, void **buf) { int32_t fd; void *addr; int32_t res; fd = open (path, O_RDWR, 0600); unlink (path); if (fd == -1) { return (-1); } res = ftruncate (fd, bytes); if (res == -1) { goto error_close_unlink; } addr = mmap (NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { goto error_close_unlink; } #ifdef MADV_NOSYNC madvise(addr, bytes, MADV_NOSYNC); #endif res = close (fd); if (res) { munmap (addr, bytes); return (-1); } *buf = addr; return (0); error_close_unlink: close (fd); unlink(path); return -1; } static inline int zcb_alloc ( struct cpg_pd *cpd, const char *path_to_file, size_t size, void **addr) { struct zcb_mapped *zcb_mapped; unsigned int res; zcb_mapped = malloc (sizeof (struct zcb_mapped)); if (zcb_mapped == NULL) { return (-1); } res = memory_map ( path_to_file, size, addr); if (res == -1) { free (zcb_mapped); return (-1); } - list_init (&zcb_mapped->list); + qb_list_init (&zcb_mapped->list); zcb_mapped->addr = *addr; zcb_mapped->size = size; - list_add_tail (&zcb_mapped->list, &cpd->zcb_mapped_list_head); + qb_list_add_tail (&zcb_mapped->list, &cpd->zcb_mapped_list_head); return (0); } static inline int zcb_free (struct zcb_mapped *zcb_mapped) { unsigned int res; res = munmap (zcb_mapped->addr, zcb_mapped->size); - list_del (&zcb_mapped->list); + qb_list_del (&zcb_mapped->list); free (zcb_mapped); return (res); } static inline int zcb_by_addr_free (struct cpg_pd *cpd, void *addr) { - struct list_head *list; + struct qb_list_head *list; struct zcb_mapped *zcb_mapped; unsigned int res = 0; - for (list = cpd->zcb_mapped_list_head.next; - list != &cpd->zcb_mapped_list_head; list = list->next) { - - zcb_mapped = list_entry (list, struct zcb_mapped, list); + qb_list_for_each(list, &(cpd->zcb_mapped_list_head)) { + zcb_mapped = qb_list_entry (list, struct zcb_mapped, list); if (zcb_mapped->addr == addr) { res = zcb_free (zcb_mapped); break; } } return (res); } static inline int zcb_all_free ( struct cpg_pd *cpd) { - struct list_head *list; + struct qb_list_head *list; struct zcb_mapped *zcb_mapped; - for (list = cpd->zcb_mapped_list_head.next; - 
list != &cpd->zcb_mapped_list_head;) { - - zcb_mapped = list_entry (list, struct zcb_mapped, list); + qb_list_for_each(list, &(cpd->zcb_mapped_list_head)) { + zcb_mapped = qb_list_entry (list, struct zcb_mapped, list); list = list->next; zcb_free (zcb_mapped); } return (0); } union u { uint64_t server_addr; void *server_ptr; }; static uint64_t void2serveraddr (void *server_ptr) { union u u; u.server_ptr = server_ptr; return (u.server_addr); } static void *serveraddr2void (uint64_t server_addr) { union u u; u.server_addr = server_addr; return (u.server_ptr); }; static void message_handler_req_lib_cpg_zc_alloc ( void *conn, const void *message) { mar_req_coroipcc_zc_alloc_t *hdr = (mar_req_coroipcc_zc_alloc_t *)message; struct qb_ipc_response_header res_header; void *addr = NULL; struct coroipcs_zc_header *zc_header; unsigned int res; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "path: %s", hdr->path_to_file); res = zcb_alloc (cpd, hdr->path_to_file, hdr->map_size, &addr); assert(res == 0); zc_header = (struct coroipcs_zc_header *)addr; zc_header->server_address = void2serveraddr(addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send (conn, &res_header, res_header.size); } static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message) { mar_req_coroipcc_zc_free_t *hdr = (mar_req_coroipcc_zc_free_t *)message; struct qb_ipc_response_header res_header; void *addr = NULL; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, " free'ing"); addr = serveraddr2void (hdr->server_address); zcb_by_addr_free (cpd, addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send ( conn, &res_header, res_header.size); } /* Fragmented mcast message from the library */ static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message) { const struct req_lib_cpg_partial_mcast *req_lib_cpg_mcast = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); mar_cpg_name_t group_name = cpd->group_name; struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_partial_mcast req_exec_cpg_mcast; struct res_lib_cpg_partial_send res_lib_cpg_partial_send; int msglen = req_lib_cpg_mcast->fraglen; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_TRACE, "got fragmented mcast request on %p", conn); log_printf(LOGSYS_LEVEL_DEBUG, "Sending fragmented message size = %d bytes\n", msglen); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } res_lib_cpg_partial_send.header.size = sizeof(res_lib_cpg_partial_send); res_lib_cpg_partial_send.header.id = MESSAGE_RES_CPG_PARTIAL_SEND; if (req_lib_cpg_mcast->type == LIBCPG_PARTIAL_FIRST) { cpd->initial_transition_counter = cpd->transition_counter; } if (cpd->transition_counter != cpd->initial_transition_counter) { error = CS_ERR_INTERRUPT; } if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; req_exec_cpg_mcast.type = req_lib_cpg_mcast->type; req_exec_cpg_mcast.fraglen = 
req_lib_cpg_mcast->fraglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; req_exec_cpg_iovec[1].iov_len = msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); assert(result == 0); } else { log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d", conn, group_name.value, cpd->cpd_state, error); } res_lib_cpg_partial_send.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_partial_send, sizeof (res_lib_cpg_partial_send)); } /* Mcast message from the library */ static void message_handler_req_lib_cpg_mcast (void *conn, const void *message) { const struct req_lib_cpg_mcast *req_lib_cpg_mcast = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); mar_cpg_name_t group_name = cpd->group_name; struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; int msglen = req_lib_cpg_mcast->msglen; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_TRACE, "got mcast request on %p", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; req_exec_cpg_iovec[1].iov_len = msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); assert(result == 0); } else { log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d", conn, group_name.value, cpd->cpd_state, error); } } static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message) { mar_req_coroipcc_zc_execute_t *hdr = (mar_req_coroipcc_zc_execute_t *)message; struct qb_ipc_request_header *header; struct res_lib_cpg_mcast res_lib_cpg_mcast; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; struct req_lib_cpg_mcast *req_lib_cpg_mcast; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_TRACE, "got ZC mcast request on %p", conn); header = (struct qb_ipc_request_header *)(((char *)serveraddr2void(hdr->server_address) + sizeof (struct coroipcs_zc_header))); req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)header; switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast); res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST; if (error == CS_OK) { 
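/* Illustrative sketch, mirroring the statements that follow (nothing here is new logic):
 * on the zero-copy path the full library request, header and payload, already sits in the
 * region mapped by zc_alloc, so only the small exec header is rebuilt and the second iovec
 * points straight into that mapping -- the payload is never copied on the server side:
 *
 *   req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast;                        // exec header
 *   req_exec_cpg_iovec[0].iov_len  = sizeof(req_exec_cpg_mcast);
 *   req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast);  // payload in the shared mapping
 *   req_exec_cpg_iovec[1].iov_len  = req_exec_cpg_mcast.msglen;
 */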
req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + req_lib_cpg_mcast->msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &cpd->group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast); req_exec_cpg_iovec[1].iov_len = req_exec_cpg_mcast.msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); if (result == 0) { res_lib_cpg_mcast.header.error = CS_OK; } else { res_lib_cpg_mcast.header.error = CS_ERR_TRY_AGAIN; } } else { res_lib_cpg_mcast.header.error = error; } api->ipc_response_send (conn, &res_lib_cpg_mcast, sizeof (res_lib_cpg_mcast)); } static void message_handler_req_lib_cpg_membership (void *conn, const void *message) { struct req_lib_cpg_membership_get *req_lib_cpg_membership_get = (struct req_lib_cpg_membership_get *)message; struct res_lib_cpg_membership_get res_lib_cpg_membership_get; - struct list_head *iter; + struct qb_list_head *iter; int member_count = 0; res_lib_cpg_membership_get.header.id = MESSAGE_RES_CPG_MEMBERSHIP; res_lib_cpg_membership_get.header.error = CS_OK; res_lib_cpg_membership_get.header.size = sizeof (struct res_lib_cpg_membership_get); - for (iter = process_info_list_head.next; - iter != &process_info_list_head; iter = iter->next) { - - struct process_info *pi = list_entry (iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, &req_lib_cpg_membership_get->group_name) == 0) { res_lib_cpg_membership_get.member_list[member_count].nodeid = pi->nodeid; res_lib_cpg_membership_get.member_list[member_count].pid = pi->pid; member_count += 1; } } res_lib_cpg_membership_get.member_count = member_count; api->ipc_response_send (conn, &res_lib_cpg_membership_get, sizeof (res_lib_cpg_membership_get)); } static void message_handler_req_lib_cpg_local_get (void *conn, const void *message) { struct res_lib_cpg_local_get res_lib_cpg_local_get; res_lib_cpg_local_get.header.size = sizeof (res_lib_cpg_local_get); res_lib_cpg_local_get.header.id = MESSAGE_RES_CPG_LOCAL_GET; res_lib_cpg_local_get.header.error = CS_OK; res_lib_cpg_local_get.local_nodeid = api->totem_nodeid_get (); api->ipc_response_send (conn, &res_lib_cpg_local_get, sizeof (res_lib_cpg_local_get)); } static void message_handler_req_lib_cpg_iteration_initialize ( void *conn, const void *message) { const struct req_lib_cpg_iterationinitialize *req_lib_cpg_iterationinitialize = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); hdb_handle_t cpg_iteration_handle = 0; struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize; - struct list_head *iter, *iter2; + struct qb_list_head *iter, *iter2; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration initialize"); /* Because between calling this function and *next can be some operations which will * change list, we must do full copy. 
*/ /* * Create new iteration instance */ res = hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance), &cpg_iteration_handle); if (res != 0) { error = CS_ERR_NO_MEMORY; goto response_send; } res = hdb_handle_get (&cpg_iteration_handle_t_db, cpg_iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_BAD_HANDLE; goto error_destroy; } - list_init (&cpg_iteration_instance->items_list_head); + qb_list_init (&cpg_iteration_instance->items_list_head); cpg_iteration_instance->handle = cpg_iteration_handle; /* * Create copy of process_info list "grouped by" group name */ - for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { - struct process_info *pi = list_entry (iter, struct process_info, list); + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); struct process_info *new_pi; if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { /* * Try to find processed group name in our list new list */ int found = 0; - for (iter2 = cpg_iteration_instance->items_list_head.next; - iter2 != &cpg_iteration_instance->items_list_head; - iter2 = iter2->next) { - struct process_info *pi2 = list_entry (iter2, struct process_info, list); + qb_list_for_each(iter2, &(cpg_iteration_instance->items_list_head)) { + struct process_info *pi2 = qb_list_entry (iter2, struct process_info, list); if (mar_name_compare (&pi2->group, &pi->group) == 0) { found = 1; break; } } if (found) { /* * We have this name in list -> don't add */ continue ; } } else if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_ONE_GROUP) { /* * Test pi group name with request */ if (mar_name_compare (&pi->group, &req_lib_cpg_iterationinitialize->group_name) != 0) /* * Not same -> don't add */ continue ; } new_pi = malloc (sizeof (struct process_info)); if (!new_pi) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); error = CS_ERR_NO_MEMORY; goto error_put_destroy; } memcpy (new_pi, pi, sizeof (struct process_info)); - list_init (&new_pi->list); + qb_list_init (&new_pi->list); if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { /* * pid and nodeid -> undefined */ new_pi->pid = new_pi->nodeid = 0; } /* * We will return list "grouped" by "group name", so try to find right place to add */ - for (iter2 = cpg_iteration_instance->items_list_head.next; - iter2 != &cpg_iteration_instance->items_list_head; - iter2 = iter2->next) { - struct process_info *pi2 = list_entry (iter2, struct process_info, list); + qb_list_for_each(iter2, &(cpg_iteration_instance->items_list_head)) { + struct process_info *pi2 = qb_list_entry (iter2, struct process_info, list); if (mar_name_compare (&pi2->group, &pi->group) == 0) { break; } } - list_add (&new_pi->list, iter2); + qb_list_add (&new_pi->list, iter2); } /* * Now we have a full "grouped by" copy of process_info list */ /* * Add instance to current cpd list */ - list_init (&cpg_iteration_instance->list); - list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head); + qb_list_init (&cpg_iteration_instance->list); + qb_list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head); cpg_iteration_instance->current_pointer = &cpg_iteration_instance->items_list_head; error_put_destroy: hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_handle); error_destroy: if (error != CS_OK) { hdb_handle_destroy 
(&cpg_iteration_handle_t_db, cpg_iteration_handle); } response_send: res_lib_cpg_iterationinitialize.header.size = sizeof (res_lib_cpg_iterationinitialize); res_lib_cpg_iterationinitialize.header.id = MESSAGE_RES_CPG_ITERATIONINITIALIZE; res_lib_cpg_iterationinitialize.header.error = error; res_lib_cpg_iterationinitialize.iteration_handle = cpg_iteration_handle; api->ipc_response_send (conn, &res_lib_cpg_iterationinitialize, sizeof (res_lib_cpg_iterationinitialize)); } static void message_handler_req_lib_cpg_iteration_next ( void *conn, const void *message) { const struct req_lib_cpg_iterationnext *req_lib_cpg_iterationnext = message; struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; struct process_info *pi; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration next"); res = hdb_handle_get (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_LIBRARY; goto error_exit; } assert (cpg_iteration_instance); cpg_iteration_instance->current_pointer = cpg_iteration_instance->current_pointer->next; if (cpg_iteration_instance->current_pointer == &cpg_iteration_instance->items_list_head) { error = CS_ERR_NO_SECTIONS; goto error_put; } - pi = list_entry (cpg_iteration_instance->current_pointer, struct process_info, list); + pi = qb_list_entry (cpg_iteration_instance->current_pointer, struct process_info, list); /* * Copy iteration data */ res_lib_cpg_iterationnext.description.nodeid = pi->nodeid; res_lib_cpg_iterationnext.description.pid = pi->pid; memcpy (&res_lib_cpg_iterationnext.description.group, &pi->group, sizeof (mar_cpg_name_t)); error_put: hdb_handle_put (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle); error_exit: res_lib_cpg_iterationnext.header.size = sizeof (res_lib_cpg_iterationnext); res_lib_cpg_iterationnext.header.id = MESSAGE_RES_CPG_ITERATIONNEXT; res_lib_cpg_iterationnext.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_iterationnext, sizeof (res_lib_cpg_iterationnext)); } static void message_handler_req_lib_cpg_iteration_finalize ( void *conn, const void *message) { const struct req_lib_cpg_iterationfinalize *req_lib_cpg_iterationfinalize = message; struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration finalize"); res = hdb_handle_get (&cpg_iteration_handle_t_db, req_lib_cpg_iterationfinalize->iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_LIBRARY; goto error_exit; } assert (cpg_iteration_instance); cpg_iteration_instance_finalize (cpg_iteration_instance); hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); error_exit: res_lib_cpg_iterationfinalize.header.size = sizeof (res_lib_cpg_iterationfinalize); res_lib_cpg_iterationfinalize.header.id = MESSAGE_RES_CPG_ITERATIONFINALIZE; res_lib_cpg_iterationfinalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_iterationfinalize, sizeof (res_lib_cpg_iterationfinalize)); } diff --git a/exec/icmap.c b/exec/icmap.c index 337a560c..6bbe9eaf 100644 --- a/exec/icmap.c +++ b/exec/icmap.c @@ -1,1338 +1,1335 @@ /* * Copyright (c) 2011 Red Hat, Inc. * * All rights reserved. 
* * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include -#include +#include #include #define ICMAP_MAX_VALUE_LEN (16*1024) struct icmap_item { char *key_name; icmap_value_types_t type; size_t value_len; char value[]; }; struct icmap_map { qb_map_t *qb_map; }; static icmap_map_t icmap_global_map; struct icmap_track { char *key_name; int32_t track_type; icmap_notify_fn_t notify_fn; void *user_data; - struct list_head list; + struct qb_list_head list; }; struct icmap_ro_access_item { char *key_name; int prefix; - struct list_head list; + struct qb_list_head list; }; -DECLARE_LIST_INIT(icmap_ro_access_item_list_head); -DECLARE_LIST_INIT(icmap_track_list_head); +QB_LIST_DECLARE (icmap_ro_access_item_list_head); +QB_LIST_DECLARE (icmap_track_list_head); /* * Static functions declarations */ /* * Check if key_name is valid icmap key name. Returns 0 on success, and -1 on fail */ static int icmap_check_key_name(const char *key_name); /* * Check that value with given type has correct length value_len. Returns 0 on success, * and -1 on fail */ static int icmap_check_value_len(const void *value, size_t value_len, icmap_value_types_t type); /* * Returns length of value of given type, or 0 for string and binary data type */ static size_t icmap_get_valuetype_len(icmap_value_types_t type); /* * Converts track type of icmap to qb */ static int32_t icmap_tt_to_qbtt(int32_t track_type); /* * Convert track type of qb to icmap */ static int32_t icmap_qbtt_to_tt(int32_t track_type); /* * Checks if item has same value as value with value_len and given type. Returns 0 if not, otherwise !0. */ static int icmap_item_eq(const struct icmap_item *item, const void *value, size_t value_len, icmap_value_types_t type); /* * Checks if given character is valid in key name. Returns 0 if not, otherwise !0. */ static int icmap_is_valid_name_char(char c); /* * Helper for getting integer and float value with given type for key key_name and store it in value. 
*/ static cs_error_t icmap_get_int_r( const icmap_map_t map, const char *key_name, void *value, icmap_value_types_t type); /* * Return raw item value data. Internal function used by icmap_get_r which does most * of arguments validity checks but doesn't copy data (it returns raw item data * pointer). It's not very safe tho it's static. */ static cs_error_t icmap_get_ref_r( const icmap_map_t map, const char *key_name, void **value, size_t *value_len, icmap_value_types_t *type); /* * Function implementation */ static int32_t icmap_tt_to_qbtt(int32_t track_type) { int32_t res = 0; if (track_type & ICMAP_TRACK_DELETE) { res |= QB_MAP_NOTIFY_DELETED; } if (track_type & ICMAP_TRACK_MODIFY) { res |= QB_MAP_NOTIFY_REPLACED; } if (track_type & ICMAP_TRACK_ADD) { res |= QB_MAP_NOTIFY_INSERTED; } if (track_type & ICMAP_TRACK_PREFIX) { res |= QB_MAP_NOTIFY_RECURSIVE; } return (res); } static int32_t icmap_qbtt_to_tt(int32_t track_type) { int32_t res = 0; if (track_type & QB_MAP_NOTIFY_DELETED) { res |= ICMAP_TRACK_DELETE; } if (track_type & QB_MAP_NOTIFY_REPLACED) { res |= ICMAP_TRACK_MODIFY; } if (track_type & QB_MAP_NOTIFY_INSERTED) { res |= ICMAP_TRACK_ADD; } if (track_type & QB_MAP_NOTIFY_RECURSIVE) { res |= ICMAP_TRACK_PREFIX; } return (res); } static void icmap_map_free_cb(uint32_t event, char* key, void* old_value, void* value, void* user_data) { struct icmap_item *item = (struct icmap_item *)old_value; /* * value == old_value -> fast_adjust_int was used, don't free data */ if (item != NULL && value != old_value) { free(item->key_name); free(item); } } cs_error_t icmap_init_r(icmap_map_t *result) { int32_t err; *result = malloc(sizeof(struct icmap_map)); if (*result == NULL) { return (CS_ERR_NO_MEMORY); } (*result)->qb_map = qb_trie_create(); if ((*result)->qb_map == NULL) return (CS_ERR_INIT); err = qb_map_notify_add((*result)->qb_map, NULL, icmap_map_free_cb, QB_MAP_NOTIFY_FREE, NULL); return (qb_to_cs_error(err)); } cs_error_t icmap_init(void) { return (icmap_init_r(&icmap_global_map)); } static void icmap_set_ro_access_free(void) { - struct list_head *iter = icmap_ro_access_item_list_head.next; + struct qb_list_head *iter; struct icmap_ro_access_item *icmap_ro_ai; - while (iter != &icmap_ro_access_item_list_head) { - icmap_ro_ai = list_entry(iter, struct icmap_ro_access_item, list); - list_del(&icmap_ro_ai->list); + qb_list_for_each(iter, &icmap_ro_access_item_list_head) { + icmap_ro_ai = qb_list_entry(iter, struct icmap_ro_access_item, list); + qb_list_del(&icmap_ro_ai->list); free(icmap_ro_ai->key_name); - free(icmap_ro_ai); - iter = icmap_ro_access_item_list_head.next; + free(icmap_ro_ai); } } static void icmap_del_all_track(void) { - struct list_head *iter = icmap_track_list_head.next; + struct qb_list_head *iter; struct icmap_track *icmap_track; - - while (iter != &icmap_track_list_head) { - icmap_track = list_entry(iter, struct icmap_track, list); - icmap_track_delete(icmap_track); - iter = icmap_track_list_head.next; + qb_list_for_each(iter, &icmap_track_list_head) { + icmap_track = qb_list_entry(iter, struct icmap_track, list); + icmap_track_delete(icmap_track); } } void icmap_fini_r(const icmap_map_t map) { qb_map_destroy(map->qb_map); free(map); return; } void icmap_fini(void) { icmap_del_all_track(); /* * catch 22 warning: * We need to drop this notify but we can't because it calls icmap_map_free_cb * while destroying the tree to free icmap_item(s). * -> qb_map_notify_del_2(icmap_map, NULL, icmap_map_free_cb, QB_MAP_NOTIFY_FREE, NULL); * and we cannot call it after map_destroy. 
joy! :) */ icmap_fini_r(icmap_global_map); icmap_set_ro_access_free(); return ; } icmap_map_t icmap_get_global_map(void) { return (icmap_global_map); } static int icmap_is_valid_name_char(char c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-' || c == '/' || c == ':'); } void icmap_convert_name_to_valid_name(char *key_name) { int i; for (i = 0; i < strlen(key_name); i++) { if (!icmap_is_valid_name_char(key_name[i])) { key_name[i] = '_'; } } } static int icmap_check_key_name(const char *key_name) { int i; if ((strlen(key_name) < ICMAP_KEYNAME_MINLEN) || strlen(key_name) > ICMAP_KEYNAME_MAXLEN) { return (-1); } for (i = 0; i < strlen(key_name); i++) { if (!icmap_is_valid_name_char(key_name[i])) { return (-1); } } return (0); } static size_t icmap_get_valuetype_len(icmap_value_types_t type) { size_t res = 0; switch (type) { case ICMAP_VALUETYPE_INT8: res = sizeof(int8_t); break; case ICMAP_VALUETYPE_UINT8: res = sizeof(uint8_t); break; case ICMAP_VALUETYPE_INT16: res = sizeof(int16_t); break; case ICMAP_VALUETYPE_UINT16: res = sizeof(uint16_t); break; case ICMAP_VALUETYPE_INT32: res = sizeof(int32_t); break; case ICMAP_VALUETYPE_UINT32: res = sizeof(uint32_t); break; case ICMAP_VALUETYPE_INT64: res = sizeof(int64_t); break; case ICMAP_VALUETYPE_UINT64: res = sizeof(uint64_t); break; case ICMAP_VALUETYPE_FLOAT: res = sizeof(float); break; case ICMAP_VALUETYPE_DOUBLE: res = sizeof(double); break; case ICMAP_VALUETYPE_STRING: case ICMAP_VALUETYPE_BINARY: res = 0; break; } return (res); } static int icmap_check_value_len(const void *value, size_t value_len, icmap_value_types_t type) { if (value_len > ICMAP_MAX_VALUE_LEN) { return (-1); } if (type != ICMAP_VALUETYPE_STRING && type != ICMAP_VALUETYPE_BINARY) { if (icmap_get_valuetype_len(type) == value_len) { return (0); } else { return (-1); } } if (type == ICMAP_VALUETYPE_STRING) { /* * value_len can be shorter then real string length, but never * longer (+ 1 is because of 0 at the end of string) */ if (value_len > strlen((const char *)value) + 1) { return (-1); } else { return (0); } } return (0); } static int icmap_item_eq(const struct icmap_item *item, const void *value, size_t value_len, icmap_value_types_t type) { size_t ptr_len; if (item->type != type) { return (0); } if (item->type == ICMAP_VALUETYPE_STRING) { ptr_len = strlen((const char *)value); if (ptr_len > value_len) { ptr_len = value_len; } ptr_len++; } else { ptr_len = value_len; } if (item->value_len == ptr_len) { return (memcmp(item->value, value, value_len) == 0); }; return (0); } int icmap_key_value_eq( const icmap_map_t map1, const char *key_name1, const icmap_map_t map2, const char *key_name2) { struct icmap_item *item1, *item2; if (map1 == NULL || key_name1 == NULL || map2 == NULL || key_name2 == NULL) { return (0); } item1 = qb_map_get(map1->qb_map, key_name1); item2 = qb_map_get(map2->qb_map, key_name2); if (item1 == NULL || item2 == NULL) { return (0); } return (icmap_item_eq(item1, item2->value, item2->value_len, item2->type)); } cs_error_t icmap_set_r( const icmap_map_t map, const char *key_name, const void *value, size_t value_len, icmap_value_types_t type) { struct icmap_item *item; struct icmap_item *new_item; size_t new_value_len; size_t new_item_size; if (value == NULL || key_name == NULL) { return (CS_ERR_INVALID_PARAM); } if (icmap_check_value_len(value, value_len, type) != 0) { return (CS_ERR_INVALID_PARAM); } item = qb_map_get(map->qb_map, key_name); if (item != NULL) { /* * Check that key 
is really changed */ if (icmap_item_eq(item, value, value_len, type)) { return (CS_OK); } } else { if (icmap_check_key_name(key_name) != 0) { return (CS_ERR_NAME_TOO_LONG); } } if (type == ICMAP_VALUETYPE_BINARY || type == ICMAP_VALUETYPE_STRING) { if (type == ICMAP_VALUETYPE_STRING) { new_value_len = strlen((const char *)value); if (new_value_len > value_len) { new_value_len = value_len; } new_value_len++; } else { new_value_len = value_len; } } else { new_value_len = icmap_get_valuetype_len(type); } new_item_size = sizeof(struct icmap_item) + new_value_len; new_item = malloc(new_item_size); if (new_item == NULL) { return (CS_ERR_NO_MEMORY); } memset(new_item, 0, new_item_size); if (item == NULL) { new_item->key_name = strdup(key_name); if (new_item->key_name == NULL) { free(new_item); return (CS_ERR_NO_MEMORY); } } else { new_item->key_name = item->key_name; item->key_name = NULL; } new_item->type = type; new_item->value_len = new_value_len; memcpy(new_item->value, value, new_value_len); if (new_item->type == ICMAP_VALUETYPE_STRING) { ((char *)new_item->value)[new_value_len - 1] = 0; } qb_map_put(map->qb_map, new_item->key_name, new_item); return (CS_OK); } cs_error_t icmap_set( const char *key_name, const void *value, size_t value_len, icmap_value_types_t type) { return (icmap_set_r(icmap_global_map, key_name, value, value_len, type)); } cs_error_t icmap_set_int8_r(const icmap_map_t map, const char *key_name, int8_t value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT8)); } cs_error_t icmap_set_uint8_r(const icmap_map_t map, const char *key_name, uint8_t value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT8)); } cs_error_t icmap_set_int16_r(const icmap_map_t map, const char *key_name, int16_t value) { return (icmap_set_r(map,key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT16)); } cs_error_t icmap_set_uint16_r(const icmap_map_t map, const char *key_name, uint16_t value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT16)); } cs_error_t icmap_set_int32_r(const icmap_map_t map, const char *key_name, int32_t value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT32)); } cs_error_t icmap_set_uint32_r(const icmap_map_t map, const char *key_name, uint32_t value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT32)); } cs_error_t icmap_set_int64_r(const icmap_map_t map, const char *key_name, int64_t value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT64)); } cs_error_t icmap_set_uint64_r(const icmap_map_t map, const char *key_name, uint64_t value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT64)); } cs_error_t icmap_set_float_r(const icmap_map_t map, const char *key_name, float value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_FLOAT)); } cs_error_t icmap_set_double_r(const icmap_map_t map, const char *key_name, double value) { return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_DOUBLE)); } cs_error_t icmap_set_string_r(const icmap_map_t map, const char *key_name, const char *value) { if (value == NULL) { return (CS_ERR_INVALID_PARAM); } return (icmap_set_r(map, key_name, value, strlen(value), ICMAP_VALUETYPE_STRING)); } cs_error_t icmap_set_int8(const char *key_name, int8_t value) { return (icmap_set_int8_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_uint8(const char *key_name, uint8_t value) 
{ return (icmap_set_uint8_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_int16(const char *key_name, int16_t value) { return (icmap_set_int16_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_uint16(const char *key_name, uint16_t value) { return (icmap_set_uint16_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_int32(const char *key_name, int32_t value) { return (icmap_set_int32_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_uint32(const char *key_name, uint32_t value) { return (icmap_set_uint32_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_int64(const char *key_name, int64_t value) { return (icmap_set_int64_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_uint64(const char *key_name, uint64_t value) { return (icmap_set_uint64_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_float(const char *key_name, float value) { return (icmap_set_float_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_double(const char *key_name, double value) { return (icmap_set_double_r(icmap_global_map, key_name, value)); } cs_error_t icmap_set_string(const char *key_name, const char *value) { return (icmap_set_string_r(icmap_global_map, key_name, value)); } cs_error_t icmap_delete_r(const icmap_map_t map, const char *key_name) { struct icmap_item *item; if (key_name == NULL) { return (CS_ERR_INVALID_PARAM); } item = qb_map_get(map->qb_map, key_name); if (item == NULL) { return (CS_ERR_NOT_EXIST); } if (qb_map_rm(map->qb_map, item->key_name) != QB_TRUE) { return (CS_ERR_NOT_EXIST); } return (CS_OK); } cs_error_t icmap_delete(const char *key_name) { return (icmap_delete_r(icmap_global_map, key_name)); } static cs_error_t icmap_get_ref_r( const icmap_map_t map, const char *key_name, void **value, size_t *value_len, icmap_value_types_t *type) { struct icmap_item *item; if (key_name == NULL) { return (CS_ERR_INVALID_PARAM); } item = qb_map_get(map->qb_map, key_name); if (item == NULL) { return (CS_ERR_NOT_EXIST); } if (type != NULL) { *type = item->type; } if (value_len != NULL) { *value_len = item->value_len; } if (value != NULL) { *value = item->value; } return (CS_OK); } cs_error_t icmap_get_r( const icmap_map_t map, const char *key_name, void *value, size_t *value_len, icmap_value_types_t *type) { cs_error_t res; void *tmp_value; size_t tmp_value_len; res = icmap_get_ref_r(map, key_name, &tmp_value, &tmp_value_len, type); if (res != CS_OK) { return (res); } if (value == NULL) { if (value_len != NULL) { *value_len = tmp_value_len; } } else { if (value_len == NULL || *value_len < tmp_value_len) { return (CS_ERR_INVALID_PARAM); } *value_len = tmp_value_len; memcpy(value, tmp_value, tmp_value_len); } return (CS_OK); } cs_error_t icmap_get( const char *key_name, void *value, size_t *value_len, icmap_value_types_t *type) { return (icmap_get_r(icmap_global_map, key_name, value, value_len, type)); } static cs_error_t icmap_get_int_r( const icmap_map_t map, const char *key_name, void *value, icmap_value_types_t type) { char key_value[16]; size_t key_size; cs_error_t err; icmap_value_types_t key_type; key_size = sizeof(key_value); memset(key_value, 0, key_size); err = icmap_get(key_name, key_value, &key_size, &key_type); if (err != CS_OK) return (err); if (key_type != type) { return (CS_ERR_INVALID_PARAM); } memcpy(value, key_value, icmap_get_valuetype_len(key_type)); return (CS_OK); } cs_error_t icmap_get_int8_r(const icmap_map_t map, const char *key_name, int8_t *i8) { return (icmap_get_int_r(map, key_name, i8, 
ICMAP_VALUETYPE_INT8)); } cs_error_t icmap_get_uint8_r(const icmap_map_t map, const char *key_name, uint8_t *u8) { return (icmap_get_int_r(map, key_name, u8, ICMAP_VALUETYPE_UINT8)); } cs_error_t icmap_get_int16_r(const icmap_map_t map, const char *key_name, int16_t *i16) { return (icmap_get_int_r(map, key_name, i16, ICMAP_VALUETYPE_INT16)); } cs_error_t icmap_get_uint16_r(const icmap_map_t map, const char *key_name, uint16_t *u16) { return (icmap_get_int_r(map, key_name, u16, ICMAP_VALUETYPE_UINT16)); } cs_error_t icmap_get_int32_r(const icmap_map_t map, const char *key_name, int32_t *i32) { return (icmap_get_int_r(map, key_name, i32, ICMAP_VALUETYPE_INT32)); } cs_error_t icmap_get_uint32_r(const icmap_map_t map, const char *key_name, uint32_t *u32) { return (icmap_get_int_r(map, key_name, u32, ICMAP_VALUETYPE_UINT32)); } cs_error_t icmap_get_int64_r(const icmap_map_t map, const char *key_name, int64_t *i64) { return(icmap_get_int_r(map, key_name, i64, ICMAP_VALUETYPE_INT64)); } cs_error_t icmap_get_uint64_r(const icmap_map_t map, const char *key_name, uint64_t *u64) { return (icmap_get_int_r(map, key_name, u64, ICMAP_VALUETYPE_UINT64)); } cs_error_t icmap_get_float_r(const icmap_map_t map, const char *key_name, float *flt) { return (icmap_get_int_r(map, key_name, flt, ICMAP_VALUETYPE_FLOAT)); } cs_error_t icmap_get_double_r(const icmap_map_t map, const char *key_name, double *dbl) { return (icmap_get_int_r(map, key_name, dbl, ICMAP_VALUETYPE_DOUBLE)); } cs_error_t icmap_get_int8(const char *key_name, int8_t *i8) { return (icmap_get_int8_r(icmap_global_map, key_name, i8)); } cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8) { return (icmap_get_uint8_r(icmap_global_map, key_name, u8)); } cs_error_t icmap_get_int16(const char *key_name, int16_t *i16) { return (icmap_get_int16_r(icmap_global_map, key_name, i16)); } cs_error_t icmap_get_uint16(const char *key_name, uint16_t *u16) { return (icmap_get_uint16_r(icmap_global_map, key_name, u16)); } cs_error_t icmap_get_int32(const char *key_name, int32_t *i32) { return (icmap_get_int32_r(icmap_global_map, key_name, i32)); } cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32) { return (icmap_get_uint32_r(icmap_global_map, key_name, u32)); } cs_error_t icmap_get_int64(const char *key_name, int64_t *i64) { return(icmap_get_int64_r(icmap_global_map, key_name, i64)); } cs_error_t icmap_get_uint64(const char *key_name, uint64_t *u64) { return (icmap_get_uint64_r(icmap_global_map, key_name, u64)); } cs_error_t icmap_get_float(const char *key_name, float *flt) { return (icmap_get_float_r(icmap_global_map, key_name, flt)); } cs_error_t icmap_get_double(const char *key_name, double *dbl) { return (icmap_get_double_r(icmap_global_map, key_name, dbl)); } cs_error_t icmap_get_string(const char *key_name, char **str) { cs_error_t res; size_t str_len; icmap_value_types_t type; res = icmap_get(key_name, NULL, &str_len, &type); if (res != CS_OK || type != ICMAP_VALUETYPE_STRING) { if (res == CS_OK) { res = CS_ERR_INVALID_PARAM; } goto return_error; } *str = malloc(str_len); if (*str == NULL) { res = CS_ERR_NO_MEMORY; goto return_error; } res = icmap_get(key_name, *str, &str_len, &type); if (res != CS_OK) { free(*str); goto return_error; } return (CS_OK); return_error: return (res); } cs_error_t icmap_adjust_int_r( const icmap_map_t map, const char *key_name, int32_t step) { struct icmap_item *item; uint8_t u8; uint16_t u16; uint32_t u32; uint64_t u64; cs_error_t err = CS_OK; if (key_name == NULL) { return (CS_ERR_INVALID_PARAM); } item = 
qb_map_get(map->qb_map, key_name); if (item == NULL) { return (CS_ERR_NOT_EXIST); } switch (item->type) { case ICMAP_VALUETYPE_INT8: case ICMAP_VALUETYPE_UINT8: memcpy(&u8, item->value, sizeof(u8)); u8 += step; err = icmap_set(key_name, &u8, sizeof(u8), item->type); break; case ICMAP_VALUETYPE_INT16: case ICMAP_VALUETYPE_UINT16: memcpy(&u16, item->value, sizeof(u16)); u16 += step; err = icmap_set(key_name, &u16, sizeof(u16), item->type); break; case ICMAP_VALUETYPE_INT32: case ICMAP_VALUETYPE_UINT32: memcpy(&u32, item->value, sizeof(u32)); u32 += step; err = icmap_set(key_name, &u32, sizeof(u32), item->type); break; case ICMAP_VALUETYPE_INT64: case ICMAP_VALUETYPE_UINT64: memcpy(&u64, item->value, sizeof(u64)); u64 += step; err = icmap_set(key_name, &u64, sizeof(u64), item->type); break; case ICMAP_VALUETYPE_FLOAT: case ICMAP_VALUETYPE_DOUBLE: case ICMAP_VALUETYPE_STRING: case ICMAP_VALUETYPE_BINARY: err = CS_ERR_INVALID_PARAM; break; } return (err); } cs_error_t icmap_adjust_int( const char *key_name, int32_t step) { return (icmap_adjust_int_r(icmap_global_map, key_name, step)); } cs_error_t icmap_fast_adjust_int_r( const icmap_map_t map, const char *key_name, int32_t step) { struct icmap_item *item; cs_error_t err = CS_OK; if (key_name == NULL) { return (CS_ERR_INVALID_PARAM); } item = qb_map_get(map->qb_map, key_name); if (item == NULL) { return (CS_ERR_NOT_EXIST); } switch (item->type) { case ICMAP_VALUETYPE_INT8: case ICMAP_VALUETYPE_UINT8: *(uint8_t *)item->value += step; break; case ICMAP_VALUETYPE_INT16: case ICMAP_VALUETYPE_UINT16: *(uint16_t *)item->value += step; break; case ICMAP_VALUETYPE_INT32: case ICMAP_VALUETYPE_UINT32: *(uint32_t *)item->value += step; break; case ICMAP_VALUETYPE_INT64: case ICMAP_VALUETYPE_UINT64: *(uint64_t *)item->value += step; break; case ICMAP_VALUETYPE_FLOAT: case ICMAP_VALUETYPE_DOUBLE: case ICMAP_VALUETYPE_STRING: case ICMAP_VALUETYPE_BINARY: err = CS_ERR_INVALID_PARAM; break; } if (err == CS_OK) { qb_map_put(map->qb_map, item->key_name, item); } return (err); } cs_error_t icmap_fast_adjust_int( const char *key_name, int32_t step) { return (icmap_fast_adjust_int_r(icmap_global_map, key_name, step)); } cs_error_t icmap_inc_r(const icmap_map_t map, const char *key_name) { return (icmap_adjust_int_r(map, key_name, 1)); } cs_error_t icmap_inc(const char *key_name) { return (icmap_inc_r(icmap_global_map, key_name)); } cs_error_t icmap_dec_r(const icmap_map_t map, const char *key_name) { return (icmap_adjust_int_r(map, key_name, -1)); } cs_error_t icmap_dec(const char *key_name) { return (icmap_dec_r(icmap_global_map, key_name)); } cs_error_t icmap_fast_inc_r(const icmap_map_t map, const char *key_name) { return (icmap_fast_adjust_int_r(map, key_name, 1)); } cs_error_t icmap_fast_inc(const char *key_name) { return (icmap_fast_inc_r(icmap_global_map, key_name)); } cs_error_t icmap_fast_dec_r(const icmap_map_t map, const char *key_name) { return (icmap_fast_adjust_int_r(map, key_name, -1)); } cs_error_t icmap_fast_dec(const char *key_name) { return (icmap_fast_dec_r(icmap_global_map, key_name)); } icmap_iter_t icmap_iter_init_r(const icmap_map_t map, const char *prefix) { return (qb_map_pref_iter_create(map->qb_map, prefix)); } icmap_iter_t icmap_iter_init(const char *prefix) { return (icmap_iter_init_r(icmap_global_map, prefix)); } const char *icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type) { struct icmap_item *item; const char *res; res = qb_map_iter_next(iter, (void **)&item); if (res == NULL) { return (res); } if 
(value_len != NULL) { *value_len = item->value_len; } if (type != NULL) { *type = item->type; } return (res); } void icmap_iter_finalize(icmap_iter_t iter) { qb_map_iter_free(iter); } static void icmap_notify_fn(uint32_t event, char *key, void *old_value, void *value, void *user_data) { icmap_track_t icmap_track = (icmap_track_t)user_data; struct icmap_item *new_item = (struct icmap_item *)value; struct icmap_item *old_item = (struct icmap_item *)old_value; struct icmap_notify_value new_val; struct icmap_notify_value old_val; if (value == NULL && old_value == NULL) { return ; } if (new_item != NULL) { new_val.type = new_item->type; new_val.len = new_item->value_len; new_val.data = new_item->value; } else { memset(&new_val, 0, sizeof(new_val)); } /* * old_item == new_item if fast functions are used -> don't fill old value */ if (old_item != NULL && old_item != new_item) { old_val.type = old_item->type; old_val.len = old_item->value_len; old_val.data = old_item->value; } else { memset(&old_val, 0, sizeof(old_val)); } icmap_track->notify_fn(icmap_qbtt_to_tt(event), key, new_val, old_val, icmap_track->user_data); } cs_error_t icmap_track_add( const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track) { int32_t err; if (notify_fn == NULL || icmap_track == NULL) { return (CS_ERR_INVALID_PARAM); } if ((track_type & ~(ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX)) != 0) { return (CS_ERR_INVALID_PARAM); } *icmap_track = malloc(sizeof(**icmap_track)); if (*icmap_track == NULL) { return (CS_ERR_NO_MEMORY); } memset(*icmap_track, 0, sizeof(**icmap_track)); if (key_name != NULL) { (*icmap_track)->key_name = strdup(key_name); }; (*icmap_track)->track_type = track_type; (*icmap_track)->notify_fn = notify_fn; (*icmap_track)->user_data = user_data; if ((err = qb_map_notify_add(icmap_global_map->qb_map, (*icmap_track)->key_name, icmap_notify_fn, icmap_tt_to_qbtt(track_type), *icmap_track)) != 0) { free((*icmap_track)->key_name); free(*icmap_track); return (qb_to_cs_error(err)); } - list_init(&(*icmap_track)->list); - list_add (&(*icmap_track)->list, &icmap_track_list_head); + qb_list_init(&(*icmap_track)->list); + qb_list_add (&(*icmap_track)->list, &icmap_track_list_head); return (CS_OK); } cs_error_t icmap_track_delete(icmap_track_t icmap_track) { int32_t err; if ((err = qb_map_notify_del_2(icmap_global_map->qb_map, icmap_track->key_name, icmap_notify_fn, icmap_tt_to_qbtt(icmap_track->track_type), icmap_track)) != 0) { return (qb_to_cs_error(err)); } - list_del(&icmap_track->list); + qb_list_del(&icmap_track->list); free(icmap_track->key_name); free(icmap_track); return (CS_OK); } void *icmap_track_get_user_data(icmap_track_t icmap_track) { return (icmap_track->user_data); } cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access) { - struct list_head *iter; + struct qb_list_head *iter; struct icmap_ro_access_item *icmap_ro_ai; - for (iter = icmap_ro_access_item_list_head.next; iter != &icmap_ro_access_item_list_head; iter = iter->next) { - icmap_ro_ai = list_entry(iter, struct icmap_ro_access_item, list); + qb_list_for_each(iter, &icmap_ro_access_item_list_head) { + icmap_ro_ai = qb_list_entry(iter, struct icmap_ro_access_item, list); if (icmap_ro_ai->prefix == prefix && strcmp(key_name, icmap_ro_ai->key_name) == 0) { /* * We found item */ if (ro_access) { return (CS_ERR_EXIST); } else { - list_del(&icmap_ro_ai->list); + qb_list_del(&icmap_ro_ai->list); free(icmap_ro_ai->key_name); 
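/* Usage sketch for the two helpers touched in this hunk (the keys below are examples only,
 * they are not added by this patch):
 *
 *   icmap_set_ro_access("runtime.", CS_TRUE, CS_TRUE);      // whole "runtime." prefix becomes read-only
 *   icmap_is_key_ro("runtime.connections.active");          // -> CS_TRUE via the prefix match
 *   icmap_set_ro_access("runtime.", CS_TRUE, CS_FALSE);     // drops the entry again (this branch)
 */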
free(icmap_ro_ai); return (CS_OK); } } } if (!ro_access) { return (CS_ERR_NOT_EXIST); } icmap_ro_ai = malloc(sizeof(*icmap_ro_ai)); if (icmap_ro_ai == NULL) { return (CS_ERR_NO_MEMORY); } memset(icmap_ro_ai, 0, sizeof(*icmap_ro_ai)); icmap_ro_ai->key_name = strdup(key_name); if (icmap_ro_ai->key_name == NULL) { free(icmap_ro_ai); return (CS_ERR_NO_MEMORY); } icmap_ro_ai->prefix = prefix; - list_init(&icmap_ro_ai->list); - list_add (&icmap_ro_ai->list, &icmap_ro_access_item_list_head); + qb_list_init(&icmap_ro_ai->list); + qb_list_add (&icmap_ro_ai->list, &icmap_ro_access_item_list_head); return (CS_OK); } int icmap_is_key_ro(const char *key_name) { - struct list_head *iter; + struct qb_list_head *iter; struct icmap_ro_access_item *icmap_ro_ai; - for (iter = icmap_ro_access_item_list_head.next; iter != &icmap_ro_access_item_list_head; iter = iter->next) { - icmap_ro_ai = list_entry(iter, struct icmap_ro_access_item, list); + qb_list_for_each(iter, &icmap_ro_access_item_list_head) { + icmap_ro_ai = qb_list_entry(iter, struct icmap_ro_access_item, list); if (icmap_ro_ai->prefix) { if (strlen(icmap_ro_ai->key_name) > strlen(key_name)) continue; if (strncmp(icmap_ro_ai->key_name, key_name, strlen(icmap_ro_ai->key_name)) == 0) { return (CS_TRUE); } } else { if (strcmp(icmap_ro_ai->key_name, key_name) == 0) { return (CS_TRUE); } } } return (CS_FALSE); } cs_error_t icmap_copy_map(icmap_map_t dst_map, const icmap_map_t src_map) { icmap_iter_t iter; size_t value_len; icmap_value_types_t value_type; const char *key_name; cs_error_t err; void *value; iter = icmap_iter_init_r(src_map, NULL); if (iter == NULL) { return (CS_ERR_NO_MEMORY); } err = CS_OK; while ((key_name = icmap_iter_next(iter, &value_len, &value_type)) != NULL) { err = icmap_get_ref_r(src_map, key_name, &value, &value_len, &value_type); if (err != CS_OK) { goto exit_iter_finalize; } err = icmap_set_r(dst_map, key_name, value, value_len, value_type); if (err != CS_OK) { goto exit_iter_finalize; } } exit_iter_finalize: icmap_iter_finalize(iter); return (err); } diff --git a/exec/ipc_glue.c b/exec/ipc_glue.c index c8cbbf84..4de1ec02 100644 --- a/exec/ipc_glue.c +++ b/exec/ipc_glue.c @@ -1,908 +1,902 @@ /* * Copyright (c) 2010-2012 Red Hat, Inc. * * All rights reserved. * * Author: Angus Salkeld * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sync.h" #include "timer.h" #include "main.h" #include "util.h" #include "apidef.h" #include "service.h" LOGSYS_DECLARE_SUBSYS ("MAIN"); static struct corosync_api_v1 *api = NULL; static int32_t ipc_not_enough_fds_left = 0; static int32_t ipc_fc_is_quorate; /* boolean */ static int32_t ipc_fc_totem_queue_level; /* percentage used */ static int32_t ipc_fc_sync_in_process; /* boolean */ static int32_t ipc_allow_connections = 0; /* boolean */ #define CS_IPCS_MAPPER_SERV_NAME 256 struct cs_ipcs_mapper { int32_t id; qb_ipcs_service_t *inst; char name[CS_IPCS_MAPPER_SERV_NAME]; }; struct outq_item { void *msg; size_t mlen; - struct list_head list; + struct qb_list_head list; }; static struct cs_ipcs_mapper ipcs_mapper[SERVICES_COUNT_MAX]; static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn); static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn); static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn); static int32_t cs_ipcs_dispatch_del(int32_t fd); static void outq_flush (void *data); static struct qb_ipcs_poll_handlers corosync_poll_funcs = { .job_add = cs_ipcs_job_add, .dispatch_add = cs_ipcs_dispatch_add, .dispatch_mod = cs_ipcs_dispatch_mod, .dispatch_del = cs_ipcs_dispatch_del, }; static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid); static void cs_ipcs_connection_created(qb_ipcs_connection_t *c); static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c, void *data, size_t size); static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c); static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c); static struct qb_ipcs_service_handlers corosync_service_funcs = { .connection_accept = cs_ipcs_connection_accept, .connection_created = cs_ipcs_connection_created, .msg_process = cs_ipcs_msg_process, .connection_closed = cs_ipcs_connection_closed, .connection_destroyed = cs_ipcs_connection_destroyed, }; static const char* cs_ipcs_serv_short_name(int32_t service_id) { const char *name; switch (service_id) { case CFG_SERVICE: name = "cfg"; break; case CPG_SERVICE: name = "cpg"; break; case QUORUM_SERVICE: name = "quorum"; break; case PLOAD_SERVICE: name = "pload"; break; case VOTEQUORUM_SERVICE: name = "votequorum"; break; case MON_SERVICE: name = "mon"; break; case WD_SERVICE: name = "wd"; break; case CMAP_SERVICE: name = "cmap"; break; default: name = NULL; break; } return name; } void cs_ipc_allow_connections(int32_t allow) { ipc_allow_connections = allow; } int32_t cs_ipcs_service_destroy(int32_t service_id) { if (ipcs_mapper[service_id].inst) { qb_ipcs_destroy(ipcs_mapper[service_id].inst); ipcs_mapper[service_id].inst = NULL; } return 0; } static int32_t 
cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid) { int32_t service = qb_ipcs_service_id_get(c); uint8_t u8; char key_name[ICMAP_KEYNAME_MAXLEN]; if (!ipc_allow_connections) { log_printf(LOGSYS_LEVEL_DEBUG, "Denied connection, corosync is not ready"); return -EAGAIN; } if (corosync_service[service] == NULL || ipcs_mapper[service].inst == NULL) { return -ENOSYS; } if (ipc_not_enough_fds_left) { return -EMFILE; } if (euid == 0 || egid == 0) { return 0; } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.uid.%u", euid); if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) return 0; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u", euid); if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) return 0; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.gid.%u", egid); if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) return 0; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u", egid); if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) return 0; log_printf(LOGSYS_LEVEL_ERROR, "Denied connection attempt from %d:%d", euid, egid); return -EACCES; } static char * pid_to_name (pid_t pid, char *out_name, size_t name_len) { char *name; char *rest; FILE *fp; char fname[32]; char buf[256]; snprintf (fname, 32, "/proc/%d/stat", pid); fp = fopen (fname, "r"); if (!fp) { return NULL; } if (fgets (buf, sizeof (buf), fp) == NULL) { fclose (fp); return NULL; } fclose (fp); name = strrchr (buf, '('); if (!name) { return NULL; } /* move past the bracket */ name++; rest = strrchr (buf, ')'); if (rest == NULL || rest[1] != ' ') { return NULL; } *rest = '\0'; /* move past the NULL and space */ rest += 2; /* copy the name */ strncpy (out_name, name, name_len); out_name[name_len - 1] = '\0'; return out_name; } struct cs_ipcs_conn_context { char *icmap_path; - struct list_head outq_head; + struct qb_list_head outq_head; int32_t queuing; uint32_t queued; uint64_t invalid_request; uint64_t overload; uint32_t sent; char data[1]; }; static void cs_ipcs_connection_created(qb_ipcs_connection_t *c) { int32_t service = 0; struct cs_ipcs_conn_context *context; char proc_name[32]; struct qb_ipcs_connection_stats stats; int32_t size = sizeof(struct cs_ipcs_conn_context); char key_name[ICMAP_KEYNAME_MAXLEN]; int set_client_pid = 0; int set_proc_name = 0; log_printf(LOG_DEBUG, "connection created"); service = qb_ipcs_service_id_get(c); size += corosync_service[service]->private_data_size; context = calloc(1, size); if (context == NULL) { qb_ipcs_disconnect(c); return; } - list_init(&context->outq_head); + qb_list_init(&context->outq_head); context->queuing = QB_FALSE; context->queued = 0; context->sent = 0; qb_ipcs_context_set(c, context); if (corosync_service[service]->lib_init_fn(c) != 0) { log_printf(LOG_ERR, "lib_init_fn failed, disconnecting"); qb_ipcs_disconnect(c); return; } icmap_inc("runtime.connections.active"); qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); if (stats.client_pid > 0) { if (pid_to_name (stats.client_pid, proc_name, sizeof(proc_name))) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.connections.%s:%u:%p", proc_name, stats.client_pid, c); set_proc_name = 1; } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.connections.%u:%p", stats.client_pid, c); } set_client_pid = 1; } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.connections.%p", c); } icmap_convert_name_to_valid_name(key_name); context->icmap_path = strdup(key_name); if (context->icmap_path == NULL) { qb_ipcs_disconnect(c); return; } if (set_proc_name) { 
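/* The block below publishes a per-connection statistics subtree in icmap, e.g.
 *
 *   runtime.connections.<proc_name>:<pid>:<conn ptr>.requests      (uint64)
 *   runtime.connections.<proc_name>:<pid>:<conn ptr>.queue_size    (uint32)
 *
 * (name and pid are left out of the key when they cannot be determined).  The counters are
 * refreshed by cs_ipcs_stats_update() and the whole prefix is deleted again in
 * cs_ipcs_connection_closed().
 */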
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.name", context->icmap_path); icmap_set_string(key_name, proc_name); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.client_pid", context->icmap_path); if (set_client_pid) { icmap_set_uint32(key_name, stats.client_pid); } else { icmap_set_uint32(key_name, 0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.service_id", context->icmap_path); icmap_set_uint32(key_name, service); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.responses", context->icmap_path); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.dispatched", context->icmap_path); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.requests", context->icmap_path); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.send_retries", context->icmap_path); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.recv_retries", context->icmap_path); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.flow_control", context->icmap_path); icmap_set_uint32(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.flow_control_count", context->icmap_path); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.queue_size", context->icmap_path); icmap_set_uint32(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.invalid_request", context->icmap_path); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.overload", context->icmap_path); icmap_set_uint64(key_name, 0); } void cs_ipc_refcnt_inc(void *conn) { qb_ipcs_connection_ref(conn); } void cs_ipc_refcnt_dec(void *conn) { qb_ipcs_connection_unref(conn); } void *cs_ipcs_private_data_get(void *conn) { struct cs_ipcs_conn_context *cnx; cnx = qb_ipcs_context_get(conn); return &cnx->data[0]; } static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c) { struct cs_ipcs_conn_context *context; - struct list_head *list, *list_next; + struct qb_list_head *list; struct outq_item *outq_item; log_printf(LOG_DEBUG, "%s() ", __func__); context = qb_ipcs_context_get(c); if (context) { - for (list = context->outq_head.next; - list != &context->outq_head; list = list_next) { + qb_list_for_each(list, &(context->outq_head)) { + outq_item = qb_list_entry (list, struct outq_item, list); - list_next = list->next; - outq_item = list_entry (list, struct outq_item, list); - - list_del (list); + qb_list_del (list); free (outq_item->msg); free (outq_item); } free(context); } } static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c) { int32_t res = 0; int32_t service = qb_ipcs_service_id_get(c); icmap_iter_t iter; char prefix[ICMAP_KEYNAME_MAXLEN]; const char *key_name; struct cs_ipcs_conn_context *cnx; log_printf(LOG_DEBUG, "%s() ", __func__); res = corosync_service[service]->lib_exit_fn(c); if (res != 0) { return res; } qb_loop_job_del(cs_poll_handle_get(), QB_LOOP_HIGH, c, outq_flush); cnx = qb_ipcs_context_get(c); snprintf(prefix, ICMAP_KEYNAME_MAXLEN, "%s.", cnx->icmap_path); iter = icmap_iter_init(prefix); while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) { icmap_delete(key_name); } icmap_iter_finalize(iter); free(cnx->icmap_path); icmap_inc("runtime.connections.closed"); icmap_dec("runtime.connections.active"); return 0; } int cs_ipcs_response_iov_send (void *conn, const struct iovec *iov, unsigned int iov_len) { int32_t rc = qb_ipcs_response_sendv(conn, iov, iov_len); if (rc >= 0) { return 0; } return rc; } int cs_ipcs_response_send(void *conn, const void *msg, size_t mlen) 
{ int32_t rc = qb_ipcs_response_send(conn, msg, mlen); if (rc >= 0) { return 0; } return rc; } static void outq_flush (void *data) { qb_ipcs_connection_t *conn = data; - struct list_head *list, *list_next; + struct qb_list_head *list; struct outq_item *outq_item; int32_t rc; struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn); - for (list = context->outq_head.next; - list != &context->outq_head; list = list_next) { - - list_next = list->next; - outq_item = list_entry (list, struct outq_item, list); + qb_list_for_each(list, &(context->outq_head)) { + outq_item = qb_list_entry (list, struct outq_item, list); rc = qb_ipcs_event_send(conn, outq_item->msg, outq_item->mlen); if (rc < 0 && rc != -EAGAIN) { errno = -rc; qb_perror(LOG_ERR, "qb_ipcs_event_send"); return; } else if (rc == -EAGAIN) { break; } assert(rc == outq_item->mlen); context->sent++; context->queued--; - list_del (list); + qb_list_del (list); free (outq_item->msg); free (outq_item); } - if (list_empty (&context->outq_head)) { + if (qb_list_empty (&context->outq_head)) { context->queuing = QB_FALSE; log_printf(LOGSYS_LEVEL_INFO, "Q empty, queued:%d sent:%d.", context->queued, context->sent); context->queued = 0; context->sent = 0; } else { qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush); } } static void msg_send_or_queue(qb_ipcs_connection_t *conn, const struct iovec *iov, uint32_t iov_len) { int32_t rc = 0; int32_t i; int32_t bytes_msg = 0; struct outq_item *outq_item; char *write_buf = 0; struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn); for (i = 0; i < iov_len; i++) { bytes_msg += iov[i].iov_len; } if (!context->queuing) { - assert(list_empty (&context->outq_head)); + assert(qb_list_empty (&context->outq_head)); rc = qb_ipcs_event_sendv(conn, iov, iov_len); if (rc == bytes_msg) { context->sent++; return; } if (rc == -EAGAIN) { context->queued = 0; context->sent = 0; context->queuing = QB_TRUE; qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush); } else { log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d, expected %d!", rc, bytes_msg); return; } } outq_item = malloc (sizeof (struct outq_item)); if (outq_item == NULL) { qb_ipcs_disconnect(conn); return; } outq_item->msg = malloc (bytes_msg); if (outq_item->msg == NULL) { free (outq_item); qb_ipcs_disconnect(conn); return; } write_buf = outq_item->msg; for (i = 0; i < iov_len; i++) { memcpy (write_buf, iov[i].iov_base, iov[i].iov_len); write_buf += iov[i].iov_len; } outq_item->mlen = bytes_msg; - list_init (&outq_item->list); - list_add_tail (&outq_item->list, &context->outq_head); + qb_list_init (&outq_item->list); + qb_list_add_tail (&outq_item->list, &context->outq_head); context->queued++; } int cs_ipcs_dispatch_send(void *conn, const void *msg, size_t mlen) { struct iovec iov; iov.iov_base = (void *)msg; iov.iov_len = mlen; msg_send_or_queue (conn, &iov, 1); return 0; } int cs_ipcs_dispatch_iov_send (void *conn, const struct iovec *iov, unsigned int iov_len) { msg_send_or_queue(conn, iov, iov_len); return 0; } static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c, void *data, size_t size) { struct qb_ipc_response_header response; struct qb_ipc_request_header *request_pt = (struct qb_ipc_request_header *)data; int32_t service = qb_ipcs_service_id_get(c); int32_t send_ok = 0; int32_t is_async_call = QB_FALSE; ssize_t res = -1; int sending_allowed_private_data; struct cs_ipcs_conn_context *cnx; send_ok = corosync_sending_allowed (service, request_pt->id, request_pt, &sending_allowed_private_data); 
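/* A note on the list walks converted above (outq_flush() and cs_ipcs_connection_destroyed() here,
 * and earlier zcb_all_free(), icmap_set_ro_access_free() and icmap_del_all_track()): each body
 * unlinks and frees the entry it is visiting (zcb_all_free() also advances the cursor by hand),
 * which the plain qb_list_for_each() increment does not account for.  libqb also ships a
 * deletion-safe iterator that caches the successor before the body runs; a minimal sketch of the
 * outq case, assuming qb_list_for_each_safe() from libqb's qblist.h:
 *
 *   struct qb_list_head *iter, *tmp;
 *   struct outq_item *outq_item;
 *
 *   qb_list_for_each_safe(iter, tmp, &context->outq_head) {
 *           outq_item = qb_list_entry(iter, struct outq_item, list);
 *           qb_list_del(iter);
 *           free(outq_item->msg);
 *           free(outq_item);
 *   }
 */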
is_async_call = (service == CPG_SERVICE && request_pt->id == 2); /* * This happens when the message contains some kind of invalid * parameter, such as an invalid size */ if (send_ok == -EINVAL) { response.size = sizeof (response); response.id = 0; response.error = CS_ERR_INVALID_PARAM; cnx = qb_ipcs_context_get(c); if (cnx) { cnx->invalid_request++; } if (is_async_call) { log_printf(LOGSYS_LEVEL_INFO, "*** %s() invalid message! size:%d error:%d", __func__, response.size, response.error); } else { qb_ipcs_response_send (c, &response, sizeof (response)); } res = -EINVAL; } else if (send_ok < 0) { cnx = qb_ipcs_context_get(c); if (cnx) { cnx->overload++; } if (!is_async_call) { /* * Overload, tell library to retry */ response.size = sizeof (response); response.id = 0; response.error = CS_ERR_TRY_AGAIN; qb_ipcs_response_send (c, &response, sizeof (response)); } else { log_printf(LOGSYS_LEVEL_WARNING, "*** %s() (%d:%d - %d) %s!", __func__, service, request_pt->id, is_async_call, strerror(-send_ok)); } res = -ENOBUFS; } if (send_ok >= 0) { corosync_service[service]->lib_engine[request_pt->id].lib_handler_fn(c, request_pt); res = 0; } corosync_sending_allowed_release (&sending_allowed_private_data); return res; } static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn) { return qb_loop_job_add(cs_poll_handle_get(), p, data, fn); } static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn) { return qb_loop_poll_add(cs_poll_handle_get(), p, fd, events, data, fn); } static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn) { return qb_loop_poll_mod(cs_poll_handle_get(), p, fd, events, data, fn); } static int32_t cs_ipcs_dispatch_del(int32_t fd) { return qb_loop_poll_del(cs_poll_handle_get(), fd); } static void cs_ipcs_low_fds_event(int32_t not_enough, int32_t fds_available) { ipc_not_enough_fds_left = not_enough; if (not_enough) { log_printf(LOGSYS_LEVEL_WARNING, "refusing new connections (fds_available:%d)", fds_available); } else { log_printf(LOGSYS_LEVEL_NOTICE, "allowing new connections (fds_available:%d)", fds_available); } } int32_t cs_ipcs_q_level_get(void) { return ipc_fc_totem_queue_level; } static qb_loop_timer_handle ipcs_check_for_flow_control_timer; static void cs_ipcs_check_for_flow_control(void) { int32_t i; int32_t fc_enabled; for (i = 0; i < SERVICES_COUNT_MAX; i++) { if (corosync_service[i] == NULL || ipcs_mapper[i].inst == NULL) { continue; } fc_enabled = QB_IPCS_RATE_OFF; if (ipc_fc_is_quorate == 1 || corosync_service[i]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { /* * we are quorate * now check flow control */ if (ipc_fc_totem_queue_level != TOTEM_Q_LEVEL_CRITICAL && ipc_fc_sync_in_process == 0) { fc_enabled = QB_FALSE; } else if (ipc_fc_totem_queue_level != TOTEM_Q_LEVEL_CRITICAL && i == VOTEQUORUM_SERVICE) { /* * Allow message processing for votequorum service even * in sync phase */ fc_enabled = QB_FALSE; } else { fc_enabled = QB_IPCS_RATE_OFF_2; } } if (fc_enabled) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, fc_enabled); qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &ipcs_check_for_flow_control_timer); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_LOW) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_FAST); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_GOOD) { 
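/*
 * Queue level is back to GOOD: run this service's IPC at the normal rate.
 * (LOW maps to QB_IPCS_RATE_FAST above, HIGH to QB_IPCS_RATE_SLOW below;
 * while flow control is engaged the rate is switched off and a timer
 * keeps re-checking the totem queue level.)
 */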
qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_NORMAL); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_HIGH) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_SLOW); } } } static void cs_ipcs_fc_quorum_changed(int quorate, void *context) { ipc_fc_is_quorate = quorate; cs_ipcs_check_for_flow_control(); } static void cs_ipcs_totem_queue_level_changed(enum totem_q_level level) { ipc_fc_totem_queue_level = level; cs_ipcs_check_for_flow_control(); } void cs_ipcs_sync_state_changed(int32_t sync_in_process) { ipc_fc_sync_in_process = sync_in_process; cs_ipcs_check_for_flow_control(); } void cs_ipcs_stats_update(void) { int32_t i; struct qb_ipcs_stats srv_stats; struct qb_ipcs_connection_stats stats; qb_ipcs_connection_t *c, *prev; struct cs_ipcs_conn_context *cnx; char key_name[ICMAP_KEYNAME_MAXLEN]; for (i = 0; i < SERVICES_COUNT_MAX; i++) { if (corosync_service[i] == NULL || ipcs_mapper[i].inst == NULL) { continue; } qb_ipcs_stats_get(ipcs_mapper[i].inst, &srv_stats, QB_FALSE); for (c = qb_ipcs_connection_first_get(ipcs_mapper[i].inst); c; prev = c, c = qb_ipcs_connection_next_get(ipcs_mapper[i].inst, prev), qb_ipcs_connection_unref(prev)) { cnx = qb_ipcs_context_get(c); if (cnx == NULL) continue; qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.client_pid", cnx->icmap_path); icmap_set_uint32(key_name, stats.client_pid); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.requests", cnx->icmap_path); icmap_set_uint64(key_name, stats.requests); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.responses", cnx->icmap_path); icmap_set_uint64(key_name, stats.responses); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.dispatched", cnx->icmap_path); icmap_set_uint64(key_name, stats.events); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.send_retries", cnx->icmap_path); icmap_set_uint64(key_name, stats.send_retries); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.recv_retries", cnx->icmap_path); icmap_set_uint64(key_name, stats.recv_retries); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.flow_control", cnx->icmap_path); icmap_set_uint32(key_name, stats.flow_control_state); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.flow_control_count", cnx->icmap_path); icmap_set_uint64(key_name, stats.flow_control_count); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.queue_size", cnx->icmap_path); icmap_set_uint32(key_name, cnx->queued); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.invalid_request", cnx->icmap_path); icmap_set_uint64(key_name, cnx->invalid_request); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s.overload", cnx->icmap_path); icmap_set_uint64(key_name, cnx->overload); } } } static enum qb_ipc_type cs_get_ipc_type (void) { char *str; int found = 0; enum qb_ipc_type ret = QB_IPC_NATIVE; if (icmap_get_string("qb.ipc_type", &str) != CS_OK) { log_printf(LOGSYS_LEVEL_DEBUG, "No configured qb.ipc_type. 
Using native ipc"); return QB_IPC_NATIVE; } if (strcmp(str, "native") == 0) { ret = QB_IPC_NATIVE; found = 1; } if (strcmp(str, "shm") == 0) { ret = QB_IPC_SHM; found = 1; } if (strcmp(str, "socket") == 0) { ret = QB_IPC_SOCKET; found = 1; } if (found) { log_printf(LOGSYS_LEVEL_DEBUG, "Using %s ipc", str); } else { log_printf(LOGSYS_LEVEL_DEBUG, "Unknown ipc type %s", str); } free(str); return ret; } const char *cs_ipcs_service_init(struct corosync_service_engine *service) { const char *serv_short_name; serv_short_name = cs_ipcs_serv_short_name(service->id); if (service->lib_engine_count == 0) { log_printf (LOGSYS_LEVEL_DEBUG, "NOT Initializing IPC on %s [%d]", serv_short_name, service->id); return NULL; } if (strlen(serv_short_name) >= CS_IPCS_MAPPER_SERV_NAME) { log_printf (LOGSYS_LEVEL_ERROR, "service name %s is too long", serv_short_name); return "qb_ipcs_run error"; } ipcs_mapper[service->id].id = service->id; strcpy(ipcs_mapper[service->id].name, serv_short_name); log_printf (LOGSYS_LEVEL_DEBUG, "Initializing IPC on %s [%d]", ipcs_mapper[service->id].name, ipcs_mapper[service->id].id); ipcs_mapper[service->id].inst = qb_ipcs_create(ipcs_mapper[service->id].name, ipcs_mapper[service->id].id, cs_get_ipc_type(), &corosync_service_funcs); assert(ipcs_mapper[service->id].inst); qb_ipcs_poll_handlers_set(ipcs_mapper[service->id].inst, &corosync_poll_funcs); if (qb_ipcs_run(ipcs_mapper[service->id].inst) != 0) { log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize IPC"); return "qb_ipcs_run error"; } return NULL; } void cs_ipcs_init(void) { api = apidef_get (); qb_loop_poll_low_fds_event_set(cs_poll_handle_get(), cs_ipcs_low_fds_event); api->quorum_register_callback (cs_ipcs_fc_quorum_changed, NULL); totempg_queue_level_register_callback (cs_ipcs_totem_queue_level_changed); icmap_set_uint64("runtime.connections.active", 0); icmap_set_uint64("runtime.connections.closed", 0); } diff --git a/exec/logconfig.h b/exec/logconfig.h index b49aaea0..14305ad4 100644 --- a/exec/logconfig.h +++ b/exec/logconfig.h @@ -1,63 +1,62 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef LOGCONFIG_H_DEFINED #define LOGCONFIG_H_DEFINED #include -#include #include #include /** * All service handlers */ struct dynamic_service { char *name; unsigned int ver; unsigned int handle; }; #define MAX_DYNAMIC_SERVICES 128 #ifdef LOGCONFIG_USE_ICMAP extern int corosync_log_config_read ( const char **error_string); #else extern int corosync_log_config_read ( cmap_handle_t cmap_h, const char *default_logfile, const char **error_string); #endif #endif /* LOGCONFIG_H_DEFINED */ diff --git a/exec/logsys.c b/exec/logsys.c index 6b4995dd..45cf8776 100644 --- a/exec/logsys.c +++ b/exec/logsys.c @@ -1,840 +1,839 @@ /* * Copyright (c) 2002-2004 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * Author: Steven Dake (sdake@redhat.com) * Author: Lon Hohberger (lhh@redhat.com) * Author: Fabio M. Di Nitto (fdinitto@redhat.com) * * All rights reserved. * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include -#include #include /* * syslog prioritynames, facility names to value mapping * Some C libraries build this in to their headers, but it is non-portable * so logsys supplies its own version. 
*/ struct syslog_names { const char *c_name; int c_val; }; static struct syslog_names prioritynames[] = { { "alert", LOG_ALERT }, { "crit", LOG_CRIT }, { "debug", LOG_DEBUG }, { "emerg", LOG_EMERG }, { "err", LOG_ERR }, { "error", LOG_ERR }, { "info", LOG_INFO }, { "notice", LOG_NOTICE }, { "warning", LOG_WARNING }, { NULL, -1 } }; #define MAX_FILES_PER_SUBSYS 32 #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define IPC_LOGSYS_SIZE 8192*64 #else #define IPC_LOGSYS_SIZE 8192*1024 #endif /* * need unlogical order to preserve 64bit alignment */ struct logsys_logger { char subsys[LOGSYS_MAX_SUBSYS_NAMELEN]; /* subsystem name */ char *logfile; /* log to file */ unsigned int mode; /* subsystem mode */ unsigned int debug; /* debug on|off|trace */ int syslog_priority; /* priority */ int logfile_priority; /* priority to file */ int init_status; /* internal field to handle init queues for subsystems */ int32_t target_id; char *files[MAX_FILES_PER_SUBSYS]; int32_t file_idx; int32_t dirty; }; /* values for logsys_logger init_status */ #define LOGSYS_LOGGER_INIT_DONE 0 #define LOGSYS_LOGGER_NEEDS_INIT 1 static int logsys_system_needs_init = LOGSYS_LOGGER_NEEDS_INIT; static struct logsys_logger logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT + 1]; static pthread_mutex_t logsys_config_mutex = PTHREAD_MUTEX_INITIALIZER; static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode); static void _logsys_config_apply_per_file(int32_t s, const char *filename); static void _logsys_config_apply_per_subsys(int32_t s); static void _logsys_subsys_filename_add (int32_t s, const char *filename); static void logsys_file_format_get(char* file_format, int buf_len); static char *format_buffer=NULL; static int logsys_thread_started = 0; static int _logsys_config_subsys_get_unlocked (const char *subsys) { unsigned int i; if (!subsys) { return LOGSYS_MAX_SUBSYS_COUNT; } for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { if (strcmp (logsys_loggers[i].subsys, subsys) == 0) { return i; } } return (-1); } /* * we need a version that can work when somebody else is already * holding a config mutex lock or we will never get out of here */ static int logsys_config_file_set_unlocked ( int subsysid, const char **error_string, const char *file) { static char error_string_response[512]; int i; char file_format[128]; if (logsys_loggers[subsysid].target_id > 0) { int32_t f; for (f = 0; f < logsys_loggers[subsysid].file_idx; f++) { qb_log_filter_ctl(logsys_loggers[subsysid].target_id, QB_LOG_FILTER_REMOVE, QB_LOG_FILTER_FILE, logsys_loggers[subsysid].files[f], LOG_TRACE); } } logsys_loggers[subsysid].dirty = QB_TRUE; if (file == NULL) { return (0); } if (logsys_loggers[subsysid].target_id > 0 && logsys_loggers[subsysid].logfile != NULL && strcmp(file, logsys_loggers[subsysid].logfile) == 0) { return (0); } if (strlen(file) >= PATH_MAX) { snprintf (error_string_response, sizeof(error_string_response), "%s: logfile name exceed maximum system filename length", logsys_loggers[subsysid].subsys); *error_string = error_string_response; return (-1); } if (logsys_loggers[subsysid].logfile != NULL) { free(logsys_loggers[subsysid].logfile); logsys_loggers[subsysid].logfile = NULL; } logsys_loggers[subsysid].logfile = strdup(file); if (logsys_loggers[subsysid].logfile == NULL) { snprintf (error_string_response, sizeof(error_string_response), "Unable to allocate memory for logfile '%s'", file); *error_string = error_string_response; return (-1); } for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { if ((logsys_loggers[i].logfile != NULL) && (strcmp 
(logsys_loggers[i].logfile, file) == 0) && (i != subsysid)) { /* we have found another subsys with this config file * so add a filter */ logsys_loggers[subsysid].target_id = logsys_loggers[i].target_id; return (0); } } if (logsys_loggers[subsysid].target_id > 0) { int num_using_current = 0; for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { if (logsys_loggers[subsysid].target_id == logsys_loggers[i].target_id) { num_using_current++; } } if (num_using_current == 1) { /* no one else is using this close it */ qb_log_file_close(logsys_loggers[subsysid].target_id); } } logsys_loggers[subsysid].target_id = qb_log_file_open(file); if (logsys_loggers[subsysid].target_id < 0) { int err = -logsys_loggers[subsysid].target_id; char error_str[LOGSYS_MAX_PERROR_MSG_LEN]; const char *error_ptr; error_ptr = qb_strerror_r(err, error_str, sizeof(error_str)); free(logsys_loggers[subsysid].logfile); logsys_loggers[subsysid].logfile = NULL; snprintf (error_string_response, sizeof(error_string_response), "Can't open logfile '%s' for reason: %s (%d)", file, error_ptr, err); *error_string = error_string_response; return (-1); } logsys_file_format_get(file_format, 128); qb_log_format_set(logsys_loggers[subsysid].target_id, file_format); qb_log_ctl(logsys_loggers[subsysid].target_id, QB_LOG_CONF_ENABLED, (logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_FILE)); if (logsys_thread_started) { qb_log_ctl(logsys_loggers[subsysid].target_id, QB_LOG_CONF_THREADED, QB_TRUE); } return (0); } static void logsys_subsys_init ( const char *subsys, int subsysid) { if (logsys_system_needs_init == LOGSYS_LOGGER_NEEDS_INIT) { logsys_loggers[subsysid].init_status = LOGSYS_LOGGER_NEEDS_INIT; } else { logsys_loggers[subsysid].mode = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode; logsys_loggers[subsysid].debug = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].debug; logsys_loggers[subsysid].syslog_priority = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].syslog_priority; logsys_loggers[subsysid].logfile_priority = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].logfile_priority; logsys_loggers[subsysid].init_status = LOGSYS_LOGGER_INIT_DONE; } strncpy (logsys_loggers[subsysid].subsys, subsys, sizeof (logsys_loggers[subsysid].subsys)); logsys_loggers[subsysid].subsys[ sizeof (logsys_loggers[subsysid].subsys) - 1] = '\0'; logsys_loggers[subsysid].file_idx = 0; } static const char *_logsys_tags_stringify(uint32_t tags) { if (tags == QB_LOG_TAG_LIBQB_MSG) { return "QB"; } else { return logsys_loggers[tags].subsys; } } void logsys_system_fini (void) { int i; int f; for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { free(logsys_loggers[i].logfile); for (f = 0; f < logsys_loggers[i].file_idx; f++) { free(logsys_loggers[i].files[f]); } } qb_log_fini (); } /* * Internal API - exported */ int _logsys_system_setup( const char *mainsystem, unsigned int mode, int syslog_facility, int syslog_priority) { int i; int32_t fidx; char tempsubsys[LOGSYS_MAX_SUBSYS_NAMELEN]; int blackbox_enable_res; if ((mainsystem == NULL) || (strlen(mainsystem) >= LOGSYS_MAX_SUBSYS_NAMELEN)) { return -1; } /* * Setup libqb as a subsys */ i = _logsys_subsys_create ("QB", "array.c,log.c,log_syslog.c,log_blackbox.c,log_format.c," "log_file.c,log_dcs.c,log_thread.c,ipc_shm.c,ipcs.c,ipc_us.c,loop.c," "loop_poll_epoll.c,loop_job.c,loop_poll_poll.c,loop_poll_kqueue.c," "loop_timerlist.c,loop_poll.c,ringbuffer.c,ringbuffer_helper.c,trie.c," "map.c,skiplist.c,rpl_sem.c,hdb.c,unix.c,hashtable.c,strlcpy.c,ipc_socket.c," "strchrnul.c,ipc_setup.c,strlcat.c"); if (i < 0) { return -1; } /* * name clash * 
_logsys_subsys_filename_add (i, "util.c"); */ /* * This file (logsys.c) is not exactly QB. We need tag for logsys.c if flightrecorder init * fails, and QB seems to be closest. */ _logsys_subsys_filename_add (i, "logsys.c"); i = LOGSYS_MAX_SUBSYS_COUNT; pthread_mutex_lock (&logsys_config_mutex); snprintf(logsys_loggers[i].subsys, LOGSYS_MAX_SUBSYS_NAMELEN, "%s", mainsystem); logsys_loggers[i].mode = mode; logsys_loggers[i].debug = LOGSYS_DEBUG_OFF; logsys_loggers[i].file_idx = 0; logsys_loggers[i].logfile_priority = syslog_priority; logsys_loggers[i].syslog_priority = syslog_priority; qb_log_init(mainsystem, syslog_facility, syslog_priority); if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_STDERR) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); } else { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); } if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_SYSLOG) { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); } else { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); } qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_PRIORITY_BUMP, LOG_INFO - LOG_DEBUG); qb_log_filter_ctl(QB_LOG_BLACKBOX, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_TRACE); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, IPC_LOGSYS_SIZE); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_THREADED, QB_FALSE); blackbox_enable_res = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); if (logsys_format_set(NULL) == -1) { return -1; } qb_log_tags_stringify_fn_set(_logsys_tags_stringify); logsys_loggers[i].init_status = LOGSYS_LOGGER_INIT_DONE; logsys_system_needs_init = LOGSYS_LOGGER_INIT_DONE; for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { if ((strcmp (logsys_loggers[i].subsys, "") != 0) && (logsys_loggers[i].init_status == LOGSYS_LOGGER_NEEDS_INIT)) { fidx = logsys_loggers[i].file_idx; strncpy (tempsubsys, logsys_loggers[i].subsys, sizeof (tempsubsys)); tempsubsys[sizeof (tempsubsys) - 1] = '\0'; logsys_subsys_init(tempsubsys, i); logsys_loggers[i].file_idx = fidx; _logsys_config_mode_set_unlocked(i, logsys_loggers[i].mode); _logsys_config_apply_per_subsys(i); } } if (blackbox_enable_res < 0) { LOGSYS_PERROR (-blackbox_enable_res, LOGSYS_LEVEL_WARNING, "Unable to initialize log flight recorder. "\ "The most common cause of this error is " \ "not enough space on /dev/shm. 
Corosync will continue work, " \ "but blackbox will not be available"); } pthread_mutex_unlock (&logsys_config_mutex); return (0); } static void _logsys_subsys_filename_add (int32_t s, const char *filename) { int i; if (filename == NULL) { return; } assert(logsys_loggers[s].file_idx < MAX_FILES_PER_SUBSYS); assert(logsys_loggers[s].file_idx >= 0); for (i = 0; i < logsys_loggers[s].file_idx; i++) { if (strcmp(logsys_loggers[s].files[i], filename) == 0) { return; } } logsys_loggers[s].files[logsys_loggers[s].file_idx++] = strdup(filename); if (logsys_system_needs_init == LOGSYS_LOGGER_INIT_DONE) { _logsys_config_apply_per_file(s, filename); } } int _logsys_subsys_create (const char *subsys, const char *filename) { int i; if ((subsys == NULL) || (strlen(subsys) >= LOGSYS_MAX_SUBSYS_NAMELEN)) { return -1; } pthread_mutex_lock (&logsys_config_mutex); i = _logsys_config_subsys_get_unlocked (subsys); if ((i > -1) && (i < LOGSYS_MAX_SUBSYS_COUNT)) { _logsys_subsys_filename_add(i, filename); pthread_mutex_unlock (&logsys_config_mutex); return i; } for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { if (strcmp (logsys_loggers[i].subsys, "") == 0) { logsys_subsys_init(subsys, i); _logsys_subsys_filename_add(i, filename); break; } } if (i >= LOGSYS_MAX_SUBSYS_COUNT) { i = -1; } pthread_mutex_unlock (&logsys_config_mutex); return i; } int _logsys_config_subsys_get (const char *subsys) { unsigned int i; pthread_mutex_lock (&logsys_config_mutex); i = _logsys_config_subsys_get_unlocked (subsys); pthread_mutex_unlock (&logsys_config_mutex); return i; } static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode) { if ( logsys_loggers[subsysid].mode == new_mode) { return 0; } if (logsys_loggers[subsysid].target_id > 0) { qb_log_ctl(logsys_loggers[subsysid].target_id, QB_LOG_CONF_ENABLED, (new_mode & LOGSYS_MODE_OUTPUT_FILE)); } if (subsysid == LOGSYS_MAX_SUBSYS_COUNT) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, (new_mode & LOGSYS_MODE_OUTPUT_STDERR)); qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, (new_mode & LOGSYS_MODE_OUTPUT_SYSLOG)); } logsys_loggers[subsysid].mode = new_mode; return 0; } int logsys_config_mode_set (const char *subsys, unsigned int mode) { int i; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { i = _logsys_config_mode_set_unlocked(i, mode); } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { _logsys_config_mode_set_unlocked(i, mode); } i = 0; } pthread_mutex_unlock (&logsys_config_mutex); return i; } unsigned int logsys_config_mode_get (const char *subsys) { int i; i = _logsys_config_subsys_get (subsys); if (i < 0) { return i; } return logsys_loggers[i].mode; } int logsys_config_file_set ( const char *subsys, const char **error_string, const char *file) { int i; int res; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i < 0) { res = i; } else { res = logsys_config_file_set_unlocked(i, error_string, file); } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { res = logsys_config_file_set_unlocked(i, error_string, file); if (res < 0) { break; } } } pthread_mutex_unlock (&logsys_config_mutex); return res; } static void logsys_file_format_get(char* file_format, int buf_len) { char *per_t; file_format[0] = '\0'; per_t = strstr(format_buffer, "%t"); if (per_t) { strcpy(file_format, "%t [%P] %H %N"); per_t += 2; strncat(file_format, per_t, buf_len - strlen("%t [%P] %H %N")); } else { strcpy(file_format, "[%P] 
%H %N"); strncat(file_format, format_buffer, buf_len - strlen("[%P] %H %N")); } } int logsys_format_set (const char *format) { int i; int c; int w; int reminder; char syslog_format[128]; char file_format[128]; if (format_buffer) { free(format_buffer); format_buffer = NULL; } format_buffer = strdup(format ? format : "%7p [%6g] %b"); if (format_buffer == NULL) { return -1; } qb_log_format_set(QB_LOG_STDERR, format_buffer); logsys_file_format_get(file_format, 128); for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { if (logsys_loggers[i].target_id > 0) { qb_log_format_set(logsys_loggers[i].target_id, file_format); } } /* * This just goes through and remove %t and %p from * the format string for syslog. */ w = 0; memset(syslog_format, '\0', sizeof(syslog_format)); for (c = 0; c < strlen(format_buffer); c++) { if (format_buffer[c] == '%') { reminder = c; for (c++; c < strlen(format_buffer); c++) { if (isdigit(format_buffer[c])) { continue; } if (format_buffer[c] == 't' || format_buffer[c] == 'p') { c++; } else { c = reminder; } break; } } syslog_format[w] = format_buffer[c]; w++; } qb_log_format_set(QB_LOG_SYSLOG, syslog_format); return 0; } char *logsys_format_get (void) { return format_buffer; } int logsys_config_syslog_facility_set ( const char *subsys, unsigned int facility) { return qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, facility); } int logsys_config_syslog_priority_set ( const char *subsys, unsigned int priority) { int i; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { logsys_loggers[i].syslog_priority = priority; logsys_loggers[i].dirty = QB_TRUE; i = 0; } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { logsys_loggers[i].syslog_priority = priority; logsys_loggers[i].dirty = QB_TRUE; } i = 0; } pthread_mutex_unlock (&logsys_config_mutex); return i; } int logsys_config_logfile_priority_set ( const char *subsys, unsigned int priority) { int i; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { logsys_loggers[i].logfile_priority = priority; logsys_loggers[i].dirty = QB_TRUE; i = 0; } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { logsys_loggers[i].logfile_priority = priority; logsys_loggers[i].dirty = QB_TRUE; } i = 0; } pthread_mutex_unlock (&logsys_config_mutex); return i; } static void _logsys_config_apply_per_file(int32_t s, const char *filename) { uint32_t syslog_priority = logsys_loggers[s].syslog_priority; uint32_t logfile_priority = logsys_loggers[s].logfile_priority; qb_log_filter_ctl(s, QB_LOG_TAG_SET, QB_LOG_FILTER_FILE, filename, LOG_TRACE); qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_REMOVE, QB_LOG_FILTER_FILE, filename, LOG_TRACE); qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_REMOVE, QB_LOG_FILTER_FILE, filename, LOG_TRACE); if (logsys_loggers[s].target_id > 0) { qb_log_filter_ctl(logsys_loggers[s].target_id, QB_LOG_FILTER_REMOVE, QB_LOG_FILTER_FILE, filename, LOG_TRACE); } if (logsys_loggers[s].debug != LOGSYS_DEBUG_OFF) { switch (logsys_loggers[s].debug) { case LOGSYS_DEBUG_ON: syslog_priority = LOG_DEBUG; logfile_priority = LOG_DEBUG; break; case LOGSYS_DEBUG_TRACE: syslog_priority = LOG_TRACE; logfile_priority = LOG_TRACE; break; default: assert(0); } } qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, filename, syslog_priority); qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, filename, logfile_priority); if (logsys_loggers[s].target_id > 0) 
{ qb_log_filter_ctl(logsys_loggers[s].target_id, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, filename, logfile_priority); } } static void _logsys_config_apply_per_subsys(int32_t s) { int32_t f; for (f = 0; f < logsys_loggers[s].file_idx; f++) { _logsys_config_apply_per_file(s, logsys_loggers[s].files[f]); } if (logsys_loggers[s].target_id > 0) { qb_log_ctl(logsys_loggers[s].target_id, QB_LOG_CONF_ENABLED, (logsys_loggers[s].mode & LOGSYS_MODE_OUTPUT_FILE)); } logsys_loggers[s].dirty = QB_FALSE; } void logsys_config_apply(void) { int32_t s; for (s = 0; s <= LOGSYS_MAX_SUBSYS_COUNT; s++) { if (strcmp(logsys_loggers[s].subsys, "") == 0) { continue; } _logsys_config_apply_per_subsys(s); } } int logsys_config_debug_set ( const char *subsys, unsigned int debug) { int i; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { logsys_loggers[i].dirty = QB_TRUE; logsys_loggers[i].debug = debug; i = 0; } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { logsys_loggers[i].debug = debug; logsys_loggers[i].dirty = QB_TRUE; } i = 0; } pthread_mutex_unlock (&logsys_config_mutex); return i; } int logsys_priority_id_get (const char *name) { unsigned int i; for (i = 0; prioritynames[i].c_name != NULL; i++) { if (strcasecmp(name, prioritynames[i].c_name) == 0) { return (prioritynames[i].c_val); } } return (-1); } int logsys_thread_start (void) { int i; int err; err = qb_log_thread_start(); if (err != 0) { return (err); } qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_THREADED, QB_TRUE); for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { if (logsys_loggers[i].target_id > 0) { qb_log_ctl(logsys_loggers[i].target_id, QB_LOG_CONF_THREADED, QB_TRUE); } } logsys_thread_started = 1; return (0); } diff --git a/exec/main.c b/exec/main.c index 2a286d85..ba8e06e1 100644 --- a/exec/main.c +++ b/exec/main.c @@ -1,1425 +1,1424 @@ /* * Copyright (c) 2002-2006 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** * \mainpage Corosync * * This is the doxygen generated developer documentation for the Corosync * project. For more information about Corosync, please see the project * web site, corosync.org. * * \section license License * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include "quorum.h" #include "totemsrp.h" #include "logconfig.h" #include "totemconfig.h" #include "main.h" #include "sync.h" #include "timer.h" #include "util.h" #include "apidef.h" #include "service.h" #include "schedwrk.h" #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define IPC_LOGSYS_SIZE 1024*64 #else #define IPC_LOGSYS_SIZE 8192*128 #endif LOGSYS_DECLARE_SYSTEM ("corosync", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG, LOG_DAEMON, LOG_INFO); LOGSYS_DECLARE_SUBSYS ("MAIN"); #define SERVER_BACKLOG 5 static int sched_priority = 0; static unsigned int service_count = 32; static struct totem_logging_configuration totem_logging_configuration; static struct corosync_api_v1 *api = NULL; static int sync_in_process = 1; static qb_loop_t *corosync_poll_handle; struct sched_param global_sched_param; static corosync_timer_handle_t corosync_stats_timer_handle; static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; static int ip_version = AF_INET; qb_loop_t *cs_poll_handle_get (void) { return (corosync_poll_handle); } int cs_poll_dispatch_add (qb_loop_t * handle, int fd, int events, void *data, int (*dispatch_fn) (int fd, int revents, void *data)) { return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, dispatch_fn); } int cs_poll_dispatch_delete(qb_loop_t * handle, int fd) { return qb_loop_poll_del(handle, fd); } void corosync_state_dump (void) { int i; for (i = 0; i < SERVICES_COUNT_MAX; i++) { if (corosync_service[i] && corosync_service[i]->exec_dump_fn) { corosync_service[i]->exec_dump_fn (); } } } static void corosync_blackbox_write_to_file (void) { char fname[PATH_MAX]; char fdata_fname[PATH_MAX]; char time_str[PATH_MAX]; struct tm cur_time_tm; time_t cur_time_t; ssize_t res; cur_time_t = time(NULL); localtime_r(&cur_time_t, &cur_time_tm); strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm); snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld", get_run_dir(), time_str, (long long int)getpid()); if ((res = qb_log_blackbox_write_to_file(fname)) < 0) { LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file"); } snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_run_dir()); unlink(fdata_fname); if (symlink(fname, fdata_fname) == -1) { log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'", fname, fdata_fname); } } static void unlink_all_completed (void) { api->timer_delete (corosync_stats_timer_handle); qb_loop_stop (corosync_poll_handle); icmap_fini(); } void corosync_shutdown_request (void) { corosync_service_unlink_all (api, unlink_all_completed); } static int32_t sig_diag_handler (int num, void *data) { corosync_state_dump (); return 0; } static int32_t sig_exit_handler (int num, void *data) { log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal"); corosync_service_unlink_all (api, unlink_all_completed); return 0; } static void sigsegv_handler (int num) { (void)signal (SIGSEGV, SIG_DFL); corosync_blackbox_write_to_file (); qb_log_fini(); raise (SIGSEGV); } /* * QB wrapper for real signal handler */ static int32_t sig_segv_handler (int num, void *data) { sigsegv_handler(num); return 0; } static void sigabrt_handler (int num) { (void)signal (SIGABRT, SIG_DFL); 
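/*
 * Same crash-handling pattern as sigsegv_handler() above: restore the
 * default disposition first, dump the blackbox, shut qb logging down and
 * re-raise so the process still terminates via the default SIGABRT action.
 */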
corosync_blackbox_write_to_file (); qb_log_fini(); raise (SIGABRT); } /* * QB wrapper for real signal handler */ static int32_t sig_abrt_handler (int num, void *data) { sigabrt_handler(num); return 0; } #define LOCALHOST_IP inet_addr("127.0.0.1") static void *corosync_group_handle; static struct totempg_group corosync_group = { .group = "a", .group_len = 1 }; static void serialize_lock (void) { } static void serialize_unlock (void) { } static void corosync_sync_completed (void) { log_printf (LOGSYS_LEVEL_NOTICE, "Completed service synchronization, ready to provide service."); sync_in_process = 0; cs_ipcs_sync_state_changed(sync_in_process); cs_ipc_allow_connections(1); /* * Inform totem to start using new message queue again */ totempg_trans_ack(); } static int corosync_sync_callbacks_retrieve ( int service_id, struct sync_callbacks *callbacks) { if (corosync_service[service_id] == NULL) { return (-1); } if (callbacks == NULL) { return (0); } callbacks->name = corosync_service[service_id]->name; callbacks->sync_init = corosync_service[service_id]->sync_init; callbacks->sync_process = corosync_service[service_id]->sync_process; callbacks->sync_activate = corosync_service[service_id]->sync_activate; callbacks->sync_abort = corosync_service[service_id]->sync_abort; return (0); } static struct memb_ring_id corosync_ring_id; static void member_object_joined (unsigned int nodeid) { char member_ip[ICMAP_KEYNAME_MAXLEN]; char member_join_count[ICMAP_KEYNAME_MAXLEN]; char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_ip, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.ip", nodeid); snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid); snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.status", nodeid); if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) { icmap_inc(member_join_count); icmap_set_string(member_status, "joined"); } else { icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid)); icmap_set_uint32(member_join_count, 1); icmap_set_string(member_status, "joined"); } log_printf (LOGSYS_LEVEL_DEBUG, "Member joined: %s", api->totem_ifaces_print (nodeid)); } static void member_object_left (unsigned int nodeid) { char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.status", nodeid); icmap_set_string(member_status, "left"); log_printf (LOGSYS_LEVEL_DEBUG, "Member left: %s", api->totem_ifaces_print (nodeid)); } static void confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; int abort_activate = 0; if (sync_in_process == 1) { abort_activate = 1; } sync_in_process = 1; cs_ipcs_sync_state_changed(sync_in_process); memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id)); for (i = 0; i < left_list_entries; i++) { member_object_left (left_list[i]); } for (i = 0; i < joined_list_entries; i++) { member_object_joined (joined_list[i]); } /* * Call configuration change for all services */ for (i = 0; i < service_count; i++) { if (corosync_service[i] && corosync_service[i]->confchg_fn) { corosync_service[i]->confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } } if (abort_activate) { 
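/*
 * A new configuration arrived while the previous synchronization was
 * still running; abort it before saving the transitional membership or
 * starting the next sync round below.
 */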
sync_abort (); } if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) { sync_save_transitional (member_list, member_list_entries, ring_id); } if (configuration_type == TOTEM_CONFIGURATION_REGULAR) { sync_start (member_list, member_list_entries, ring_id); } } static void priv_drop (void) { return; /* TODO: we are still not dropping privs */ } static void corosync_tty_detach (void) { int devnull; /* * Disconnect from TTY if this is not a debug run */ switch (fork ()) { case -1: corosync_exit_error (COROSYNC_DONE_FORK); break; case 0: /* * child which is disconnected, run this process */ break; default: exit (0); break; } /* Create new session */ (void)setsid(); /* * Map stdin/out/err to /dev/null. */ devnull = open("/dev/null", O_RDWR); if (devnull == -1) { corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0 || dup2(devnull, 2) < 0) { close(devnull); corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } close(devnull); } static void corosync_mlockall (void) { int res; struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; #ifndef RLIMIT_MEMLOCK #define RLIMIT_MEMLOCK RLIMIT_VMEM #endif setrlimit (RLIMIT_MEMLOCK, &rlimit); res = mlockall (MCL_CURRENT | MCL_FUTURE); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults"); }; } static void corosync_totem_stats_updater (void *data) { totempg_stats_t * stats; uint32_t total_mtt_rx_token; uint32_t total_backlog_calc; uint32_t total_token_holdtime; int t, prev; int32_t token_count; stats = api->totem_get_stats(); icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved); icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail); icmap_set_uint64("runtime.totem.pg.srp.orf_token_tx", stats->srp->orf_token_tx); icmap_set_uint64("runtime.totem.pg.srp.orf_token_rx", stats->srp->orf_token_rx); icmap_set_uint64("runtime.totem.pg.srp.memb_merge_detect_tx", stats->srp->memb_merge_detect_tx); icmap_set_uint64("runtime.totem.pg.srp.memb_merge_detect_rx", stats->srp->memb_merge_detect_rx); icmap_set_uint64("runtime.totem.pg.srp.memb_join_tx", stats->srp->memb_join_tx); icmap_set_uint64("runtime.totem.pg.srp.memb_join_rx", stats->srp->memb_join_rx); icmap_set_uint64("runtime.totem.pg.srp.mcast_tx", stats->srp->mcast_tx); icmap_set_uint64("runtime.totem.pg.srp.mcast_retx", stats->srp->mcast_retx); icmap_set_uint64("runtime.totem.pg.srp.mcast_rx", stats->srp->mcast_rx); icmap_set_uint64("runtime.totem.pg.srp.memb_commit_token_tx", stats->srp->memb_commit_token_tx); icmap_set_uint64("runtime.totem.pg.srp.memb_commit_token_rx", stats->srp->memb_commit_token_rx); icmap_set_uint64("runtime.totem.pg.srp.token_hold_cancel_tx", stats->srp->token_hold_cancel_tx); icmap_set_uint64("runtime.totem.pg.srp.token_hold_cancel_rx", stats->srp->token_hold_cancel_rx); icmap_set_uint64("runtime.totem.pg.srp.operational_entered", stats->srp->operational_entered); icmap_set_uint64("runtime.totem.pg.srp.operational_token_lost", stats->srp->operational_token_lost); icmap_set_uint64("runtime.totem.pg.srp.gather_entered", stats->srp->gather_entered); icmap_set_uint64("runtime.totem.pg.srp.gather_token_lost", stats->srp->gather_token_lost); icmap_set_uint64("runtime.totem.pg.srp.commit_entered", stats->srp->commit_entered); icmap_set_uint64("runtime.totem.pg.srp.commit_token_lost", stats->srp->commit_token_lost); icmap_set_uint64("runtime.totem.pg.srp.recovery_entered", stats->srp->recovery_entered); 
icmap_set_uint64("runtime.totem.pg.srp.recovery_token_lost", stats->srp->recovery_token_lost); icmap_set_uint64("runtime.totem.pg.srp.consensus_timeouts", stats->srp->consensus_timeouts); icmap_set_uint64("runtime.totem.pg.srp.rx_msg_dropped", stats->srp->rx_msg_dropped); icmap_set_uint32("runtime.totem.pg.srp.continuous_gather", stats->srp->continuous_gather); icmap_set_uint32("runtime.totem.pg.srp.continuous_sendmsg_failures", stats->srp->continuous_sendmsg_failures); icmap_set_uint8("runtime.totem.pg.srp.firewall_enabled_or_nic_failure", stats->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0); if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER || stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) { log_printf (LOGSYS_LEVEL_WARNING, "Totem is unable to form a cluster because of an " "operating system or network fault. The most common " "cause of this message is that the local firewall is " "configured improperly."); icmap_set_uint8("runtime.totem.pg.srp.firewall_enabled_or_nic_failure", 1); } else { icmap_set_uint8("runtime.totem.pg.srp.firewall_enabled_or_nic_failure", 0); } total_mtt_rx_token = 0; total_token_holdtime = 0; total_backlog_calc = 0; token_count = 0; t = stats->srp->latest_token; while (1) { if (t == 0) prev = TOTEM_TOKEN_STATS_MAX - 1; else prev = t - 1; if (prev == stats->srp->earliest_token) break; /* if tx == 0, then dropped token (not ours) */ if (stats->srp->token[t].tx != 0 || (stats->srp->token[t].rx - stats->srp->token[prev].rx) > 0 ) { total_mtt_rx_token += (stats->srp->token[t].rx - stats->srp->token[prev].rx); total_token_holdtime += (stats->srp->token[t].tx - stats->srp->token[t].rx); total_backlog_calc += stats->srp->token[t].backlog_calc; token_count++; } t = prev; } if (token_count) { icmap_set_uint32("runtime.totem.pg.srp.mtt_rx_token", (total_mtt_rx_token / token_count)); icmap_set_uint32("runtime.totem.pg.srp.avg_token_workload", (total_token_holdtime / token_count)); icmap_set_uint32("runtime.totem.pg.srp.avg_backlog_calc", (total_backlog_calc / token_count)); } cs_ipcs_stats_update(); api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void corosync_totem_stats_init (void) { icmap_set_uint32("runtime.totem.pg.srp.mtt_rx_token", 0); icmap_set_uint32("runtime.totem.pg.srp.avg_token_workload", 0); icmap_set_uint32("runtime.totem.pg.srp.avg_backlog_calc", 0); /* start stats timer */ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { const struct qb_ipc_request_header *header; int32_t service; int32_t fn_id; uint32_t id; header = msg; if (endian_conversion_required) { id = swab32 (header->id); } else { id = header->id; } /* * Call the proper executive handler */ service = id >> 16; fn_id = id & 0xffff; if (!corosync_service[service]) { return; } if (fn_id >= corosync_service[service]->exec_engine_count) { log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)", fn_id, service, corosync_service[service]->exec_engine_count); return; } icmap_fast_inc(service_stats_rx[service][fn_id]); if (endian_conversion_required) { assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL); corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn ((void *)msg); } 
corosync_service[service]->exec_engine[fn_id].exec_handler_fn (msg, nodeid); } int main_mcast ( const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee) { const struct qb_ipc_request_header *req = iovec->iov_base; int32_t service; int32_t fn_id; service = req->id >> 16; fn_id = req->id & 0xffff; if (corosync_service[service]) { icmap_fast_inc(service_stats_tx[service][fn_id]); } return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee)); } static void corosync_ring_id_create_or_load ( struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr) { int fd; int res = 0; char filename[PATH_MAX]; snprintf (filename, sizeof(filename), "%s/ringid_%s", get_run_dir(), totemip_print (addr)); fd = open (filename, O_RDONLY, 0700); /* * If file can be opened and read, read the ring id */ if (fd != -1) { res = read (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); } /* * If file could not be opened or read, create a new ring id */ if ((fd == -1) || (res != sizeof (uint64_t))) { memb_ring_id->seq = 0; umask(0); fd = open (filename, O_CREAT|O_RDWR, 0700); if (fd != -1) { res = write (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, "Couldn't write ringid file '%s'", filename); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, "Couldn't create ringid file '%s'", filename); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } totemip_copy(&memb_ring_id->rep, addr); assert (!totemip_zero_check(&memb_ring_id->rep)); } static void corosync_ring_id_store ( const struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr) { char filename[PATH_MAX]; int fd; int res; snprintf (filename, sizeof(filename), "%s/ringid_%s", get_run_dir(), totemip_print (addr)); fd = open (filename, O_WRONLY, 0700); if (fd == -1) { fd = open (filename, O_CREAT|O_RDWR, 0700); } if (fd == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "Couldn't store new ring id %llx to stable storage", memb_ring_id->seq); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } log_printf (LOGSYS_LEVEL_DEBUG, "Storing new sequence id for ring %llx", memb_ring_id->seq); res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq)); close (fd); if (res != sizeof(memb_ring_id->seq)) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "Couldn't store new ring id %llx to stable storage", memb_ring_id->seq); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } static qb_loop_timer_handle recheck_the_q_level_timer; void corosync_recheck_the_q_level(void *data) { totempg_check_q_level(corosync_group_handle); if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) { qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer); } } struct sending_allowed_private_data_struct { int reserved_msgs; }; int corosync_sending_allowed ( unsigned int service, unsigned int id, const void *msg, void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; struct iovec reserve_iovec; struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg; int sending_allowed; reserve_iovec.iov_base = (char *)header; reserve_iovec.iov_len = header->size; pd->reserved_msgs = totempg_groups_joined_reserve ( corosync_group_handle, &reserve_iovec, 1); if (pd->reserved_msgs == -1) { return -EINVAL; } sending_allowed = 
QB_FALSE; if (corosync_quorum_is_quorate() == 1 || corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { // we are quorate // now check flow control if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs && sync_in_process == 0) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs == 0) { return -ENOBUFS; } else /* (sync_in_process) */ { return -EINPROGRESS; } } else { return -EHOSTUNREACH; } return (sending_allowed); } void corosync_sending_allowed_release (void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; if (pd->reserved_msgs == -1) { return; } totempg_groups_joined_release (pd->reserved_msgs); } int message_source_is_local (const mar_message_source_t *source) { int ret = 0; assert (source != NULL); if (source->nodeid == totempg_my_nodeid_get ()) { ret = 1; } return ret; } void message_source_set ( mar_message_source_t *source, void *conn) { assert ((source != NULL) && (conn != NULL)); memset (source, 0, sizeof (mar_message_source_t)); source->nodeid = totempg_my_nodeid_get (); source->conn = conn; } struct scheduler_pause_timeout_data { struct totem_config *totem_config; qb_loop_timer_handle handle; unsigned long long tv_prev; unsigned long long max_tv_diff; }; static void timer_function_scheduler_timeout (void *data) { struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data; unsigned long long tv_current; unsigned long long tv_diff; tv_current = qb_util_nano_current_get (); if (timeout_data->tv_prev == 0) { /* * Initial call -> just pretent everything is ok */ timeout_data->tv_prev = tv_current; timeout_data->max_tv_diff = 0; } tv_diff = tv_current - timeout_data->tv_prev; timeout_data->tv_prev = tv_current; if (tv_diff > timeout_data->max_tv_diff) { log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms " "(threshold is %0.4f ms). Consider token timeout increase.", (float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC); } /* * Set next threshold, because token_timeout can change */ timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8; qb_loop_timer_add (corosync_poll_handle, QB_LOOP_MED, timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3, timeout_data, timer_function_scheduler_timeout, &timeout_data->handle); } static void corosync_setscheduler (void) { #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER) int res; sched_priority = sched_get_priority_max (SCHED_RR); if (sched_priority != -1) { global_sched_param.sched_priority = sched_priority; res = sched_setscheduler (0, SCHED_RR, &global_sched_param); if (res == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "Could not set SCHED_RR at priority %d", global_sched_param.sched_priority); global_sched_param.sched_priority = 0; #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET qb_log_thread_priority_set (SCHED_OTHER, 0); #endif } else { /* * Turn on SCHED_RR in logsys system */ #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET res = qb_log_thread_priority_set (SCHED_RR, sched_priority); #else res = -1; #endif if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not set logsys thread priority." 
" Can't continue because of priority inversions."); corosync_exit_error (COROSYNC_DONE_LOGSETUP); } } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not get maximum scheduler priority"); sched_priority = 0; } #else log_printf(LOGSYS_LEVEL_WARNING, "The Platform is missing process priority setting features. Leaving at default."); #endif } /* The basename man page contains scary warnings about thread-safety and portability, hence this */ static const char *corosync_basename(const char *file_name) { char *base; base = strrchr (file_name, '/'); if (base) { return base + 1; } return file_name; } static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) __attribute__((format(printf, 6, 7))); static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) { va_list ap; va_start(ap, format); qb_log_from_external_source_va(function_name, corosync_basename(file_name), format, level, file_line, subsys, ap); va_end(ap); } static void fplay_key_change_notify_fn ( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) { fprintf(stderr,"Writetofile\n"); corosync_blackbox_write_to_file (); } if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) { fprintf(stderr,"statefump\n"); corosync_state_dump (); } } static void corosync_fplay_control_init (void) { icmap_track_t track = NULL; icmap_set_string("runtime.blackbox.dump_flight_data", "no"); icmap_set_string("runtime.blackbox.dump_state", "no"); icmap_track_add("runtime.blackbox.dump_flight_data", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); icmap_track_add("runtime.blackbox.dump_state", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); } /* * Set RO flag for keys, which ether doesn't make sense to change by user (statistic) * or which when changed are not reflected by runtime (totem.crypto_cipher, ...). * * Also some RO keys cannot be determined in this stage, so they are set later in * other functions (like nodelist.local_node_pos, ...) 
*/ static void set_icmap_ro_keys_flag (void) { /* * Set RO flag for all keys of internal configuration and runtime statistics */ icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.connections.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.config.", CS_TRUE, CS_TRUE); icmap_set_ro_access("uidgid.config.", CS_TRUE, CS_TRUE); /* * Set RO flag for constrete keys of configuration which can't be changed * during runtime */ icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.ip_version", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.transport", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.cluster_name", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.threads", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.version", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.nodeid", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.clear_node_high_bit", CS_FALSE, CS_TRUE); icmap_set_ro_access("qb.ipc_type", CS_FALSE, CS_TRUE); icmap_set_ro_access("config.reload_in_progress", CS_FALSE, CS_TRUE); icmap_set_ro_access("config.totemconfig_reload_in_progress", CS_FALSE, CS_TRUE); } static void main_service_ready (void) { int res; /* * This must occur after totempg is initialized because "this_ip" must be set */ res = corosync_service_defaults_link_and_init (api); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services"); corosync_exit_error (COROSYNC_DONE_INIT_SERVICES); } cs_ipcs_init(); corosync_totem_stats_init (); corosync_fplay_control_init (); sync_init ( corosync_sync_callbacks_retrieve, corosync_sync_completed); } static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid) { struct flock lock; enum e_corosync_done err; char pid_s[17]; int fd_flag; int lf; err = COROSYNC_DONE_EXIT; lf = open (lockfile, O_WRONLY | O_CREAT, 0640); if (lf == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file."); return (COROSYNC_DONE_ACQUIRE_LOCK); } retry_fcntl: lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; if (fcntl (lf, F_SETLK, &lock) == -1) { switch (errno) { case EINTR: goto retry_fcntl; break; case EAGAIN: case EACCES: log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running."); err = COROSYNC_DONE_ALREADY_RUNNING; goto error_close; break; default: log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s", strerror(errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close; break; } } if (ftruncate (lf, 0) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } memset (pid_s, 0, sizeof (pid_s)); snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); retry_write: if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) { if (errno == EINTR) { goto retry_write; } else { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. 
" "Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } } if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " "Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } fd_flag |= FD_CLOEXEC; if (fcntl (lf, F_SETFD, fd_flag) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. " "Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } return (err); error_close_unlink: unlink (lockfile); error_close: close (lf); return (err); } int main (int argc, char **argv, char **envp) { const char *error_string; struct totem_config totem_config; int res, ch; int background, setprio, testonly; struct stat stat_out; enum e_corosync_done flock_err; uint64_t totem_config_warnings; struct scheduler_pause_timeout_data scheduler_pause_timeout_data; /* default configuration */ background = 1; setprio = 1; testonly = 0; while ((ch = getopt (argc, argv, "fprtv")) != EOF) { switch (ch) { case 'f': background = 0; break; case 'p': setprio = 0; break; case 'r': setprio = 1; break; case 't': testonly = 1; break; case 'v': printf ("Corosync Cluster Engine, version '%s'\n", VERSION); printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n"); logsys_system_fini(); return EXIT_SUCCESS; break; default: fprintf(stderr, \ "usage:\n"\ " -f : Start application in foreground.\n"\ " -p : Do not set process priority.\n"\ " -t : Test configuration and exit.\n"\ " -r : Set round robin realtime scheduling (default).\n"\ " -v : Display version and SVN revision of Corosync and exit.\n"); logsys_system_fini(); return EXIT_FAILURE; } } /* * Set round robin realtime scheduling with priority 99 * Lock all memory to avoid page faults which may interrupt * application healthchecking */ if (setprio) { corosync_setscheduler (); } corosync_mlockall (); /* * Other signals are registered later via qb_loop_signal_add */ (void)signal (SIGSEGV, sigsegv_handler); (void)signal (SIGABRT, sigabrt_handler); #if MSG_NOSIGNAL != 0 (void)signal (SIGPIPE, SIG_IGN); #endif if (icmap_init() != CS_OK) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component."); corosync_exit_error (COROSYNC_DONE_ICMAP); } set_icmap_ro_keys_flag(); /* * Initialize the corosync_api_v1 definition */ api = apidef_get (); res = coroparse_configparse(icmap_get_global_map(), &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } res = corosync_log_config_read (&error_string); if (res == -1) { /* * if we are here, we _must_ flush the logsys queue * and try to inform that we couldn't read the config. * this is a desperate attempt before certain death * and there is no guarantee that we can print to stderr * nor that logsys is sending the messages where we expect. 
*/ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); fprintf(stderr, "%s", error_string); syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } if (!testonly) { log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION); log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES ""); } /* * Make sure required directory is present */ res = stat (get_run_dir(), &stat_out); if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) { log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", get_run_dir()); corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); } res = chdir(get_run_dir()); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to run directory %s. " "Please make sure it has correct context and rights.", get_run_dir()); corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); } res = totem_config_read (&totem_config, &error_string, &totem_config_warnings); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used."); } if (totem_config_warnings != 0) { log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist."); } res = totem_config_keyread (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } res = totem_config_validate (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (testonly) { corosync_exit_error (COROSYNC_DONE_EXIT); } ip_version = totem_config.ip_version; totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load; totem_config.totem_memb_ring_id_store = corosync_ring_id_store; totem_config.totem_logging_configuration = totem_logging_configuration; totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem," "totemmrp.c,totemrrp.c,totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c," "totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c,totemknet.c"); totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR; totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE; totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG; totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE; totem_config.totem_logging_configuration.log_printf = _logsys_log_printf; logsys_config_apply(); /* * Now we are fully initialized. 
*/ if (background) { corosync_tty_detach (); } corosync_poll_handle = qb_loop_create (); memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data)); scheduler_pause_timeout_data.totem_config = &totem_config; timer_function_scheduler_timeout (&scheduler_pause_timeout_data); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, SIGUSR2, NULL, sig_diag_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGINT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGSEGV, NULL, sig_segv_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGABRT, NULL, sig_abrt_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGQUIT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGTERM, NULL, sig_exit_handler, NULL); if (logsys_thread_start() != 0) { log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread"); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) { corosync_exit_error (flock_err); } /* * if totempg_initialize doesn't have root priveleges, it cannot * bind to a specific interface. This only matters if * there is more then one interface in a system, so * in this case, only a warning is printed */ /* * Join multicast group and setup delivery * and configuration change functions */ totempg_initialize ( corosync_poll_handle, &totem_config); totempg_service_ready_register ( main_service_ready); totempg_groups_initialize ( &corosync_group_handle, deliver_fn, confchg_fn); totempg_groups_join ( corosync_group_handle, &corosync_group, 1); /* * Drop root privleges to user 'corosync' * TODO: Don't really need full root capabilities; * needed capabilities are: * CAP_NET_RAW (bindtodevice) * CAP_SYS_NICE (setscheduler) * CAP_IPC_LOCK (mlockall) */ priv_drop (); schedwrk_init ( serialize_lock, serialize_unlock); /* * Start main processing loop */ qb_loop_run (corosync_poll_handle); /* * Exit was requested */ totempg_finalize (); /* * free the loop resources */ qb_loop_destroy (corosync_poll_handle); /* * free up the icmap */ /* * Remove pid lock file */ unlink (corosync_lock_file); corosync_exit_error (COROSYNC_DONE_EXIT); return EXIT_SUCCESS; } diff --git a/exec/mon.c b/exec/mon.c index 8b500fbb..3f71fb52 100644 --- a/exec/mon.c +++ b/exec/mon.c @@ -1,511 +1,511 @@ /* * Copyright (c) 2010-2012 Red Hat, Inc. * * All rights reserved. * * Author: Angus Salkeld * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include -#include +#include #include #include #include "fsm.h" #include "service.h" LOGSYS_DECLARE_SUBSYS ("MON"); /* * Service Interfaces required by service_message_handler struct */ static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api); static struct corosync_api_v1 *api; #define MON_DEFAULT_PERIOD 3000 #define MON_MIN_PERIOD 500 #define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC) struct corosync_service_engine mon_service_engine = { .name = "corosync resource monitoring service", .id = MON_SERVICE, .priority = 1, .private_data_size = 0, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, .lib_init_fn = NULL, .lib_exit_fn = NULL, .lib_engine = NULL, .lib_engine_count = 0, .exec_engine = NULL, .exec_engine_count = 0, .confchg_fn = NULL, .exec_init_fn = mon_exec_init_fn, .exec_dump_fn = NULL }; -static DECLARE_LIST_INIT (confchg_notify); +static QB_LIST_DECLARE (confchg_notify); struct resource_instance { const char *icmap_path; const char *name; corosync_timer_handle_t timer_handle; void (*update_stats_fn) (void *data); struct cs_fsm fsm; uint64_t period; icmap_value_types_t max_type; union { int32_t int32; double dbl; } max; }; static void mem_update_stats_fn (void *data); static void load_update_stats_fn (void *data); static struct resource_instance memory_used_inst = { .name = "memory_used", .icmap_path = "resources.system.memory_used.", .update_stats_fn = mem_update_stats_fn, .max_type = ICMAP_VALUETYPE_INT32, .max.int32 = INT32_MAX, .period = MON_DEFAULT_PERIOD, }; static struct resource_instance load_15min_inst = { .name = "load_15min", .icmap_path = "resources.system.load_15min.", .update_stats_fn = load_update_stats_fn, .max_type = ICMAP_VALUETYPE_DOUBLE, .max.dbl = INT32_MAX, .period = MON_DEFAULT_PERIOD, }; /* * F S M */ static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data); static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data); const char * mon_running_str = "running"; const char * mon_failed_str = "failed"; const char * mon_failure_str = "failure"; const char * mon_stopped_str = "stopped"; const char * mon_config_changed_str = "config_changed"; enum mon_resource_state { MON_S_STOPPED, MON_S_RUNNING, MON_S_FAILED }; enum mon_resource_event { MON_E_CONFIG_CHANGED, MON_E_FAILURE }; struct cs_fsm_entry mon_fsm_table[] = { { MON_S_STOPPED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_STOPPED, MON_S_RUNNING, -1} }, { MON_S_STOPPED, MON_E_FAILURE, NULL, {-1} }, { MON_S_RUNNING, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} }, { MON_S_RUNNING, MON_E_FAILURE, mon_resource_failed, {MON_S_FAILED, -1} }, { MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} }, { MON_S_FAILED, MON_E_FAILURE, NULL, {-1} }, }; struct corosync_service_engine *mon_get_service_engine_ver0 (void) { return (&mon_service_engine); } static const char * mon_res_state_to_str(struct cs_fsm* fsm, int32_t state) { 
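/*
 * Rough sketch, not the actual fsm.h implementation, of how a table such as
 * mon_fsm_table above is consumed: cs_fsm_process() looks up the row whose
 * state/event pair matches the current state and the posted event, runs its
 * handler (if any) and reports a missing row through the callback. Field
 * names below are illustrative only:
 *
 *   for (i = 0; i < fsm->entries; i++) {
 *           if (fsm->table[i].curr_state == fsm->curr_state &&
 *               fsm->table[i].event == event) {
 *                   if (fsm->table[i].handler_fn != NULL) {
 *                           fsm->table[i].handler_fn (fsm, event, data);
 *                   }
 *                   return;
 *           }
 *   }
 *   cb (fsm, CS_FSM_CB_EVENT_PROCESS_NF, fsm->curr_state, -1, event, data);
 */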
switch (state) { case MON_S_STOPPED: return mon_stopped_str; break; case MON_S_RUNNING: return mon_running_str; break; case MON_S_FAILED: return mon_failed_str; break; } return NULL; } static const char * mon_res_event_to_str(struct cs_fsm* fsm, int32_t event) { switch (event) { case MON_E_CONFIG_CHANGED: return mon_config_changed_str; break; case MON_E_FAILURE: return mon_failure_str; break; } return NULL; } static void mon_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state, int32_t next_state, int32_t fsm_event, void *data) { switch (cb_event) { case CS_FSM_CB_EVENT_PROCESS_NF: log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"", fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state)); corosync_exit_error(COROSYNC_DONE_FATAL_ERR); break; case CS_FSM_CB_EVENT_STATE_SET: log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"", fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), fsm->state_to_str(fsm, next_state)); break; case CS_FSM_CB_EVENT_STATE_SET_NF: log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")", fsm->name, fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), fsm->state_to_str(fsm, next_state), fsm->event_to_str(fsm, fsm_event)); corosync_exit_error(COROSYNC_DONE_FATAL_ERR); break; default: log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Can't find callback event!"); corosync_exit_error(COROSYNC_DONE_FATAL_ERR); break; } } static void mon_fsm_state_set (struct cs_fsm* fsm, enum mon_resource_state next_state, struct resource_instance* inst) { enum mon_resource_state prev_state = fsm->curr_state; const char *state_str; char key_name[ICMAP_KEYNAME_MAXLEN]; ENTER(); cs_fsm_state_set(fsm, next_state, inst, mon_fsm_cb); if (prev_state == fsm->curr_state) { return; } state_str = mon_res_state_to_str(fsm, fsm->curr_state); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state"); icmap_set_string(key_name, state_str); } static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data) { struct resource_instance * inst = (struct resource_instance *)data; char *tmp_str; uint64_t tmp_value; char key_name[ICMAP_KEYNAME_MAXLEN]; int run_updater; int scanf_res = 0; int32_t i32; double dbl; ENTER(); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period"); if (icmap_get_string(key_name, &tmp_str) == CS_OK) { scanf_res = sscanf(tmp_str, "%"PRIu64, &tmp_value); if (scanf_res != 1) { log_printf (LOGSYS_LEVEL_WARNING, "Could NOT use poll_period: %s (not uint64 type) for resource %s", tmp_str, inst->name); } free(tmp_str); if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) { log_printf (LOGSYS_LEVEL_DEBUG, "poll_period changing from:%"PRIu64" to %"PRIu64".", inst->period, tmp_value); inst->period = tmp_value; } else { log_printf (LOGSYS_LEVEL_WARNING, "Could NOT use poll_period:%"PRIu64" ms for resource %s", tmp_value, inst->name); } } if (inst->timer_handle) { api->timer_delete(inst->timer_handle); inst->timer_handle = 0; } run_updater = 0; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "max"); if (icmap_get_string(key_name, &tmp_str) == CS_OK) { if (inst->max_type == ICMAP_VALUETYPE_INT32) { if (sscanf(tmp_str, "%"PRId32, &i32) != 1) { inst->max.int32 = INT32_MAX; mon_fsm_state_set (fsm, MON_S_STOPPED, inst); } else { inst->max.int32 = i32; run_updater = 1; } } if (inst->max_type == ICMAP_VALUETYPE_DOUBLE) { if 
(sscanf(tmp_str, "%lf", &dbl) != 1) { inst->max.dbl = INT32_MAX; mon_fsm_state_set (fsm, MON_S_STOPPED, inst); } else { inst->max.dbl = dbl; run_updater = 1; } } free(tmp_str); } if (run_updater) { mon_fsm_state_set (fsm, MON_S_RUNNING, inst); /* * run the updater, incase the period has shortened * and to start the timer. */ inst->update_stats_fn (inst); } } void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data) { struct resource_instance * inst = (struct resource_instance *)data; ENTER(); mon_fsm_state_set (fsm, MON_S_FAILED, inst); } static int32_t percent_mem_used_get(void) { sg_mem_stats *mem_stats; sg_swap_stats *swap_stats; long long total, freemem; #ifdef HAVE_LIBSTATGRAB_GE_090 mem_stats = sg_get_mem_stats(NULL); swap_stats = sg_get_swap_stats(NULL); #else mem_stats = sg_get_mem_stats(); swap_stats = sg_get_swap_stats(); #endif if (mem_stats == NULL || swap_stats == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s", sg_str_error(sg_get_error())); return -1; } total = mem_stats->total + swap_stats->total; freemem = mem_stats->free + swap_stats->free; return ((total - freemem) * 100) / total; } static void mem_update_stats_fn (void *data) { struct resource_instance * inst = (struct resource_instance *)data; int32_t new_value; uint64_t timestamp; char key_name[ICMAP_KEYNAME_MAXLEN]; new_value = percent_mem_used_get(); if (new_value > 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current"); icmap_set_uint32(key_name, new_value); timestamp = cs_timestamp_get(); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated"); icmap_set_uint64(key_name, timestamp); if (new_value > inst->max.int32 && inst->fsm.curr_state != MON_S_FAILED) { cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb); } } api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS, inst, inst->update_stats_fn, &inst->timer_handle); } static double min15_loadavg_get(void) { sg_load_stats *load_stats; #ifdef HAVE_LIBSTATGRAB_GE_090 load_stats = sg_get_load_stats (NULL); #else load_stats = sg_get_load_stats (); #endif if (load_stats == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s", sg_str_error (sg_get_error())); return -1; } return load_stats->min15; } static void load_update_stats_fn (void *data) { struct resource_instance * inst = (struct resource_instance *)data; uint64_t timestamp; char key_name[ICMAP_KEYNAME_MAXLEN]; double min15 = min15_loadavg_get(); if (min15 > 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current"); icmap_set_double(key_name, min15); timestamp = cs_timestamp_get(); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated"); icmap_set_uint64(key_name, timestamp); if (min15 > inst->max.dbl && inst->fsm.curr_state != MON_S_FAILED) { cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb); } } api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS, inst, inst->update_stats_fn, &inst->timer_handle); } static void mon_key_changed_cb ( int32_t event, const char *key_name, struct icmap_notify_value new_value, struct icmap_notify_value old_value, void *user_data) { struct resource_instance* inst = (struct resource_instance*)user_data; char *last_key_part; if (event == ICMAP_TRACK_DELETE && inst) { log_printf (LOGSYS_LEVEL_WARNING, "resource \"%s\" deleted from cmap!", inst->name); cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb); } if (event == ICMAP_TRACK_MODIFY) { last_key_part = strrchr(key_name, 
'.'); if (last_key_part == NULL) return ; last_key_part++; if (strcmp(last_key_part, "max") == 0 || strcmp(last_key_part, "poll_period") == 0) { ENTER(); cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb); } } } static void mon_instance_init (struct resource_instance* inst) { uint64_t tmp_value; char key_name[ICMAP_KEYNAME_MAXLEN]; icmap_track_t icmap_track = NULL; char *tmp_str; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current"); if (inst->max_type == ICMAP_VALUETYPE_INT32) { icmap_set_int32(key_name, 0); } else { icmap_set_double(key_name, 0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated"); icmap_set_uint64(key_name, 0); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state"); icmap_set_string(key_name, mon_stopped_str); inst->fsm.name = inst->name; inst->fsm.curr_entry = 0; inst->fsm.curr_state = MON_S_STOPPED; inst->fsm.table = mon_fsm_table; inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry); inst->fsm.state_to_str = mon_res_state_to_str; inst->fsm.event_to_str = mon_res_event_to_str; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period"); if (icmap_get_string(key_name, &tmp_str) != CS_OK || sscanf(tmp_str, "%"PRIu64, &tmp_value) != 1) { icmap_set_uint64(key_name, inst->period); } else { if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) { inst->period = tmp_value; } else { log_printf (LOGSYS_LEVEL_WARNING, "Could NOT use poll_period:%"PRIu64" ms for resource %s", tmp_value, inst->name); } free(tmp_str); } cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb); icmap_track_add(inst->icmap_path, ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX, mon_key_changed_cb, inst, &icmap_track); } static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api) { #ifdef HAVE_LIBSTATGRAB_GE_090 sg_init(1); #else sg_init(); #endif api = corosync_api; mon_instance_init (&memory_used_inst); mon_instance_init (&load_15min_inst); return NULL; } diff --git a/exec/totemconfig.c b/exec/totemconfig.c index 7b44d047..a1e3dabd 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -1,1653 +1,1653 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2013 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include "util.h" #include "totemconfig.h" #define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4 #define TOKEN_TIMEOUT 1000 #define TOKEN_COEFFICIENT 650 #define JOIN_TIMEOUT 50 #define MERGE_TIMEOUT 200 #define DOWNCHECK_TIMEOUT 1000 #define FAIL_TO_RECV_CONST 2500 #define SEQNO_UNCHANGED_CONST 30 #define MINIMUM_TIMEOUT (int)(1000/HZ)*3 #define MAX_NETWORK_DELAY 50 #define WINDOW_SIZE 50 #define MAX_MESSAGES 17 #define MISS_COUNT_CONST 5 /* These currently match the defaults in libknet.h */ #define KNET_PING_INTERVAL 1000 #define KNET_PING_TIMEOUT 2000 #define KNET_PING_PRECISION 2048 #define DEFAULT_PORT 5405 static char error_string_response[512]; static void add_totem_config_notification(struct totem_config *totem_config); /* All the volatile parameters are uint32s, luckily */ static uint32_t *totem_get_param_by_name(struct totem_config *totem_config, const char *param_name) { if (strcmp(param_name, "totem.token") == 0) return &totem_config->token_timeout; if (strcmp(param_name, "totem.token_retransmit") == 0) return &totem_config->token_retransmit_timeout; if (strcmp(param_name, "totem.hold") == 0) return &totem_config->token_hold_timeout; if (strcmp(param_name, "totem.token_retransmits_before_loss_const") == 0) return &totem_config->token_retransmits_before_loss_const; if (strcmp(param_name, "totem.join") == 0) return &totem_config->join_timeout; if (strcmp(param_name, "totem.send_join") == 0) return &totem_config->send_join_timeout; if (strcmp(param_name, "totem.consensus") == 0) return &totem_config->consensus_timeout; if (strcmp(param_name, "totem.merge") == 0) return &totem_config->merge_timeout; if (strcmp(param_name, "totem.downcheck") == 0) return &totem_config->downcheck_timeout; if (strcmp(param_name, "totem.fail_recv_const") == 0) return &totem_config->fail_to_recv_const; if (strcmp(param_name, "totem.seqno_unchanged_const") == 0) return &totem_config->seqno_unchanged_const; if (strcmp(param_name, "totem.heartbeat_failures_allowed") == 0) return &totem_config->heartbeat_failures_allowed; if (strcmp(param_name, "totem.max_network_delay") == 0) return &totem_config->max_network_delay; if (strcmp(param_name, "totem.window_size") == 0) return &totem_config->window_size; if (strcmp(param_name, "totem.max_messages") == 0) return &totem_config->max_messages; if (strcmp(param_name, "totem.miss_count_const") == 0) return &totem_config->miss_count_const; return NULL; } /* * Read key_name from icmap. If key is not found or key_name == delete_key or if allow_zero is false * and readed value is zero, default value is used and stored into totem_config. 
*/ static void totem_volatile_config_set_value (struct totem_config *totem_config, const char *key_name, const char *deleted_key, unsigned int default_value, int allow_zero_value) { char runtime_key_name[ICMAP_KEYNAME_MAXLEN]; if (icmap_get_uint32(key_name, totem_get_param_by_name(totem_config, key_name)) != CS_OK || (deleted_key != NULL && strcmp(deleted_key, key_name) == 0) || (!allow_zero_value && *totem_get_param_by_name(totem_config, key_name) == 0)) { *totem_get_param_by_name(totem_config, key_name) = default_value; } /* * Store totem_config value to cmap runtime section */ if (strlen("runtime.config.") + strlen(key_name) >= ICMAP_KEYNAME_MAXLEN) { /* * This shouldn't happen */ return ; } strcpy(runtime_key_name, "runtime.config."); strcat(runtime_key_name, key_name); icmap_set_uint32(runtime_key_name, *totem_get_param_by_name(totem_config, key_name)); } /* * Read and validate config values from cmap and store them into totem_config. If key doesn't exists, * default value is stored. deleted_key is name of key beeing processed by delete operation * from cmap. It is considered as non existing even if it can be read. Can be NULL. */ static void totem_volatile_config_read (struct totem_config *totem_config, const char *deleted_key) { uint32_t u32; totem_volatile_config_set_value(totem_config, "totem.token_retransmits_before_loss_const", deleted_key, TOKEN_RETRANSMITS_BEFORE_LOSS_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.token", deleted_key, TOKEN_TIMEOUT, 0); if (totem_config->interface_count > 0 && totem_config->interfaces[0].member_count > 2) { u32 = TOKEN_COEFFICIENT; icmap_get_uint32("totem.token_coefficient", &u32); totem_config->token_timeout += (totem_config->interfaces[0].member_count - 2) * u32; /* * Store totem_config value to cmap runtime section */ icmap_set_uint32("runtime.config.totem.token", totem_config->token_timeout); } totem_volatile_config_set_value(totem_config, "totem.max_network_delay", deleted_key, MAX_NETWORK_DELAY, 0); totem_volatile_config_set_value(totem_config, "totem.window_size", deleted_key, WINDOW_SIZE, 0); totem_volatile_config_set_value(totem_config, "totem.max_messages", deleted_key, MAX_MESSAGES, 0); totem_volatile_config_set_value(totem_config, "totem.miss_count_const", deleted_key, MISS_COUNT_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.token_retransmit", deleted_key, (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)), 0); totem_volatile_config_set_value(totem_config, "totem.hold", deleted_key, (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)), 0); totem_volatile_config_set_value(totem_config, "totem.join", deleted_key, JOIN_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.consensus", deleted_key, (int)(float)(1.2 * totem_config->token_timeout), 0); totem_volatile_config_set_value(totem_config, "totem.merge", deleted_key, MERGE_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.downcheck", deleted_key, DOWNCHECK_TIMEOUT, 0); totem_volatile_config_set_value(totem_config, "totem.fail_recv_const", deleted_key, FAIL_TO_RECV_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.seqno_unchanged_const", deleted_key, SEQNO_UNCHANGED_CONST, 0); totem_volatile_config_set_value(totem_config, "totem.send_join", deleted_key, 0, 1); totem_volatile_config_set_value(totem_config, "totem.heartbeat_failures_allowed", deleted_key, 0, 1); } static int totem_volatile_config_validate ( struct totem_config *totem_config, const char 
**error_string) { static char local_error_reason[512]; const char *error_reason = local_error_reason; if (totem_config->max_network_delay < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_network_delay parameter (%d ms) may not be less than (%d ms).", totem_config->max_network_delay, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token timeout parameter (%d ms) may not be less than (%d ms).", totem_config->token_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token retransmit timeout parameter (%d ms) may not be less than (%d ms).", totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token hold timeout parameter (%d ms) may not be less than (%d ms).", totem_config->token_hold_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->join_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The join timeout parameter (%d ms) may not be less than (%d ms).", totem_config->join_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The consensus timeout parameter (%d ms) may not be less than (%d ms).", totem_config->consensus_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->consensus_timeout < totem_config->join_timeout) { snprintf (local_error_reason, sizeof(local_error_reason), "The consensus timeout parameter (%d ms) may not be less than join timeout (%d ms).", totem_config->consensus_timeout, totem_config->join_timeout); goto parse_error; } if (totem_config->merge_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The merge timeout parameter (%d ms) may not be less than (%d ms).", totem_config->merge_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The downcheck timeout parameter (%d ms) may not be less than (%d ms).", totem_config->downcheck_timeout, MINIMUM_TIMEOUT); goto parse_error; } return 0; parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s\n", error_reason); *error_string = error_string_response; return (-1); } static int totem_get_crypto(struct totem_config *totem_config) { char *str; const char *tmp_cipher; const char *tmp_hash; tmp_hash = "none"; tmp_cipher = "none"; if (icmap_get_string("totem.crypto_cipher", &str) == CS_OK) { if (strcmp(str, "none") == 0) { tmp_cipher = "none"; } if (strcmp(str, "aes256") == 0) { tmp_cipher = "aes256"; } if (strcmp(str, "aes192") == 0) { tmp_cipher = "aes192"; } if (strcmp(str, "aes128") == 0) { tmp_cipher = "aes128"; } if (strcmp(str, "aes256") == 0) { tmp_cipher = "aes256"; } if (strcmp(str, "3des") == 0) { tmp_cipher = "3des"; } free(str); } if (icmap_get_string("totem.crypto_hash", &str) == CS_OK) { if (strcmp(str, "none") == 0) { tmp_hash = "none"; } if (strcmp(str, "md5") == 0) { tmp_hash = "md5"; } if (strcmp(str, "sha1") == 0) { tmp_hash = "sha1"; } if (strcmp(str, "sha256") == 0) { tmp_hash = "sha256"; } if (strcmp(str, "sha384") == 0) { tmp_hash = "sha384"; } 
if (strcmp(str, "sha512") == 0) { tmp_hash = "sha512"; } free(str); } if ((strcmp(tmp_cipher, "none") != 0) && (strcmp(tmp_hash, "none") == 0)) { return -1; } free(totem_config->crypto_cipher_type); free(totem_config->crypto_hash_type); totem_config->crypto_cipher_type = strdup(tmp_cipher); totem_config->crypto_hash_type = strdup(tmp_hash); return 0; } static int totem_config_get_ip_version(void) { int res; char *str; res = AF_INET; if (icmap_get_string("totem.ip_version", &str) == CS_OK) { if (strcmp(str, "ipv4") == 0) { res = AF_INET; } if (strcmp(str, "ipv6") == 0) { res = AF_INET6; } free(str); } return (res); } static uint16_t generate_cluster_id (const char *cluster_name) { int i; int value = 0; for (i = 0; i < strlen(cluster_name); i++) { value <<= 1; value += cluster_name[i]; } return (value & 0xFFFF); } static int get_cluster_mcast_addr ( const char *cluster_name, unsigned int linknumber, int ip_version, struct totem_ip_address *res) { uint16_t clusterid; char addr[INET6_ADDRSTRLEN + 1]; int err; if (cluster_name == NULL) { return (-1); } clusterid = generate_cluster_id(cluster_name) + linknumber; memset (res, 0, sizeof(*res)); switch (ip_version) { case AF_INET: snprintf(addr, sizeof(addr), "239.192.%d.%d", clusterid >> 8, clusterid % 0xFF); break; case AF_INET6: snprintf(addr, sizeof(addr), "ff15::%x", clusterid); break; default: /* * Unknown family */ return (-1); } err = totemip_parse (res, addr, ip_version); return (err); } static unsigned int generate_nodeid_for_duplicate_test( struct totem_config *totem_config, char *addr) { unsigned int nodeid; struct totem_ip_address totemip; /* AF_INET hard-coded here because auto-generated nodeids are only for IPv4 */ if (totemip_parse(&totemip, addr, AF_INET) != 0) return -1; memcpy (&nodeid, &totemip.addr, sizeof (unsigned int)); #if __BYTE_ORDER == __LITTLE_ENDIAN nodeid = swab32 (nodeid); #endif if (totem_config->clear_node_high_bit) { nodeid &= 0x7FFFFFFF; } return nodeid; } static int check_for_duplicate_nodeids( struct totem_config *totem_config, const char **error_string) { icmap_iter_t iter; icmap_iter_t subiter; const char *iter_key; int res = 0; int retval = 0; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char *ring0_addr=NULL; char *ring0_addr1=NULL; unsigned int node_pos; unsigned int node_pos1; unsigned int nodeid; unsigned int nodeid1; int autogenerated; iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos); autogenerated = 0; if (icmap_get_uint32(tmp_key, &nodeid) != CS_OK) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); if (icmap_get_string(tmp_key, &ring0_addr) != CS_OK) { continue; } /* Generate nodeid so we can check that auto-generated nodeids don't clash either */ nodeid = generate_nodeid_for_duplicate_test(totem_config, ring0_addr); if (nodeid == -1) { continue; } autogenerated = 1; } node_pos1 = 0; subiter = icmap_iter_init("nodelist.node."); while (((iter_key = icmap_iter_next(subiter, NULL, NULL)) != NULL) && (node_pos1 < node_pos)) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos1, tmp_key); if ((res != 2) || (node_pos1 >= node_pos)) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos1); if 
(icmap_get_uint32(tmp_key, &nodeid1) != CS_OK) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos1); if (icmap_get_string(tmp_key, &ring0_addr1) != CS_OK) { continue; } nodeid1 = generate_nodeid_for_duplicate_test(totem_config, ring0_addr1); if (nodeid1 == -1) { continue; } } if (nodeid == nodeid1) { retval = -1; snprintf (error_string_response, sizeof(error_string_response), "Nodeid %u%s%s%s appears twice in corosync.conf", nodeid, autogenerated?"(autogenerated from ":"", autogenerated?ring0_addr:"", autogenerated?")":""); log_printf (LOGSYS_LEVEL_ERROR, error_string_response); *error_string = error_string_response; break; } } icmap_iter_finalize(subiter); } icmap_iter_finalize(iter); return retval; } static int find_local_node_in_nodelist(struct totem_config *totem_config) { icmap_iter_t iter; const char *iter_key; int res = 0; unsigned int node_pos; int local_node_pos = -1; struct totem_ip_address bind_addr; int interface_up, interface_num; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; struct totem_ip_address node_addr; res = totemip_iface_check(&totem_config->interfaces[0].bindnet, &bind_addr, &interface_up, &interface_num, totem_config->clear_node_high_bit); if (res == -1) { return (-1); } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); if (icmap_get_string(tmp_key, &node_addr_str) != CS_OK) { continue; } res = totemip_parse (&node_addr, node_addr_str, totem_config->ip_version); free(node_addr_str); if (res == -1) { continue ; } if (totemip_equal(&bind_addr, &node_addr)) { local_node_pos = node_pos; } } icmap_iter_finalize(iter); return (local_node_pos); } /* * Compute difference between two set of totem interface arrays. set1 and set2 * are changed so for same ring, ip existing in both set1 and set2 are cleared * (set to 0), and ips which are only in set1 or set2 remains untouched. * totempg_node_add/remove is called. */ static void compute_interfaces_diff(int interface_count, struct totem_interface *set1, struct totem_interface *set2) { int ring_no, set1_pos, set2_pos; struct totem_ip_address empty_ip_address; memset(&empty_ip_address, 0, sizeof(empty_ip_address)); for (ring_no = 0; ring_no < interface_count; ring_no++) { for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) { for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) { /* * For current ring_no remove all set1 items existing * in set2 */ if (memcmp(&set1[ring_no].member_list[set1_pos], &set2[ring_no].member_list[set2_pos], sizeof(struct totem_ip_address)) == 0) { memset(&set1[ring_no].member_list[set1_pos], 0, sizeof(struct totem_ip_address)); memset(&set2[ring_no].member_list[set2_pos], 0, sizeof(struct totem_ip_address)); } } } } for (ring_no = 0; ring_no < interface_count; ring_no++) { for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) { /* * All items which remained in set1 doesn't exists in set2 any longer so * node has to be removed. 
*/ if (memcmp(&set1[ring_no].member_list[set1_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) { log_printf(LOGSYS_LEVEL_DEBUG, "removing dynamic member %s for ring %u", totemip_print(&set1[ring_no].member_list[set1_pos]), ring_no); totempg_member_remove(&set1[ring_no].member_list[set1_pos], ring_no); } } for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) { /* * All items which remained in set2 doesn't existed in set1 so this is no node * and has to be added. */ if (memcmp(&set2[ring_no].member_list[set2_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) { log_printf(LOGSYS_LEVEL_DEBUG, "adding dynamic member %s for ring %u", totemip_print(&set2[ring_no].member_list[set2_pos]), ring_no); totempg_member_add(&set2[ring_no].member_list[set2_pos], ring_no); } } } } static void put_nodelist_members_to_config(struct totem_config *totem_config, int reload) { icmap_iter_t iter, iter2; const char *iter_key, *iter_key2; int res = 0; unsigned int node_pos; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key2[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; int member_count; unsigned int linknumber = 0; int i, j; struct totem_interface *orig_interfaces = NULL; struct totem_interface *new_interfaces = NULL; if (reload) { /* * We need to compute diff only for reload. Also for initial configuration * not all totem structures are initialized so corosync will crash during * member_add/remove */ orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); assert(orig_interfaces != NULL); new_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); assert(new_interfaces != NULL); memcpy(orig_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX); } /* Clear out nodelist so we can put the new one in if needed */ for (i = 0; i < totem_config->interface_count; i++) { for (j = 0; j < PROCESSOR_COUNT_MAX; j++) { memset(&totem_config->interfaces[i].member_list[j], 0, sizeof(struct totem_ip_address)); } totem_config->interfaces[i].member_count = 0; } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); iter2 = icmap_iter_init(tmp_key); while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) { unsigned int nodeid; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos); if (icmap_get_uint32(tmp_key, &nodeid) != CS_OK) { } res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, &linknumber, tmp_key2); if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { continue; } if (icmap_get_string(iter_key2, &node_addr_str) != CS_OK) { continue; } member_count = totem_config->interfaces[linknumber].member_count; res = totemip_parse(&totem_config->interfaces[linknumber].member_list[member_count], node_addr_str, totem_config->ip_version); if (res != -1) { totem_config->interfaces[linknumber].member_list[member_count].nodeid = nodeid; totem_config->interfaces[linknumber].member_count++; } free(node_addr_str); } icmap_iter_finalize(iter2); } icmap_iter_finalize(iter); if (reload) { memcpy(new_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX); compute_interfaces_diff(totem_config->interface_count, orig_interfaces, new_interfaces); free(new_interfaces); free(orig_interfaces); } } static void 
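/*
 * Small example of the set difference computed by compute_interfaces_diff()
 * above (addresses are illustrative): if a ring previously had members
 * {10.0.0.1, 10.0.0.2} and the reloaded nodelist contains {10.0.0.2, 10.0.0.3},
 * the common entry 10.0.0.2 is zeroed in both sets, 10.0.0.1 remains only in
 * the old set and is passed to totempg_member_remove(), and 10.0.0.3 remains
 * only in the new set and is passed to totempg_member_add().
 */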
nodelist_dynamic_notify( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { int res; unsigned int ring_no; unsigned int member_no; char tmp_str[ICMAP_KEYNAME_MAXLEN]; uint8_t reloading; struct totem_config *totem_config = (struct totem_config *)user_data; /* * If a full reload is in progress then don't do anything until it's done and * can reconfigure it all atomically */ if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) { return ; } res = sscanf(key_name, "nodelist.node.%u.ring%u%s", &member_no, &ring_no, tmp_str); if (res != 3) return ; if (strcmp(tmp_str, "_addr") != 0) { return; } put_nodelist_members_to_config(totem_config, 1); } /* * Tries to find node (node_pos) in config nodelist which address matches any * local interface. Address can be stored in ring0_addr or if ipaddr_key_prefix is not NULL * key with prefix ipaddr_key is used (there can be multiuple of them) * This function differs * from find_local_node_in_nodelist because it doesn't need bindnetaddr, * but doesn't work when bind addr is network address (so IP must be exact * match). * * Returns 1 on success (address was found, node_pos is then correctly set) or 0 on failure. */ int totem_config_find_local_addr_in_nodelist(const char *ipaddr_key_prefix, unsigned int *node_pos) { - struct list_head addrs; + struct qb_list_head addrs; struct totem_ip_if_address *if_addr; icmap_iter_t iter, iter2; const char *iter_key, *iter_key2; - struct list_head *list; + struct qb_list_head *list; const char *ipaddr_key; int ip_version; struct totem_ip_address node_addr; char *node_addr_str; int node_found = 0; int res = 0; char tmp_key[ICMAP_KEYNAME_MAXLEN]; if (totemip_getifaddrs(&addrs) == -1) { return 0; } ip_version = totem_config_get_ip_version(); iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue ; } free(node_addr_str); /* * ring0_addr found -> let's iterate thru ipaddr_key_prefix */ snprintf(tmp_key, sizeof(tmp_key), "nodelist.node.%u.%s", *node_pos, (ipaddr_key_prefix != NULL ? ipaddr_key_prefix : "ring0_addr")); iter2 = icmap_iter_init(tmp_key); while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) { /* * ring0_addr must be exact match, not prefix */ ipaddr_key = (ipaddr_key_prefix != NULL ? 
iter_key2 : tmp_key); if (icmap_get_string(ipaddr_key, &node_addr_str) != CS_OK) { continue ; } if (totemip_parse(&node_addr, node_addr_str, ip_version) == -1) { free(node_addr_str); continue ; } free(node_addr_str); /* * Try to match ip with if_addrs */ node_found = 0; - for (list = addrs.next; list != &addrs; list = list->next) { - if_addr = list_entry(list, struct totem_ip_if_address, list); + qb_list_for_each(list, &(addrs)) { + if_addr = qb_list_entry(list, struct totem_ip_if_address, list); if (totemip_equal(&node_addr, &if_addr->ip_addr)) { node_found = 1; break; } } if (node_found) { break ; } } icmap_iter_finalize(iter2); if (node_found) { break ; } } icmap_iter_finalize(iter); totemip_freeifaddrs(&addrs); return (node_found); } static void config_convert_nodelist_to_interface(struct totem_config *totem_config) { int res = 0; unsigned int node_pos; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key2[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; unsigned int linknumber = 0; icmap_iter_t iter; const char *iter_key; if (totem_config_find_local_addr_in_nodelist(NULL, &node_pos)) { /* * We found node, so create interface section */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); iter = icmap_iter_init(tmp_key); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.ring%u%s", &node_pos, &linknumber, tmp_key2); if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { continue ; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue; } snprintf(tmp_key2, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", linknumber); icmap_set_string(tmp_key2, node_addr_str); free(node_addr_str); } icmap_iter_finalize(iter); } } extern int totem_config_read ( struct totem_config *totem_config, const char **error_string, uint64_t *warnings) { int res = 0; char *str; unsigned int linknumber = 0; int member_count = 0; icmap_iter_t iter, member_iter; const char *iter_key; const char *member_iter_key; char linknumber_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key[ICMAP_KEYNAME_MAXLEN]; uint8_t u8; uint16_t u16; uint32_t u32; char *cluster_name = NULL; int i; int local_node_pos; int nodeid_set; *warnings = 0; memset (totem_config, 0, sizeof (struct totem_config)); totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); if (totem_config->interfaces == 0) { *error_string = "Out of memory trying to allocate ethernet interface storage area"; return -1; } memset (totem_config->interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX); strcpy (totem_config->link_mode, "passive"); icmap_get_uint32("totem.version", (uint32_t *)&totem_config->version); if (totem_get_crypto(totem_config) != 0) { *error_string = "crypto_cipher requires crypto_hash with value other than none"; return -1; } if (icmap_get_string("totem.link_mode", &str) == CS_OK) { if (strlen(str) >= TOTEM_LINK_MODE_BYTES) { *error_string = "totem.link_mode is too long"; free(str); return -1; } strcpy (totem_config->link_mode, str); free(str); } icmap_get_uint32("totem.nodeid", &totem_config->node_id); totem_config->clear_node_high_bit = 0; if (icmap_get_string("totem.clear_node_high_bit", &str) == CS_OK) { if (strcmp (str, "yes") == 0) { totem_config->clear_node_high_bit = 1; } free(str); } icmap_get_uint32("totem.threads", &totem_config->threads); icmap_get_uint32("totem.netmtu", &totem_config->net_mtu); if (icmap_get_string("totem.cluster_name", &cluster_name) != CS_OK) { cluster_name = NULL; } totem_config->ip_version = 
totem_config_get_ip_version(); if (icmap_get_string("totem.interface.0.bindnetaddr", &str) != CS_OK) { /* * We were not able to find ring 0 bindnet addr. Try to use nodelist informations */ config_convert_nodelist_to_interface(totem_config); } else { free(str); } /* * Broadcast option is global but set in interface section, * so reset before processing interfaces. */ totem_config->broadcast_use = 0; iter = icmap_iter_init("totem.interface."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "totem.interface.%[^.].%s", linknumber_key, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "bindnetaddr") != 0) { continue; } member_count = 0; linknumber = atoi(linknumber_key); if (linknumber >= INTERFACE_MAX) { free(cluster_name); snprintf (error_string_response, sizeof(error_string_response), "parse error in config: interface ring number %u is bigger than allowed maximum %u\n", linknumber, INTERFACE_MAX - 1); *error_string = error_string_response; return -1; } /* * Get the bind net address */ if (icmap_get_string(iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[linknumber].bindnet, str, totem_config->ip_version); free(str); } /* * Get interface multicast address */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", linknumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[linknumber].mcast_addr, str, totem_config->ip_version); free(str); } else { /* * User not specified address -> autogenerate one from cluster_name key * (if available). Return code is intentionally ignored, because * udpu doesn't need mcastaddr and validity of mcastaddr for udp is * checked later anyway. */ (void)get_cluster_mcast_addr (cluster_name, linknumber, totem_config->ip_version, &totem_config->interfaces[linknumber].mcast_addr); } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", linknumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { if (strcmp (str, "yes") == 0) { totem_config->broadcast_use = 1; } free(str); } /* * Get mcast port */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", linknumber); if (icmap_get_uint16(tmp_key, &totem_config->interfaces[linknumber].ip_port) != CS_OK) { if (totem_config->broadcast_use) { totem_config->interfaces[linknumber].ip_port = DEFAULT_PORT + (2 * linknumber); } else { totem_config->interfaces[linknumber].ip_port = DEFAULT_PORT; } } /* * Get the TTL */ totem_config->interfaces[linknumber].ttl = 1; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", linknumber); if (icmap_get_uint8(tmp_key, &u8) == CS_OK) { totem_config->interfaces[linknumber].ttl = u8; } /* * Get the knet link params */ totem_config->interfaces[linknumber].knet_link_priority = 1; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_link_priority", linknumber); if (icmap_get_uint8(tmp_key, &u8) == CS_OK) { totem_config->interfaces[linknumber].knet_link_priority = u8; } totem_config->interfaces[linknumber].knet_ping_interval = KNET_PING_INTERVAL; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_interval", linknumber); if (icmap_get_uint32(tmp_key, &u32) == CS_OK) { totem_config->interfaces[linknumber].knet_ping_interval = u32; } totem_config->interfaces[linknumber].knet_ping_timeout = KNET_PING_TIMEOUT; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_timeout", linknumber); if (icmap_get_uint32(tmp_key, &u32) == CS_OK) { 
totem_config->interfaces[linknumber].knet_ping_timeout = u32; } totem_config->interfaces[linknumber].knet_ping_precision = KNET_PING_PRECISION; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_precision", linknumber); if (icmap_get_uint32(tmp_key, &u32) == CS_OK) { totem_config->interfaces[linknumber].knet_ping_precision = u32; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", linknumber); member_iter = icmap_iter_init(tmp_key); while ((member_iter_key = icmap_iter_next(member_iter, NULL, NULL)) != NULL) { if (member_count == 0) { if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_IGNORED; break; } else { *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED; } } if (icmap_get_string(member_iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[linknumber].member_list[member_count++], str, totem_config->ip_version); } } icmap_iter_finalize(member_iter); totem_config->interfaces[linknumber].member_count = member_count; totem_config->interface_count++; } icmap_iter_finalize(iter); /* * Use broadcast is global, so if set, make sure to fill mcast addr correctly */ if (totem_config->broadcast_use) { for (linknumber = 0; linknumber < totem_config->interface_count; linknumber++) { totemip_parse (&totem_config->interfaces[linknumber].mcast_addr, "255.255.255.255", 0); } } /* * Store automatically generated items back to icmap */ for (i = 0; i < totem_config->interface_count; i++) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", i); if (icmap_get_string(tmp_key, &str) == CS_OK) { free(str); } else { str = (char *)totemip_print(&totem_config->interfaces[i].mcast_addr); icmap_set_string(tmp_key, str); } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", i); if (icmap_get_uint16(tmp_key, &u16) != CS_OK) { icmap_set_uint16(tmp_key, totem_config->interfaces[i].ip_port); } } totem_config->transport_number = TOTEM_TRANSPORT_KNET; if (icmap_get_string("totem.transport", &str) == CS_OK) { if (strcmp (str, "udpu") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_UDPU; } if (strcmp (str, "udp") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_UDP; } if (strcmp (str, "knet") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_KNET; } free(str); } free(cluster_name); /* * Check existence of nodelist */ if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); /* * find local node */ local_node_pos = find_local_node_in_nodelist(totem_config); if (local_node_pos != -1) { icmap_set_uint32("nodelist.local_node_pos", local_node_pos); snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", local_node_pos); nodeid_set = (totem_config->node_id != 0); if (icmap_get_uint32(tmp_key, &totem_config->node_id) == CS_OK && nodeid_set) { *warnings |= TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED; } /* * Make localnode ring0_addr read only, so we can be sure that local * node never changes. If rebinding to other IP would be in future * supported, this must be changed and handled properly! 
*/ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", local_node_pos); icmap_set_ro_access(tmp_key, 0, 1); icmap_set_ro_access("nodelist.local_node_pos", 0, 1); } put_nodelist_members_to_config(totem_config, 0); } /* * Get things that might change in the future (and can depend on totem_config->interfaces); */ totem_volatile_config_read(totem_config, NULL); icmap_set_uint8("config.totemconfig_reload_in_progress", 0); add_totem_config_notification(totem_config); return 0; } int totem_config_validate ( struct totem_config *totem_config, const char **error_string) { static char local_error_reason[512]; char parse_error[512]; const char *error_reason = local_error_reason; int i, j; unsigned int interface_max = INTERFACE_MAX; unsigned int port1, port2; if (totem_config->interface_count == 0) { error_reason = "No interfaces defined"; goto parse_error; } for (i = 0; i < totem_config->interface_count; i++) { /* * Some error checking of parsed data to make sure its valid */ struct totem_ip_address null_addr; memset (&null_addr, 0, sizeof (struct totem_ip_address)); if ((totem_config->transport_number == 0) && memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, sizeof (struct totem_ip_address)) == 0) { error_reason = "No multicast address specified"; goto parse_error; } if (totem_config->interfaces[i].ip_port == 0) { error_reason = "No multicast port specified"; goto parse_error; } if (totem_config->interfaces[i].ttl > 255) { error_reason = "Invalid TTL (should be 0..255)"; goto parse_error; } if (totem_config->transport_number != TOTEM_TRANSPORT_UDP && totem_config->interfaces[i].ttl != 1) { error_reason = "Can only set ttl on multicast transport types"; goto parse_error; } if (totem_config->interfaces[i].knet_link_priority > 255) { error_reason = "Invalid link priority (should be 0..255)"; goto parse_error; } if (totem_config->transport_number != TOTEM_TRANSPORT_KNET && totem_config->interfaces[i].knet_link_priority != 1) { error_reason = "Can only set link priority on knet transport type"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 && totem_config->node_id == 0) { error_reason = "An IPV6 network requires that a node ID be specified."; goto parse_error; } if (totem_config->broadcast_use == 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) { if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Multicast address family does not match bind address family"; goto parse_error; } if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) { error_reason = "mcastaddr is not a correct multicast address."; goto parse_error; } } if (totem_config->interfaces[0].bindnet.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Not all bind address belong to the same IP family"; goto parse_error; } /* * Ensure mcast address/port differs */ if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) { for (j = i + 1; j < totem_config->interface_count; j++) { port1 = totem_config->interfaces[i].ip_port; port2 = totem_config->interfaces[j].ip_port; if (totemip_equal(&totem_config->interfaces[i].mcast_addr, &totem_config->interfaces[j].mcast_addr) && (((port1 > port2 ? port1 : port2) - (port1 < port2 ? 
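/*
 * Rationale for the "<= 1" port distance test evaluated here (background
 * knowledge about corosync's UDP multicast transport, not something added by
 * this patch): each ring uses two ports, mcastport and mcastport - 1, so two
 * rings sharing a multicast address need ports at least 2 apart. For example,
 * 5405 and 5407 are fine (5404/5405 vs 5406/5407), while 5405 and 5406 would
 * both claim port 5405.
 */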
port1 : port2)) <= 1)) { error_reason = "Interfaces multicast address/port pair must differ"; goto parse_error; } } } } if (totem_config->version != 2) { error_reason = "This totem parser can only parse version 2 configurations."; goto parse_error; } if (totem_volatile_config_validate(totem_config, error_string) == -1) { return (-1); } if (check_for_duplicate_nodeids(totem_config, error_string) == -1) { return (-1); } /* * KNET Link values validation */ if (strcmp (totem_config->link_mode, "active") && strcmp (totem_config->link_mode, "rr") && strcmp (totem_config->link_mode, "passive")) { snprintf (local_error_reason, sizeof(local_error_reason), "The Knet link mode \"%s\" specified is invalid. It must be active, passive or rr.\n", totem_config->link_mode); goto parse_error; } /* Only Knet does multiple interfaces */ if (totem_config->transport_number != TOTEM_TRANSPORT_KNET) { interface_max = 1; } if (interface_max < totem_config->interface_count) { snprintf (parse_error, sizeof(parse_error), "%d is too many configured interfaces for non-Knet transport.", totem_config->interface_count); error_reason = parse_error; goto parse_error; } /* Only knet allows crypto */ if (totem_config->transport_number != TOTEM_TRANSPORT_KNET) { if ((strcmp(totem_config->crypto_cipher_type, "none") != 0) || (strcmp(totem_config->crypto_hash_type, "none") != 0)) { snprintf (parse_error, sizeof(parse_error), "crypto_cipher & crypto_hash are only valid for the Knet transport."); error_reason = parse_error; goto parse_error; } } if (totem_config->net_mtu == 0) { totem_config->net_mtu = 1500; } return 0; parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s\n", error_reason); *error_string = error_string_response; return (-1); } static int read_keyfile ( const char *key_location, struct totem_config *totem_config, const char **error_string) { int fd; int res; ssize_t expected_key_len = sizeof (totem_config->private_key); int saved_errno; char error_str[100]; const char *error_ptr; fd = open (key_location, O_RDONLY); if (fd == -1) { error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not open %s: %s\n", key_location, error_ptr); goto parse_error; } res = read (fd, totem_config->private_key, expected_key_len); saved_errno = errno; close (fd); if (res == -1) { error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not read %s: %s\n", key_location, error_ptr); goto parse_error; } totem_config->private_key_len = expected_key_len; if (res != expected_key_len) { snprintf (error_string_response, sizeof(error_string_response), "Could only read %d bits of 1024 bits from %s.\n", res * 8, key_location); goto parse_error; } return 0; parse_error: *error_string = error_string_response; return (-1); } int totem_config_keyread ( struct totem_config *totem_config, const char **error_string) { int got_key = 0; char *key_location = NULL; int res; size_t key_len; memset (totem_config->private_key, 0, 128); totem_config->private_key_len = 128; if (strcmp(totem_config->crypto_cipher_type, "none") == 0 && strcmp(totem_config->crypto_hash_type, "none") == 0) { return (0); } /* cmap may store the location of the key file */ if (icmap_get_string("totem.keyfile", &key_location) == CS_OK) { res = read_keyfile(key_location, totem_config, error_string); free(key_location); if (res) { goto key_error; } got_key = 1; } else { /* Or 
the key itself may be in the cmap */ if (icmap_get("totem.key", NULL, &key_len, NULL) == CS_OK) { if (key_len > sizeof (totem_config->private_key)) { sprintf(error_string_response, "key is too long"); goto key_error; } if (icmap_get("totem.key", totem_config->private_key, &key_len, NULL) == CS_OK) { totem_config->private_key_len = key_len; got_key = 1; } else { sprintf(error_string_response, "can't store private key"); goto key_error; } } } /* In desperation we read the default filename */ if (!got_key) { const char *filename = getenv("COROSYNC_TOTEM_AUTHKEY_FILE"); if (!filename) filename = COROSYSCONFDIR "/authkey"; res = read_keyfile(filename, totem_config, error_string); if (res) goto key_error; } return (0); key_error: *error_string = error_string_response; return (-1); } static void debug_dump_totem_config(const struct totem_config *totem_config) { log_printf(LOGSYS_LEVEL_DEBUG, "Token Timeout (%d ms) retransmit timeout (%d ms)", totem_config->token_timeout, totem_config->token_retransmit_timeout); log_printf(LOGSYS_LEVEL_DEBUG, "token hold (%d ms) retransmits before loss (%d retrans)", totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const); log_printf(LOGSYS_LEVEL_DEBUG, "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)", totem_config->join_timeout, totem_config->send_join_timeout, totem_config->consensus_timeout, totem_config->merge_timeout); log_printf(LOGSYS_LEVEL_DEBUG, "downcheck (%d ms) fail to recv const (%d msgs)", totem_config->downcheck_timeout, totem_config->fail_to_recv_const); log_printf(LOGSYS_LEVEL_DEBUG, "seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu); log_printf(LOGSYS_LEVEL_DEBUG, "window size per rotation (%d messages) maximum messages per rotation (%d messages)", totem_config->window_size, totem_config->max_messages); log_printf(LOGSYS_LEVEL_DEBUG, "missed count const (%d messages)", totem_config->miss_count_const); log_printf(LOGSYS_LEVEL_DEBUG, "heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed); log_printf(LOGSYS_LEVEL_DEBUG, "max_network_delay (%d ms)", totem_config->max_network_delay); } static void totem_change_notify( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { struct totem_config *totem_config = (struct totem_config *)user_data; uint32_t *param; uint8_t reloading; const char *deleted_key = NULL; const char *error_string; /* * If a full reload is in progress then don't do anything until it's done and * can reconfigure it all atomically */ if (icmap_get_uint8("config.reload_in_progress", &reloading) == CS_OK && reloading) return; param = totem_get_param_by_name((struct totem_config *)user_data, key_name); /* * Process change only if changed key is found in totem_config (-> param is not NULL) * or for special key token_coefficient. token_coefficient key is not stored in * totem_config, but it is used for computation of token timeout. */ if (!param && strcmp(key_name, "totem.token_coefficient") != 0) return; /* * Values other than UINT32 are not supported, or needed (yet) */ switch (event) { case ICMAP_TRACK_DELETE: deleted_key = key_name; break; case ICMAP_TRACK_ADD: case ICMAP_TRACK_MODIFY: deleted_key = NULL; break; default: break; } totem_volatile_config_read (totem_config, deleted_key); log_printf(LOGSYS_LEVEL_DEBUG, "Totem related config key changed. 
Dumping actual totem config."); debug_dump_totem_config(totem_config); if (totem_volatile_config_validate(totem_config, &error_string) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); /* * TODO: Consider corosync exit and/or load defaults for volatile * values. For now, log error seems to be enough */ } } static void totem_reload_notify( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { struct totem_config *totem_config = (struct totem_config *)user_data; uint32_t local_node_pos; const char *error_string; /* Reload has completed */ if (*(uint8_t *)new_val.data == 0) { put_nodelist_members_to_config (totem_config, 1); totem_volatile_config_read (totem_config, NULL); log_printf(LOGSYS_LEVEL_DEBUG, "Configuration reloaded. Dumping actual totem config."); debug_dump_totem_config(totem_config); if (totem_volatile_config_validate(totem_config, &error_string) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); /* * TODO: Consider corosync exit and/or load defaults for volatile * values. For now, log error seems to be enough */ } /* Reinstate the local_node_pos */ local_node_pos = find_local_node_in_nodelist(totem_config); if (local_node_pos != -1) { icmap_set_uint32("nodelist.local_node_pos", local_node_pos); } icmap_set_uint8("config.totemconfig_reload_in_progress", 0); } else { icmap_set_uint8("config.totemconfig_reload_in_progress", 1); } } static void add_totem_config_notification(struct totem_config *totem_config) { icmap_track_t icmap_track; icmap_track_add("totem.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, totem_change_notify, totem_config, &icmap_track); icmap_track_add("config.reload_in_progress", ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY, totem_reload_notify, totem_config, &icmap_track); icmap_track_add("nodelist.node.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, nodelist_dynamic_notify, (void *)totem_config, &icmap_track); } diff --git a/exec/totemconfig.h b/exec/totemconfig.h index 10607cc2..f42b8631 100644 --- a/exec/totemconfig.h +++ b/exec/totemconfig.h @@ -1,67 +1,66 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef TOTEMCONFIG_H_DEFINED #define TOTEMCONFIG_H_DEFINED #include #include -#include #include #include #include "totemsrp.h" #define TOTEM_CONFIG_WARNING_MEMBERS_IGNORED (1<<1) #define TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED (1<<2) #define TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED (1<<3) extern int totem_config_read ( struct totem_config *totem_config, const char **error_string, uint64_t *warnings); extern int totem_config_validate ( struct totem_config *totem_config, const char **error_string); extern int totem_config_keyread ( struct totem_config *totem_config, const char **error_string); extern int totem_config_find_local_addr_in_nodelist( const char *ipaddr_key_prefix, unsigned int *node_pos); #endif /* TOTEMCONFIG_H_DEFINED */ diff --git a/exec/totemip.c b/exec/totemip.c index 28a88365..013f39cb 100644 --- a/exec/totemip.c +++ b/exec/totemip.c @@ -1,512 +1,511 @@ /* * Copyright (c) 2005-2011 Red Hat, Inc. * * All rights reserved. * * Author: Patrick Caulfield (pcaulfie@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* IPv4/6 abstraction */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOCALHOST_IPV4 "127.0.0.1" #define LOCALHOST_IPV6 "::1" #define NETLINK_BUFSIZE 16384 #ifdef SO_NOSIGPIPE void totemip_nosigpipe(int s) { int on = 1; setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, (void *)&on, sizeof(on)); } #endif /* Compare two addresses */ int totemip_equal(const struct totem_ip_address *addr1, const struct totem_ip_address *addr2) { int addrlen = 0; if (addr1->family != addr2->family) return 0; if (addr1->family == AF_INET) { addrlen = sizeof(struct in_addr); } if (addr1->family == AF_INET6) { addrlen = sizeof(struct in6_addr); } assert(addrlen); if (memcmp(addr1->addr, addr2->addr, addrlen) == 0) return 1; else return 0; } /* Copy a totem_ip_address */ void totemip_copy(struct totem_ip_address *addr1, const struct totem_ip_address *addr2) { memcpy(addr1, addr2, sizeof(struct totem_ip_address)); } void totemip_copy_endian_convert(struct totem_ip_address *addr1, const struct totem_ip_address *addr2) { addr1->nodeid = swab32(addr2->nodeid); addr1->family = swab16(addr2->family); memcpy(addr1->addr, addr2->addr, TOTEMIP_ADDRLEN); } /* * Multicast address range is 224.0.0.0 to 239.255.255.255 this * translates to the first 4 bits == 1110 (0xE). * http://en.wikipedia.org/wiki/Multicast_address */ int32_t totemip_is_mcast(struct totem_ip_address *ip_addr) { uint32_t addr = 0; memcpy (&addr, ip_addr->addr, sizeof (uint32_t)); if (ip_addr->family == AF_INET) { addr = ntohl(addr); if ((addr >> 28) != 0xE) { return -1; } } return 0; } /* For sorting etc. params are void * for qsort's benefit */ int totemip_compare(const void *a, const void *b) { int i; const struct totem_ip_address *totemip_a = (const struct totem_ip_address *)a; const struct totem_ip_address *totemip_b = (const struct totem_ip_address *)b; struct in_addr ipv4_a1; struct in_addr ipv4_a2; struct in6_addr ipv6_a1; struct in6_addr ipv6_a2; unsigned short family; /* * Use memcpy to align since totem_ip_address is unaligned on various archs */ memcpy (&family, &totemip_a->family, sizeof (unsigned short)); if (family == AF_INET) { memcpy (&ipv4_a1, totemip_a->addr, sizeof (struct in_addr)); memcpy (&ipv4_a2, totemip_b->addr, sizeof (struct in_addr)); if (ipv4_a1.s_addr == ipv4_a2.s_addr) { return (0); } if (htonl(ipv4_a1.s_addr) < htonl(ipv4_a2.s_addr)) { return -1; } else { return +1; } } else if (family == AF_INET6) { /* * We can only compare 8 bits at time for portability reasons */ memcpy (&ipv6_a1, totemip_a->addr, sizeof (struct in6_addr)); memcpy (&ipv6_a2, totemip_b->addr, sizeof (struct in6_addr)); for (i = 0; i < 16; i++) { int res = ipv6_a1.s6_addr[i] - ipv6_a2.s6_addr[i]; if (res) { return res; } } return 0; } else { /* * Family not set, should be! 
*/ assert (0); } return 0; } /* Build a localhost totem_ip_address */ int totemip_localhost(int family, struct totem_ip_address *localhost) { const char *addr_text; memset (localhost, 0, sizeof (struct totem_ip_address)); if (family == AF_INET) { addr_text = LOCALHOST_IPV4; if (inet_pton(family, addr_text, (char *)&localhost->nodeid) <= 0) { return -1; } } else { addr_text = LOCALHOST_IPV6; } if (inet_pton(family, addr_text, (char *)localhost->addr) <= 0) return -1; localhost->family = family; return 0; } int totemip_localhost_check(const struct totem_ip_address *addr) { struct totem_ip_address localhost; if (totemip_localhost(addr->family, &localhost)) return 0; return totemip_equal(addr, &localhost); } const char *totemip_print(const struct totem_ip_address *addr) { static char buf[INET6_ADDRSTRLEN]; return (inet_ntop(addr->family, addr->addr, buf, sizeof(buf))); } /* Make a totem_ip_address into a usable sockaddr_storage */ int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, uint16_t port, struct sockaddr_storage *saddr, int *addrlen) { int ret = -1; if (ip_addr->family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)saddr; memset(sin, 0, sizeof(struct sockaddr_in)); #ifdef HAVE_SOCK_SIN_LEN sin->sin_len = sizeof(struct sockaddr_in); #endif sin->sin_family = ip_addr->family; sin->sin_port = ntohs(port); memcpy(&sin->sin_addr, ip_addr->addr, sizeof(struct in_addr)); *addrlen = sizeof(struct sockaddr_in); ret = 0; } if (ip_addr->family == AF_INET6) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)saddr; memset(sin, 0, sizeof(struct sockaddr_in6)); #ifdef HAVE_SOCK_SIN6_LEN sin->sin6_len = sizeof(struct sockaddr_in6); #endif sin->sin6_family = ip_addr->family; sin->sin6_port = ntohs(port); sin->sin6_scope_id = 2; memcpy(&sin->sin6_addr, ip_addr->addr, sizeof(struct in6_addr)); *addrlen = sizeof(struct sockaddr_in6); ret = 0; } return ret; } /* Converts an address string string into a totem_ip_address. 
family can be AF_INET, AF_INET6 or 0 ("for "don't care") */ int totemip_parse(struct totem_ip_address *totemip, const char *addr, int family) { struct addrinfo *ainfo; struct addrinfo ahints; struct sockaddr_in *sa; struct sockaddr_in6 *sa6; int ret; memset(&ahints, 0, sizeof(ahints)); ahints.ai_socktype = SOCK_DGRAM; ahints.ai_protocol = IPPROTO_UDP; ahints.ai_family = family; /* Lookup the nodename address */ ret = getaddrinfo(addr, NULL, &ahints, &ainfo); if (ret) return -1; sa = (struct sockaddr_in *)ainfo->ai_addr; sa6 = (struct sockaddr_in6 *)ainfo->ai_addr; totemip->family = ainfo->ai_family; if (ainfo->ai_family == AF_INET) memcpy(totemip->addr, &sa->sin_addr, sizeof(struct in_addr)); else memcpy(totemip->addr, &sa6->sin6_addr, sizeof(struct in6_addr)); freeaddrinfo(ainfo); return 0; } /* Make a sockaddr_* into a totem_ip_address */ int totemip_sockaddr_to_totemip_convert(const struct sockaddr_storage *saddr, struct totem_ip_address *ip_addr) { int ret = -1; ip_addr->family = saddr->ss_family; ip_addr->nodeid = 0; if (saddr->ss_family == AF_INET) { const struct sockaddr_in *sin = (const struct sockaddr_in *)saddr; memcpy(ip_addr->addr, &sin->sin_addr, sizeof(struct in_addr)); ret = 0; } if (saddr->ss_family == AF_INET6) { const struct sockaddr_in6 *sin = (const struct sockaddr_in6 *)saddr; memcpy(ip_addr->addr, &sin->sin6_addr, sizeof(struct in6_addr)); ret = 0; } return ret; } -int totemip_getifaddrs(struct list_head *addrs) +int totemip_getifaddrs(struct qb_list_head *addrs) { struct ifaddrs *ifap, *ifa; struct totem_ip_if_address *if_addr; if (getifaddrs(&ifap) != 0) return (-1); - list_init(addrs); + qb_list_init(addrs); for (ifa = ifap; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr == NULL || ifa->ifa_netmask == NULL) continue ; if ((ifa->ifa_addr->sa_family != AF_INET && ifa->ifa_addr->sa_family != AF_INET6) || (ifa->ifa_netmask->sa_family != AF_INET && ifa->ifa_netmask->sa_family != AF_INET6 && ifa->ifa_netmask->sa_family != 0)) continue ; if (ifa->ifa_netmask->sa_family == 0) { ifa->ifa_netmask->sa_family = ifa->ifa_addr->sa_family; } if_addr = malloc(sizeof(struct totem_ip_if_address)); if (if_addr == NULL) { goto error_free_ifaddrs; } - list_init(&if_addr->list); + qb_list_init(&if_addr->list); memset(if_addr, 0, sizeof(struct totem_ip_if_address)); if_addr->interface_up = ifa->ifa_flags & IFF_UP; if_addr->interface_num = if_nametoindex(ifa->ifa_name); if_addr->name = strdup(ifa->ifa_name); if (if_addr->name == NULL) { goto error_free_addr; } if (totemip_sockaddr_to_totemip_convert((const struct sockaddr_storage *)ifa->ifa_addr, &if_addr->ip_addr) == -1) { goto error_free_addr_name; } if (totemip_sockaddr_to_totemip_convert((const struct sockaddr_storage *)ifa->ifa_netmask, &if_addr->mask_addr) == -1) { goto error_free_addr_name; } - list_add_tail(&if_addr->list, addrs); + qb_list_add_tail(&if_addr->list, addrs); } freeifaddrs(ifap); return (0); error_free_addr_name: free(if_addr->name); error_free_addr: free(if_addr); error_free_ifaddrs: totemip_freeifaddrs(addrs); freeifaddrs(ifap); return (-1); } -void totemip_freeifaddrs(struct list_head *addrs) +void totemip_freeifaddrs(struct qb_list_head *addrs) { struct totem_ip_if_address *if_addr; - struct list_head *list; + struct qb_list_head *list; - for (list = addrs->next; list != addrs;) { - if_addr = list_entry(list, struct totem_ip_if_address, list); - list = list->next; + qb_list_for_each(list, addrs) { + if_addr = qb_list_entry(list, struct totem_ip_if_address, list); free(if_addr->name); - list_del(&if_addr->list); + 
qb_list_del(&if_addr->list); free(if_addr); } - list_init(addrs); + qb_list_init(addrs); } int totemip_iface_check(struct totem_ip_address *bindnet, struct totem_ip_address *boundto, int *interface_up, int *interface_num, int mask_high_bit) { - struct list_head addrs; - struct list_head *list; + struct qb_list_head addrs; + struct qb_list_head *list; struct totem_ip_if_address *if_addr; struct totem_ip_address bn_netaddr, if_netaddr; socklen_t addr_len; socklen_t si; int res = -1; int exact_match_found = 0; int net_match_found = 0; *interface_up = 0; *interface_num = 0; if (totemip_getifaddrs(&addrs) == -1) { return (-1); } - for (list = addrs.next; list != &addrs; list = list->next) { - if_addr = list_entry(list, struct totem_ip_if_address, list); + qb_list_for_each(list, &addrs) { + if_addr = qb_list_entry(list, struct totem_ip_if_address, list); if (bindnet->family != if_addr->ip_addr.family) continue ; addr_len = 0; switch (bindnet->family) { case AF_INET: addr_len = sizeof(struct in_addr); break; case AF_INET6: addr_len = sizeof(struct in6_addr); break; } if (addr_len == 0) continue ; totemip_copy(&bn_netaddr, bindnet); totemip_copy(&if_netaddr, &if_addr->ip_addr); if (totemip_equal(&bn_netaddr, &if_netaddr)) { exact_match_found = 1; } for (si = 0; si < addr_len; si++) { bn_netaddr.addr[si] = bn_netaddr.addr[si] & if_addr->mask_addr.addr[si]; if_netaddr.addr[si] = if_netaddr.addr[si] & if_addr->mask_addr.addr[si]; } if (exact_match_found || (!net_match_found && totemip_equal(&bn_netaddr, &if_netaddr))) { totemip_copy(boundto, &if_addr->ip_addr); boundto->nodeid = bindnet->nodeid; *interface_up = if_addr->interface_up; *interface_num = if_addr->interface_num; if (boundto->family == AF_INET && boundto->nodeid == 0) { unsigned int nodeid = 0; memcpy (&nodeid, boundto->addr, sizeof (int)); #if __BYTE_ORDER == __LITTLE_ENDIAN nodeid = swab32 (nodeid); #endif if (mask_high_bit) { nodeid &= 0x7FFFFFFF; } boundto->nodeid = nodeid; } net_match_found = 1; res = 0; if (exact_match_found) { goto finished; } } } finished: totemip_freeifaddrs(&addrs); return (res); } #define TOTEMIP_UDP_HEADER_SIZE 8 #define TOTEMIP_IPV4_HEADER_SIZE 20 #define TOTEMIP_IPV6_HEADER_SIZE 40 size_t totemip_udpip_header_size(int family) { size_t header_size; header_size = 0; switch (family) { case AF_INET: header_size = TOTEMIP_UDP_HEADER_SIZE + TOTEMIP_IPV4_HEADER_SIZE; break; case AF_INET6: header_size = TOTEMIP_UDP_HEADER_SIZE + TOTEMIP_IPV6_HEADER_SIZE; break; } return (header_size); } diff --git a/exec/totemknet.c b/exec/totemknet.c index c8932c88..46bff46f 100644 --- a/exec/totemknet.c +++ b/exec/totemknet.c @@ -1,1018 +1,1017 @@ /* * Copyright (c) 2016 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfie@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include "totemknet.h" #include "util.h" #include #include #include #include #include #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) /* Buffers for sendmmsg/recvmmsg */ #define MAX_BUFFERS 10 /* Should match that used by cfg */ #define CFG_INTERFACE_STATUS_MAX_LEN 512 /* Log messages received from libknet */ #define LOG_BUFFER_SIZE 8192 struct totemknet_instance { struct crypto_instance *crypto_inst; qb_loop_t *poll_handle; knet_handle_t knet_handle; int link_mode; void *context; void (*totemknet_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemknet_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address, unsigned int link_no); void (*totemknet_mtu_changed) ( void *context, int net_mtu); void (*totemknet_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemknet_log_level_security; int totemknet_log_level_error; int totemknet_log_level_warning; int totemknet_log_level_notice; int totemknet_log_level_debug; int totemknet_subsys_id; int knet_subsys_id; void (*totemknet_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); void *knet_context; char iov_buffer[MAX_BUFFERS][FRAME_SIZE_MAX]; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; char *link_status[INTERFACE_MAX]; int num_links; struct totem_ip_address my_ids[INTERFACE_MAX]; uint16_t ip_port[INTERFACE_MAX]; int our_nodeid; struct totem_config *totem_config; totemsrp_stats_t *stats; struct totem_ip_address token_target; qb_loop_timer_handle timer_netif_check_timeout; qb_loop_timer_handle timer_merge_detect_timeout; int send_merge_detect_message; unsigned int merge_detect_messages_sent_before_timeout; int logpipes[2]; int knet_fd; }; struct work_item { const void *msg; unsigned int msg_len; struct totemknet_instance *instance; }; int totemknet_member_list_rebind_ip ( void *knet_context); static void totemknet_start_merge_detect_timeout( void *knet_context); static void totemknet_stop_merge_detect_timeout( void *knet_context); static void totemknet_instance_initialize (struct totemknet_instance *instance) { memset (instance, 0, sizeof (struct totemknet_instance)); } #define knet_log_printf(level, format, args...) 
\ do { \ instance->totemknet_log_printf ( \ level, instance->totemknet_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); #define libknet_log_printf(level, format, args...) \ do { \ instance->totemknet_log_printf ( \ level, instance->knet_subsys_id, \ __FUNCTION__, "libknet.h", __LINE__, \ (const char *)format, ##args); \ } while (0); #define KNET_LOGSYS_PERROR(err_num, level, fmt, args...) \ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemknet_log_printf ( \ level, instance->totemknet_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)", ##args, _error_ptr, err_num); \ } while(0) static int dst_host_filter_callback_fn(void *private_data, const unsigned char *outdata, ssize_t outdata_len, uint8_t tx_rx, uint16_t this_host_id, uint16_t src_host_id, int8_t *channel, uint16_t *dst_host_ids, size_t *dst_host_ids_entries) { struct totem_message_header *header = (struct totem_message_header *)outdata; // struct totemknet_instance *instance = (struct totemknet_instance *)private_data; int res; // knet_log_printf (LOGSYS_LEVEL_DEBUG, "Filter notification called: %s target nodeid=%d, len=%ld", tx_rx==KNET_NOTIFY_RX?"RX":"TX", header->target_nodeid, outdata_len); *channel = 0; if (header->target_nodeid) { dst_host_ids[0] = header->target_nodeid; *dst_host_ids_entries = 1; res = 0; /* unicast message */ } else { *dst_host_ids_entries = 0; res = 1; /* multicast message */ } return res; } static void socket_error_callback_fn(void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno) { struct totemknet_instance *instance = (struct totemknet_instance *)private_data; knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet socket ERROR notification called: txrx=%d, error=%d, errorno=%d", tx_rx, error, errorno); if ((error == -1 && errorno != EAGAIN) || (error == 0)) { knet_handle_remove_datafd(instance->knet_handle, datafd); } } static void host_change_callback_fn(void *private_data, uint16_t host_id, uint8_t reachable, uint8_t remote, uint8_t external) { struct totemknet_instance *instance = (struct totemknet_instance *)private_data; // TODO: what? if anything. knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet host change callback. nodeid: %d reachable: %d", host_id, reachable); } static void pmtu_change_callback_fn(void *private_data, unsigned int data_mtu) { struct totemknet_instance *instance = (struct totemknet_instance *)private_data; knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet pMTU change: %d", data_mtu); // TODO: Check this instance->totemknet_mtu_changed(instance->context, data_mtu - totemip_udpip_header_size(AF_INET)); } int totemknet_crypto_set ( void *knet_context, const char *cipher_type, const char *hash_type) { return (0); } static inline void ucast_sendmsg ( struct totemknet_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { int res = 0; struct totem_message_header *header = (struct totem_message_header *)msg; header->target_nodeid = system_to->nodeid; /* * Transmit unicast message * An error here is recovered by totemsrp */ /* * If sending to ourself then just pass it through knet back to * the receive fn. 
knet does not do local->local delivery */ if (system_to->nodeid == instance->our_nodeid) { res = write (instance->knet_fd+1, msg, msg_len); if (res < 0) { KNET_LOGSYS_PERROR (errno, instance->totemknet_log_level_debug, "sendmsg(ucast-local) failed (non-critical)"); } } else { res = write (instance->knet_fd, msg, msg_len); if (res < 0) { KNET_LOGSYS_PERROR (errno, instance->totemknet_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } } static inline void mcast_sendmsg ( struct totemknet_instance *instance, const void *msg, unsigned int msg_len, int only_active) { int res; struct totem_message_header *header = (struct totem_message_header *)msg; header->target_nodeid = 0; // log_printf (LOGSYS_LEVEL_DEBUG, "totemknet: mcast_sendmsg. only_active=%d, len=%d", only_active, msg_len); res = write (instance->knet_fd, msg, msg_len); if (res < msg_len) { knet_log_printf (LOGSYS_LEVEL_DEBUG, "totemknet: mcast_send writev returned %d", res); } /* * Also send it to ourself, directly into * the receive fn. knet does not do local->local delivery */ res = write (instance->knet_fd+1, msg, msg_len); if (res < msg_len) { knet_log_printf (LOGSYS_LEVEL_DEBUG, "totemknet: mcast_send writev (local) returned %d", res); } if (!only_active || instance->send_merge_detect_message) { /* * Current message was sent to all nodes */ instance->merge_detect_messages_sent_before_timeout++; instance->send_merge_detect_message = 0; } } static int node_compare(const void *aptr, const void *bptr) { uint16_t a,b; a = *(uint16_t *)aptr; b = *(uint16_t *)bptr; return a > b; } int totemknet_ifaces_get (void *knet_context, char ***status, unsigned int *iface_count) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; struct knet_link_status link_status; uint16_t host_list[KNET_MAX_HOST]; size_t num_hosts; int i,j; char *ptr; int res = 0; /* * Don't do the whole 'link_info' bit if the caller just wants * a count of interfaces.
*/ if (status) { res = knet_host_get_host_list(instance->knet_handle, host_list, &num_hosts); if (res) { return (-1); } qsort(host_list, num_hosts, sizeof(uint16_t), node_compare); /* num_links is actually the highest link ID */ for (i=0; i <= instance->num_links; i++) { ptr = instance->link_status[i]; for (j=0; jknet_handle, host_list[j], i, &link_status); if (res == 0) { ptr[j] = '0' + (link_status.enabled | link_status.connected<<1 | link_status.dynconnected<<2); } else { ptr[j] += '?'; } } ptr[num_hosts] = '\0'; } *status = instance->link_status; } *iface_count = instance->num_links+1; return (res); } int totemknet_finalize ( void *knet_context) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; int res = 0; knet_log_printf(LOG_DEBUG, "totemknet: finalize"); qb_loop_poll_del (instance->poll_handle, instance->logpipes[0]); qb_loop_poll_del (instance->poll_handle, instance->knet_fd); knet_handle_free(instance->knet_handle); totemknet_stop_merge_detect_timeout(instance); return (res); } static int log_deliver_fn ( int fd, int revents, void *data) { struct totemknet_instance *instance = (struct totemknet_instance *)data; char buffer[LOG_BUFFER_SIZE]; int len; len = read(fd, buffer, sizeof(buffer)); if (len) { struct knet_log_msg *msg = (struct knet_log_msg *)buffer; switch (msg->msglevel) { case KNET_LOG_ERR: libknet_log_printf (LOGSYS_LEVEL_ERROR, "%s", msg->msg); break; case KNET_LOG_WARN: libknet_log_printf (LOGSYS_LEVEL_WARNING, "%s", msg->msg); break; case KNET_LOG_INFO: libknet_log_printf (LOGSYS_LEVEL_INFO, "%s", msg->msg); break; case KNET_LOG_DEBUG: libknet_log_printf (LOGSYS_LEVEL_DEBUG, "%s", msg->msg); break; } } return 0; } static int data_deliver_fn ( int fd, int revents, void *data) { struct totemknet_instance *instance = (struct totemknet_instance *)data; struct mmsghdr msg_recv[MAX_BUFFERS]; struct iovec iov_recv[MAX_BUFFERS]; struct sockaddr_storage system_from; int msgs_received; int i; for (i=0; iiov_buffer[i]; iov_recv[i].iov_len = FRAME_SIZE_MAX; msg_recv[i].msg_hdr.msg_name = &system_from; msg_recv[i].msg_hdr.msg_namelen = sizeof (struct sockaddr_storage); msg_recv[i].msg_hdr.msg_iov = &iov_recv[i]; msg_recv[i].msg_hdr.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv[i].msg_hdr.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv[i].msg_hdr.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv[i].msg_hdr.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv[i].msg_hdr.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv[i].msg_hdr.msg_accrightslen = 0; #endif } msgs_received = recvmmsg (fd, msg_recv, MAX_BUFFERS, MSG_NOSIGNAL | MSG_DONTWAIT, NULL); if (msgs_received == -1) { return (0); } for (i=0; istats_recv += msg_recv[i].msg_len; /* * Handle incoming message */ instance->totemknet_deliver_fn ( instance->context, instance->iov_buffer[i], msg_recv[i].msg_len); } return (0); } static void timer_function_netif_check_timeout ( void *data) { struct totemknet_instance *instance = (struct totemknet_instance *)data; int i; for (i=0; itotem_config->interface_count; i++) instance->totemknet_iface_change_fn (instance->context, &instance->my_ids[i], i); } /* * Create an instance */ int totemknet_initialize ( qb_loop_t *poll_handle, void **knet_context, struct totem_config *totem_config, totemsrp_stats_t *stats, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address, unsigned int 
link_no), void (*mtu_changed) ( void *context, int net_mtu), void (*target_set_completed) ( void *context)) { struct totemknet_instance *instance; int8_t channel=0; int res; int i; instance = malloc (sizeof (struct totemknet_instance)); if (instance == NULL) { return (-1); } totemknet_instance_initialize (instance); instance->totem_config = totem_config; instance->stats = stats; /* * Configure logging */ instance->totemknet_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemknet_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemknet_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemknet_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemknet_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemknet_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemknet_log_printf = totem_config->totem_logging_configuration.log_printf; instance->knet_subsys_id = _logsys_subsys_create("KNET", "libknet.h"); /* * Initialize local variables for totemknet */ instance->our_nodeid = instance->totem_config->node_id; for (i=0; i< instance->totem_config->interface_count; i++) { totemip_copy(&instance->my_ids[i], &totem_config->interfaces[i].bindnet); instance->my_ids[i].nodeid = instance->our_nodeid; instance->ip_port[i] = totem_config->interfaces[i].ip_port; /* Needed for totemsrp */ totem_config->interfaces[i].boundto.nodeid = instance->our_nodeid; } instance->poll_handle = poll_handle; instance->context = context; instance->totemknet_deliver_fn = deliver_fn; instance->totemknet_iface_change_fn = iface_change_fn; instance->totemknet_mtu_changed = mtu_changed; instance->totemknet_target_set_completed = target_set_completed; pipe(instance->logpipes); fcntl(instance->logpipes[0], F_SETFL, O_NONBLOCK); fcntl(instance->logpipes[1], F_SETFL, O_NONBLOCK); instance->knet_handle = knet_handle_new(instance->totem_config->node_id, instance->logpipes[1], KNET_LOG_DEBUG); if (!instance->knet_handle) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_CRIT, "knet_handle_new failed"); return (-1); } res = knet_handle_pmtud_setfreq(instance->knet_handle, 5); if (res) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_pmtud_setfreq failed"); } res = knet_handle_enable_filter(instance->knet_handle, instance, dst_host_filter_callback_fn); if (res) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_filter failed"); } res = knet_handle_enable_sock_notify(instance->knet_handle, instance, socket_error_callback_fn); if (res) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_sock_notify failed"); } res = knet_host_enable_status_change_notify(instance->knet_handle, instance, host_change_callback_fn); if (res) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_host_enable_status_change_notify failed"); } res = knet_handle_enable_pmtud_notify(instance->knet_handle, instance, pmtu_change_callback_fn); if (res) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_pmtud_notify failed"); } /* Get an fd into knet */ instance->knet_fd = 0; res = knet_handle_add_datafd(instance->knet_handle, &instance->knet_fd, &channel); if (res) { knet_log_printf(LOG_DEBUG, "knet_handle_add_datafd failed: %s", strerror(errno)); return -1; } /* Enable crypto if requested */ if (strcmp(instance->totem_config->crypto_cipher_type, "none") != 0) { struct 
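/*
 * Condensed view of the knet bring-up performed by totemknet_initialize()
 * (summarising calls that appear above and below, no new behaviour):
 *
 *     knet_handle_new(node_id, log_pipe_fd, KNET_LOG_DEBUG)
 *     knet_handle_pmtud_setfreq(handle, 5)
 *     knet_handle_enable_filter(), knet_handle_enable_sock_notify()
 *     knet_host_enable_status_change_notify(), knet_handle_enable_pmtud_notify()
 *     knet_handle_add_datafd(handle, &knet_fd, &channel)
 *     knet_handle_crypto() only when a cipher other than "none" is configured
 *     knet_handle_setfwd(handle, 1)
 *
 * Failures of the filter/notify hooks are only logged, whereas a failed
 * knet_handle_new() or knet_handle_add_datafd() aborts initialization.
 */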
knet_handle_crypto_cfg crypto_cfg; strcpy(crypto_cfg.crypto_model, "nss"); strcpy(crypto_cfg.crypto_cipher_type, instance->totem_config->crypto_cipher_type); strcpy(crypto_cfg.crypto_hash_type, instance->totem_config->crypto_hash_type); memcpy(crypto_cfg.private_key, instance->totem_config->private_key, instance->totem_config->private_key_len); crypto_cfg.private_key_len = instance->totem_config->private_key_len; res = knet_handle_crypto(instance->knet_handle, &crypto_cfg); if (res == -1) { knet_log_printf(LOG_ERR, "knet_handle_crypto failed: %s", strerror(errno)); return -1; } if (res == -2) { knet_log_printf(LOG_ERR, "knet_handle_crypto failed: -2"); return -1; } knet_log_printf(LOG_INFO, "kronosnet crypto initialized: %s/%s", crypto_cfg.crypto_cipher_type, crypto_cfg.crypto_hash_type); } knet_handle_setfwd(instance->knet_handle, 1); instance->link_mode = KNET_LINK_POLICY_PASSIVE; if (strcmp(instance->totem_config->link_mode, "active")==0) { instance->link_mode = KNET_LINK_POLICY_ACTIVE; } if (strcmp(instance->totem_config->link_mode, "rr")==0) { instance->link_mode = KNET_LINK_POLICY_RR; } for (i=0; ilink_status[i] = malloc(CFG_INTERFACE_STATUS_MAX_LEN); if (!instance->link_status[i]) { return -1; } } qb_loop_poll_add (instance->poll_handle, QB_LOOP_MED, instance->logpipes[0], POLLIN, instance, log_deliver_fn); qb_loop_poll_add (instance->poll_handle, QB_LOOP_HIGH, instance->knet_fd, POLLIN, instance, data_deliver_fn); /* * Upper layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); totemknet_start_merge_detect_timeout(instance); knet_log_printf (LOGSYS_LEVEL_INFO, "totemknet initialized"); *knet_context = instance; return (0); } void *totemknet_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemknet_buffer_release (void *ptr) { return free (ptr); } int totemknet_processor_count_set ( void *knet_context, int processor_count) { return (0); } int totemknet_recv_flush (void *knet_context) { return (0); } int totemknet_send_flush (void *knet_context) { return (0); } int totemknet_token_send ( void *knet_context, const void *msg, unsigned int msg_len) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemknet_mcast_flush_send ( void *knet_context, const void *msg, unsigned int msg_len) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; int res = 0; mcast_sendmsg (instance, msg, msg_len, 0); return (res); } int totemknet_mcast_noflush_send ( void *knet_context, const void *msg, unsigned int msg_len) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; int res = 0; mcast_sendmsg (instance, msg, msg_len, 1); return (res); } extern int totemknet_iface_check (void *knet_context) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; int res = 0; knet_log_printf(LOG_DEBUG, "totmeknet: iface_check"); return (res); } extern void totemknet_net_mtu_adjust (void *knet_context, struct totem_config *totem_config) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; totem_config->net_mtu -= totemip_udpip_header_size(AF_INET) + 23; knet_log_printf(LOG_DEBUG, "totemknet: Returning MTU of %d", 
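/*
 * Worked example for the MTU adjustment just above (constants taken from
 * totemip.c in this patch; the concrete MTU value is hypothetical): with
 * net_mtu = 1500 and AF_INET, totemip_udpip_header_size() returns
 * 8 (UDP) + 20 (IPv4) = 28, so after the additional 23-byte allowance the
 * usable totem payload becomes 1500 - 28 - 23 = 1449 bytes, which is the
 * value logged here.
 */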
totem_config->net_mtu); } int totemknet_token_target_set ( void *knet_context, const struct totem_ip_address *token_target) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); instance->totemknet_target_set_completed (instance->context); return (res); } extern int totemknet_recv_mcast_empty ( void *knet_context) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; unsigned int res; struct sockaddr_storage system_from; struct mmsghdr msg_recv[MAX_BUFFERS]; struct iovec iov_recv[MAX_BUFFERS]; struct pollfd ufd; int nfds; int msg_processed = 0; int i; for (i=0; iiov_buffer[i]; iov_recv[i].iov_len = FRAME_SIZE_MAX; msg_recv[i].msg_hdr.msg_name = &system_from; msg_recv[i].msg_hdr.msg_namelen = sizeof (struct sockaddr_storage); msg_recv[i].msg_hdr.msg_iov = &iov_recv[i]; msg_recv[i].msg_hdr.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv[i].msg_hdr.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv[i].msg_hdr.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv[i].msg_hdr.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv[i].msg_hdr.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv[i].msg_hdr.msg_accrightslen = 0; #endif } do { ufd.fd = instance->knet_fd; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmmsg (instance->knet_fd, msg_recv, MAX_BUFFERS, MSG_NOSIGNAL | MSG_DONTWAIT, NULL); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); return (msg_processed); } int totemknet_member_add ( void *knet_context, const struct totem_ip_address *local, const struct totem_ip_address *member, int link_no) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; int err; int port = instance->ip_port[link_no]; struct sockaddr_storage remote_ss; struct sockaddr_storage local_ss; int addrlen; if (member->nodeid == instance->our_nodeid) { return 0; /* Don't add ourself, we send loopback messages directly */ } /* Keep track of the number of links */ if (link_no > instance->num_links) { instance->num_links = link_no; } knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_add: %d (%s), link=%d", member->nodeid, totemip_print(member), link_no); knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: local: %d (%s)", local->nodeid, totemip_print(local)); if (link_no == 0) { if (knet_host_add(instance->knet_handle, member->nodeid)) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_host_add"); return -1; } if (knet_host_set_policy(instance->knet_handle, member->nodeid, instance->link_mode)) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_set_policy failed"); return -1; } } /* Casts to remove const */ totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)member, port+link_no, &remote_ss, &addrlen); totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)local, port+link_no, &local_ss, &addrlen); err = knet_link_set_config(instance->knet_handle, member->nodeid, link_no, &local_ss, &remote_ss); if (err) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_config failed"); return -1; } knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_add: Setting link prio to %d", instance->totem_config->interfaces[link_no].knet_link_priority); err = knet_link_set_priority(instance->knet_handle, member->nodeid, link_no, instance->totem_config->interfaces[link_no].knet_link_priority); if (err) { 
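/*
 * Descriptive summary of totemknet_member_add() as implemented above and
 * below (condensed for orientation, no new behaviour):
 *   - the local node is skipped; loopback traffic is written straight to
 *     knet_fd+1 in ucast_sendmsg()/mcast_sendmsg()
 *   - link 0 only: knet_host_add() followed by knet_host_set_policy()
 *   - every link: knet_link_set_config() with sockaddrs built from the
 *     configured port plus the link number, then knet_link_set_priority(),
 *     knet_link_set_ping_timers() and finally knet_link_set_enable(..., 1)
 */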
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_priority for nodeid %d, link %d failed", member->nodeid, link_no); } err = knet_link_set_ping_timers(instance->knet_handle, member->nodeid, link_no, instance->totem_config->interfaces[link_no].knet_ping_interval, instance->totem_config->interfaces[link_no].knet_ping_timeout, instance->totem_config->interfaces[link_no].knet_ping_precision); if (err) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_ping_timers for nodeid %d, link %d failed", member->nodeid, link_no); } err = knet_link_set_enable(instance->knet_handle, member->nodeid, link_no, 1); if (err) { KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_enable for nodeid %d, link %d failed", member->nodeid, link_no); return -1; } return (0); } int totemknet_member_remove ( void *knet_context, const struct totem_ip_address *token_target, int link_no) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; return knet_host_remove(instance->knet_handle, token_target->nodeid); } int totemknet_member_list_rebind_ip ( void *knet_context) { return (0); } static void timer_function_merge_detect_timeout ( void *data) { struct totemknet_instance *instance = (struct totemknet_instance *)data; if (instance->merge_detect_messages_sent_before_timeout == 0) { instance->send_merge_detect_message = 1; } instance->merge_detect_messages_sent_before_timeout = 0; totemknet_start_merge_detect_timeout(instance); } static void totemknet_start_merge_detect_timeout( void *knet_context) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; qb_loop_timer_add(instance->poll_handle, QB_LOOP_MED, instance->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_merge_detect_timeout, &instance->timer_merge_detect_timeout); } static void totemknet_stop_merge_detect_timeout( void *knet_context) { struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; qb_loop_timer_del(instance->poll_handle, instance->timer_merge_detect_timeout); } diff --git a/exec/totempg.c b/exec/totempg.c index 927afa85..b1be3233 100644 --- a/exec/totempg.c +++ b/exec/totempg.c @@ -1,1537 +1,1523 @@ /* * Copyright (c) 2003-2005 MontaVista Software, Inc. * Copyright (c) 2005 OSDL. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Author: Mark Haverkamp (markh@osdl.org) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * FRAGMENTATION AND PACKING ALGORITHM: * * Assemble the entire message into one buffer * if full fragment * store fragment into lengths list * for each full fragment * multicast fragment * set length and fragment fields of pg mesage * store remaining multicast into head of fragmentation data and set lens field * * If a message exceeds the maximum packet size allowed by the totem * single ring protocol, the protocol could lose forward progress. * Statically calculating the allowed data amount doesn't work because * the amount of data allowed depends on the number of fragments in * each message. In this implementation, the maximum fragment size * is dynamically calculated for each fragment added to the message. * It is possible for a message to be two bytes short of the maximum * packet size. This occurs when a message or collection of * messages + the mcast header + the lens are two bytes short of the * end of the packet. Since another len field consumes two bytes, the * len field would consume the rest of the packet without room for data. * * One optimization would be to forgo the final len field and determine * it from the size of the udp datagram. Then this condition would no * longer occur. */ /* * ASSEMBLY AND UNPACKING ALGORITHM: * * copy incoming packet into assembly data buffer indexed by current * location of end of fragment * * if not fragmented * deliver all messages in assembly data buffer * else * if msg_count > 1 and fragmented * deliver all messages except last message in assembly data buffer * copy last fragmented section to start of assembly data buffer * else * if msg_count = 1 and fragmented * do nothing * */ #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemsrp.h" #define min(a,b) ((a) < (b)) ? a : b struct totempg_mcast_header { short version; short type; }; #if !(defined(__i386__) || defined(__x86_64__)) /* * Need align on architectures different then i386 or x86_64 */ #define TOTEMPG_NEED_ALIGN 1 #endif /* * totempg_mcast structure * * header: Identify the mcast. * fragmented: Set if this message continues into next message * continuation: Set if this message is a continuation from last message * msg_count Indicates how many packed messages are contained * in the mcast. * Also, the size of each packed message and the messages themselves are * appended to the end of this structure when sent. 
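/*
 * Illustrative sketch of the packing budget described above.  The helper
 * name and parameters are hypothetical; it only restates the arithmetic
 * used later by mcast_msg(): the usable payload of one multicast is the
 * MTU minus the totempg_mcast header (defined just below), minus one
 * unsigned short length slot per message already packed, plus one slot
 * for the message being added.
 */
static inline int packed_data_budget (
	unsigned int net_mtu,
	size_t mcast_header_len,	/* stands in for sizeof (struct totempg_mcast) */
	int packed_msg_count)
{
	int packet_size = net_mtu - mcast_header_len;

	return (packet_size - sizeof (unsigned short) * (packed_msg_count + 1));
}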
*/ struct totempg_mcast { struct totempg_mcast_header header; unsigned char fragmented; unsigned char continuation; unsigned short msg_count; /* * short msg_len[msg_count]; */ /* * data for messages */ }; /* * Maximum packet size for totem pg messages */ #define TOTEMPG_PACKET_SIZE (totempg_totem_config->net_mtu - \ sizeof (struct totempg_mcast)) /* * Local variables used for packing small messages */ static unsigned short mcast_packed_msg_lens[FRAME_SIZE_MAX]; static int mcast_packed_msg_count = 0; static int totempg_reserved = 1; static unsigned int totempg_size_limit; static totem_queue_level_changed_fn totem_queue_level_changed = NULL; static uint32_t totempg_threaded_mode = 0; static void *totemsrp_context; /* * Function and data used to log messages */ static int totempg_log_level_security; static int totempg_log_level_error; static int totempg_log_level_warning; static int totempg_log_level_notice; static int totempg_log_level_debug; static int totempg_subsys_id; static void (*totempg_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...) __attribute__((format(printf, 6, 7))); struct totem_config *totempg_totem_config; static totempg_stats_t totempg_stats; enum throw_away_mode { THROW_AWAY_INACTIVE, THROW_AWAY_ACTIVE }; struct assembly { unsigned int nodeid; unsigned char data[MESSAGE_SIZE_MAX]; int index; unsigned char last_frag_num; enum throw_away_mode throw_away_mode; - struct list_head list; + struct qb_list_head list; }; static void assembly_deref (struct assembly *assembly); static int callback_token_received_fn (enum totem_callback_token_type type, const void *data); -DECLARE_LIST_INIT(assembly_list_inuse); +QB_LIST_DECLARE(assembly_list_inuse); /* * Free list is used both for transitional and operational assemblies */ -DECLARE_LIST_INIT(assembly_list_free); +QB_LIST_DECLARE(assembly_list_free); -DECLARE_LIST_INIT(assembly_list_inuse_trans); +QB_LIST_DECLARE(assembly_list_inuse_trans); -DECLARE_LIST_INIT(totempg_groups_list); +QB_LIST_DECLARE(totempg_groups_list); /* * Staging buffer for packed messages. Messages are staged in this buffer * before sending. Multiple messages may fit which cuts down on the * number of mcasts sent. If a message doesn't completely fit, then * the mcast header has a fragment bit set that says that there are more * data to follow. fragment_size is an index into the buffer. It indicates * the size of message data and where to place new message data. * fragment_contuation indicates whether the first packed message in * the buffer is a continuation of a previously packed fragment. 
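/*
 * Illustrative sketch of the libqb list API this patch migrates to
 * (declared in <qb/qblist.h>).  The struct, variable and function names
 * here are examples only; the pattern mirrors the assembly list code
 * below: declare a head, embed a qb_list_head in each element, iterate
 * with qb_list_for_each()/qb_list_entry(), and link with qb_list_add().
 */
#include <stddef.h>
#include <qb/qblist.h>

struct example_item {
	unsigned int nodeid;
	struct qb_list_head list;
};

QB_LIST_DECLARE(example_head);

static struct example_item *example_find (unsigned int nodeid)
{
	struct qb_list_head *iter;
	struct example_item *item;

	qb_list_for_each(iter, &example_head) {
		item = qb_list_entry (iter, struct example_item, list);
		if (item->nodeid == nodeid) {
			return (item);
		}
	}
	return (NULL);
}

static void example_add (struct example_item *item)
{
	qb_list_init (&item->list);
	qb_list_add (&item->list, &example_head);
}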
*/ static unsigned char *fragmentation_data; static int fragment_size = 0; static int fragment_continuation = 0; static int totempg_waiting_transack = 0; struct totempg_group_instance { void (*deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required); void (*confchg_fn) ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); struct totempg_group *groups; int groups_cnt; int32_t q_level; - struct list_head list; + struct qb_list_head list; }; static unsigned char next_fragment = 1; static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t callback_token_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t mcast_msg_mutex = PTHREAD_MUTEX_INITIALIZER; #define log_printf(level, format, args...) \ do { \ totempg_log_printf(level, \ totempg_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ format, ##args); \ } while (0); static int msg_count_send_ok (int msg_count); static int byte_count_send_ok (int byte_count); static void totempg_waiting_trans_ack_cb (int waiting_trans_ack) { log_printf(LOG_DEBUG, "waiting_trans_ack changed to %u", waiting_trans_ack); totempg_waiting_transack = waiting_trans_ack; } static struct assembly *assembly_ref (unsigned int nodeid) { struct assembly *assembly; - struct list_head *list; - struct list_head *active_assembly_list_inuse; + struct qb_list_head *list; + struct qb_list_head *active_assembly_list_inuse; if (totempg_waiting_transack) { active_assembly_list_inuse = &assembly_list_inuse_trans; } else { active_assembly_list_inuse = &assembly_list_inuse; } /* * Search inuse list for node id and return assembly buffer if found */ - for (list = active_assembly_list_inuse->next; - list != active_assembly_list_inuse; - list = list->next) { - - assembly = list_entry (list, struct assembly, list); + qb_list_for_each(list, active_assembly_list_inuse) { + assembly = qb_list_entry (list, struct assembly, list); if (nodeid == assembly->nodeid) { return (assembly); } } /* * Nothing found in inuse list get one from free list if available */ - if (list_empty (&assembly_list_free) == 0) { - assembly = list_entry (assembly_list_free.next, struct assembly, list); - list_del (&assembly->list); - list_add (&assembly->list, active_assembly_list_inuse); + if (qb_list_empty (&assembly_list_free) == 0) { + assembly = qb_list_entry (assembly_list_free.next, struct assembly, list); + qb_list_del (&assembly->list); + qb_list_add (&assembly->list, active_assembly_list_inuse); assembly->nodeid = nodeid; assembly->index = 0; assembly->last_frag_num = 0; assembly->throw_away_mode = THROW_AWAY_INACTIVE; return (assembly); } /* * Nothing available in inuse or free list, so allocate a new one */ assembly = malloc (sizeof (struct assembly)); /* * TODO handle memory allocation failure here */ assert (assembly); assembly->nodeid = nodeid; assembly->data[0] = 0; assembly->index = 0; assembly->last_frag_num = 0; assembly->throw_away_mode = THROW_AWAY_INACTIVE; - list_init (&assembly->list); - list_add (&assembly->list, active_assembly_list_inuse); + qb_list_init (&assembly->list); + qb_list_add (&assembly->list, active_assembly_list_inuse); return (assembly); } static void assembly_deref (struct assembly *assembly) { - - list_del (&assembly->list); - list_add (&assembly->list, &assembly_list_free); + qb_list_del 
(&assembly->list); + qb_list_add (&assembly->list, &assembly_list_free); } static void assembly_deref_from_normal_and_trans (int nodeid) { int j; - struct list_head *list, *list_next; - struct list_head *active_assembly_list_inuse; + struct qb_list_head *list; + struct qb_list_head *active_assembly_list_inuse; struct assembly *assembly; for (j = 0; j < 2; j++) { if (j == 0) { active_assembly_list_inuse = &assembly_list_inuse; } else { active_assembly_list_inuse = &assembly_list_inuse_trans; } - for (list = active_assembly_list_inuse->next; - list != active_assembly_list_inuse; - list = list_next) { - - list_next = list->next; - assembly = list_entry (list, struct assembly, list); + qb_list_for_each(list, active_assembly_list_inuse) { + assembly = qb_list_entry (list, struct assembly, list); if (nodeid == assembly->nodeid) { - list_del (&assembly->list); - list_add (&assembly->list, &assembly_list_free); + qb_list_del (&assembly->list); + qb_list_add (&assembly->list, &assembly_list_free); } } } } static inline void app_confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; struct totempg_group_instance *instance; - struct list_head *list; + struct qb_list_head *list; /* * For every leaving processor, add to free list * This also has the side effect of clearing out the dataset * In the leaving processor's assembly buffer. */ for (i = 0; i < left_list_entries; i++) { assembly_deref_from_normal_and_trans (left_list[i]); } - for (list = totempg_groups_list.next; - list != &totempg_groups_list; - list = list->next) { - - instance = list_entry (list, struct totempg_group_instance, list); + qb_list_for_each(list, &totempg_groups_list) { + instance = qb_list_entry (list, struct totempg_group_instance, list); if (instance->confchg_fn) { instance->confchg_fn ( configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } } } static inline void group_endian_convert ( void *msg, int msg_len) { unsigned short *group_len; int i; char *aligned_msg; #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ if ((size_t)msg % 4 != 0) { aligned_msg = alloca(msg_len); memcpy(aligned_msg, msg, msg_len); } else { aligned_msg = msg; } #else aligned_msg = msg; #endif group_len = (unsigned short *)aligned_msg; group_len[0] = swab16(group_len[0]); for (i = 1; i < group_len[0] + 1; i++) { group_len[i] = swab16(group_len[i]); } if (aligned_msg != msg) { memcpy(msg, aligned_msg, msg_len); } } static inline int group_matches ( struct iovec *iovec, unsigned int iov_len, struct totempg_group *groups_b, unsigned int group_b_cnt, unsigned int *adjust_iovec) { unsigned short *group_len; char *group_name; int i; int j; #ifdef TOTEMPG_NEED_ALIGN struct iovec iovec_aligned = { NULL, 0 }; #endif assert (iov_len == 1); #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ if ((size_t)iovec->iov_base % 4 != 0) { iovec_aligned.iov_base = alloca(iovec->iov_len); memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len); iovec_aligned.iov_len = iovec->iov_len; iovec = &iovec_aligned; } #endif group_len = (unsigned short *)iovec->iov_base; group_name = ((char *)iovec->iov_base) + sizeof (unsigned short) * (group_len[0] + 1); /* * Calculate amount to adjust the iovec by before delivering to 
app */ *adjust_iovec = sizeof (unsigned short) * (group_len[0] + 1); for (i = 1; i < group_len[0] + 1; i++) { *adjust_iovec += group_len[i]; } /* * Determine if this message should be delivered to this instance */ for (i = 1; i < group_len[0] + 1; i++) { for (j = 0; j < group_b_cnt; j++) { if ((group_len[i] == groups_b[j].group_len) && (memcmp (groups_b[j].group, group_name, group_len[i]) == 0)) { return (1); } } group_name += group_len[i]; } return (0); } static inline void app_deliver_fn ( unsigned int nodeid, void *msg, unsigned int msg_len, int endian_conversion_required) { struct totempg_group_instance *instance; struct iovec stripped_iovec; unsigned int adjust_iovec; struct iovec *iovec; - struct list_head *list; + struct qb_list_head *list; struct iovec aligned_iovec = { NULL, 0 }; if (endian_conversion_required) { group_endian_convert (msg, msg_len); } /* * TODO: segmentation/assembly need to be redesigned to provide aligned access * in all cases to avoid memory copies on non386 archs. Probably broke backwars * compatibility */ #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ aligned_iovec.iov_base = alloca(msg_len); aligned_iovec.iov_len = msg_len; memcpy(aligned_iovec.iov_base, msg, msg_len); #else aligned_iovec.iov_base = msg; aligned_iovec.iov_len = msg_len; #endif iovec = &aligned_iovec; - for (list = totempg_groups_list.next; - list != &totempg_groups_list; - list = list->next) { - - instance = list_entry (list, struct totempg_group_instance, list); + qb_list_for_each(list, &totempg_groups_list) { + instance = qb_list_entry (list, struct totempg_group_instance, list); if (group_matches (iovec, 1, instance->groups, instance->groups_cnt, &adjust_iovec)) { stripped_iovec.iov_len = iovec->iov_len - adjust_iovec; stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec; #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ if ((char *)iovec->iov_base + adjust_iovec % 4 != 0) { /* * Deal with misalignment */ stripped_iovec.iov_base = alloca (stripped_iovec.iov_len); memcpy (stripped_iovec.iov_base, (char *)iovec->iov_base + adjust_iovec, stripped_iovec.iov_len); } #endif instance->deliver_fn ( nodeid, stripped_iovec.iov_base, stripped_iovec.iov_len, endian_conversion_required); } } } static void totempg_confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { // TODO optimize this app_confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } static void totempg_deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { struct totempg_mcast *mcast; unsigned short *msg_lens; int i; struct assembly *assembly; char header[FRAME_SIZE_MAX]; int msg_count; int continuation; int start; const char *data; int datasize; struct iovec iov_delv; assembly = assembly_ref (nodeid); assert (assembly); /* * Assemble the header into one block of data and * assemble the packet contents into one block of data to simplify delivery */ mcast = (struct totempg_mcast *)msg; if (endian_conversion_required) { mcast->msg_count = swab16 (mcast->msg_count); } msg_count = mcast->msg_count; datasize = sizeof (struct totempg_mcast) + msg_count * sizeof (unsigned short); memcpy (header, msg, datasize); data = 
msg; msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast)); if (endian_conversion_required) { for (i = 0; i < mcast->msg_count; i++) { msg_lens[i] = swab16 (msg_lens[i]); } } memcpy (&assembly->data[assembly->index], &data[datasize], msg_len - datasize); /* * If the last message in the buffer is a fragment, then we * can't deliver it. We'll first deliver the full messages * then adjust the assembly buffer so we can add the rest of the * fragment when it arrives. */ msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count; continuation = mcast->continuation; iov_delv.iov_base = (void *)&assembly->data[0]; iov_delv.iov_len = assembly->index + msg_lens[0]; /* * Make sure that if this message is a continuation, that it * matches the sequence number of the previous fragment. * Also, if the first packed message is a continuation * of a previous message, but the assembly buffer * is empty, then we need to discard it since we can't * assemble a complete message. Likewise, if this message isn't a * continuation and the assembly buffer is empty, we have to discard * the continued message. */ start = 0; if (assembly->throw_away_mode == THROW_AWAY_ACTIVE) { /* Throw away the first msg block */ if (mcast->fragmented == 0 || mcast->fragmented == 1) { assembly->throw_away_mode = THROW_AWAY_INACTIVE; assembly->index += msg_lens[0]; iov_delv.iov_base = (void *)&assembly->data[assembly->index]; iov_delv.iov_len = msg_lens[1]; start = 1; } } else if (assembly->throw_away_mode == THROW_AWAY_INACTIVE) { if (continuation == assembly->last_frag_num) { assembly->last_frag_num = mcast->fragmented; for (i = start; i < msg_count; i++) { app_deliver_fn(nodeid, iov_delv.iov_base, iov_delv.iov_len, endian_conversion_required); assembly->index += msg_lens[i]; iov_delv.iov_base = (void *)&assembly->data[assembly->index]; if (i < (msg_count - 1)) { iov_delv.iov_len = msg_lens[i + 1]; } } } else { log_printf (LOG_DEBUG, "fragmented continuation %u is not equal to assembly last_frag_num %u", continuation, assembly->last_frag_num); assembly->throw_away_mode = THROW_AWAY_ACTIVE; } } if (mcast->fragmented == 0) { /* * End of messages, dereference assembly struct */ assembly->last_frag_num = 0; assembly->index = 0; assembly_deref (assembly); } else { /* * Message is fragmented, keep around assembly list */ if (mcast->msg_count > 1) { memmove (&assembly->data[0], &assembly->data[assembly->index], msg_lens[msg_count]); assembly->index = 0; } assembly->index += msg_lens[msg_count]; } } /* * Totem Process Group Abstraction * depends on poll abstraction, POSIX, IPV4 */ void *callback_token_received_handle; int callback_token_received_fn (enum totem_callback_token_type type, const void *data) { struct totempg_mcast mcast; struct iovec iovecs[3]; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&mcast_msg_mutex); } if (mcast_packed_msg_count == 0) { if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&mcast_msg_mutex); } return (0); } if (totemsrp_avail(totemsrp_context) == 0) { if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&mcast_msg_mutex); } return (0); } mcast.header.version = 0; mcast.header.type = 0; mcast.fragmented = 0; /* * Was the first message in this buffer a continuation of a * fragmented message? 
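/*
 * Illustrative sketch (hypothetical helpers, not part of the patch) of
 * the fragment numbering contract visible in this file: the sender stamps
 * a fragmented mcast with the current fragment number and remembers the
 * same number as the "continuation" value of the next mcast it sends;
 * totempg_deliver_fn() above accepts a continued buffer only when that
 * number matches the last fragment number stored in the assembly.
 */
static void sender_mark_fragment (struct totempg_mcast *mcast,
	unsigned char *next_fragment, int *fragment_continuation)
{
	if (*next_fragment == 0) {
		(*next_fragment)++;	/* 0 means "not fragmented" */
	}
	*fragment_continuation = *next_fragment;
	mcast->fragmented = (*next_fragment)++;
}

static int receiver_accepts_continuation (const struct totempg_mcast *mcast,
	const struct assembly *assembly)
{
	return (mcast->continuation == assembly->last_frag_num);
}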
*/ mcast.continuation = fragment_continuation; fragment_continuation = 0; mcast.msg_count = mcast_packed_msg_count; iovecs[0].iov_base = (void *)&mcast; iovecs[0].iov_len = sizeof (struct totempg_mcast); iovecs[1].iov_base = (void *)mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short); iovecs[2].iov_base = (void *)&fragmentation_data[0]; iovecs[2].iov_len = fragment_size; (void)totemsrp_mcast (totemsrp_context, iovecs, 3, 0); mcast_packed_msg_count = 0; fragment_size = 0; if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&mcast_msg_mutex); } return (0); } /* * Initialize the totem process group abstraction */ int totempg_initialize ( qb_loop_t *poll_handle, struct totem_config *totem_config) { int res; totempg_totem_config = totem_config; totempg_log_level_security = totem_config->totem_logging_configuration.log_level_security; totempg_log_level_error = totem_config->totem_logging_configuration.log_level_error; totempg_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; totempg_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; totempg_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; totempg_log_printf = totem_config->totem_logging_configuration.log_printf; totempg_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; fragmentation_data = malloc (TOTEMPG_PACKET_SIZE); if (fragmentation_data == 0) { return (-1); } totemsrp_net_mtu_adjust (totem_config); res = totemsrp_initialize ( poll_handle, &totemsrp_context, totem_config, &totempg_stats, totempg_deliver_fn, totempg_confchg_fn, totempg_waiting_trans_ack_cb); totemsrp_callback_token_create ( totemsrp_context, &callback_token_received_handle, TOTEM_CALLBACK_TOKEN_RECEIVED, 0, callback_token_received_fn, 0); totempg_size_limit = (totemsrp_avail(totemsrp_context) - 1) * (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16); - list_init (&totempg_groups_list); + qb_list_init (&totempg_groups_list); return (res); } void totempg_finalize (void) { if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); } totemsrp_finalize (totemsrp_context); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } } /* * Multicast a message */ static int mcast_msg ( struct iovec *iovec_in, unsigned int iov_len, int guarantee) { int res = 0; struct totempg_mcast mcast; struct iovec iovecs[3]; struct iovec iovec[64]; int i; int dest, src; int max_packet_size = 0; int copy_len = 0; int copy_base = 0; int total_size = 0; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&mcast_msg_mutex); } totemsrp_event_signal (totemsrp_context, TOTEM_EVENT_NEW_MSG, 1); /* * Remove zero length iovectors from the list */ assert (iov_len < 64); for (dest = 0, src = 0; src < iov_len; src++) { if (iovec_in[src].iov_len) { memcpy (&iovec[dest++], &iovec_in[src], sizeof (struct iovec)); } } iov_len = dest; max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof (unsigned short) * (mcast_packed_msg_count + 1)); mcast_packed_msg_lens[mcast_packed_msg_count] = 0; /* * Check if we would overwrite new message queue */ for (i = 0; i < iov_len; i++) { total_size += iovec[i].iov_len; } if (byte_count_send_ok (total_size + sizeof(unsigned short) * (mcast_packed_msg_count)) == 0) { if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&mcast_msg_mutex); } return(-1); } mcast.header.version = 0; for (i = 0; i < iov_len; ) { mcast.fragmented = 0; mcast.continuation = fragment_continuation; copy_len = 
iovec[i].iov_len - copy_base; /* * If it all fits with room left over, copy it in. * We need to leave at least sizeof(short) + 1 bytes in the * fragment_buffer on exit so that max_packet_size + fragment_size * doesn't exceed the size of the fragment_buffer on the next call. */ if ((copy_len + fragment_size) < (max_packet_size - sizeof (unsigned short))) { memcpy (&fragmentation_data[fragment_size], (char *)iovec[i].iov_base + copy_base, copy_len); fragment_size += copy_len; mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; next_fragment = 1; copy_len = 0; copy_base = 0; i++; continue; /* * If it just fits or is too big, then send out what fits. */ } else { unsigned char *data_ptr; copy_len = min(copy_len, max_packet_size - fragment_size); if( copy_len == max_packet_size ) data_ptr = (unsigned char *)iovec[i].iov_base + copy_base; else { data_ptr = fragmentation_data; memcpy (&fragmentation_data[fragment_size], (unsigned char *)iovec[i].iov_base + copy_base, copy_len); } memcpy (&fragmentation_data[fragment_size], (unsigned char *)iovec[i].iov_base + copy_base, copy_len); mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; /* * if we're not on the last iovec or the iovec is too large to * fit, then indicate a fragment. This also means that the next * message will have the continuation of this one. */ if ((i < (iov_len - 1)) || ((copy_base + copy_len) < iovec[i].iov_len)) { if (!next_fragment) { next_fragment++; } fragment_continuation = next_fragment; mcast.fragmented = next_fragment++; assert(fragment_continuation != 0); assert(mcast.fragmented != 0); } else { fragment_continuation = 0; } /* * assemble the message and send it */ mcast.msg_count = ++mcast_packed_msg_count; iovecs[0].iov_base = (void *)&mcast; iovecs[0].iov_len = sizeof(struct totempg_mcast); iovecs[1].iov_base = (void *)mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof(unsigned short); iovecs[2].iov_base = (void *)data_ptr; iovecs[2].iov_len = max_packet_size; assert (totemsrp_avail(totemsrp_context) > 0); res = totemsrp_mcast (totemsrp_context, iovecs, 3, guarantee); if (res == -1) { goto error_exit; } /* * Recalculate counts and indexes for the next. */ mcast_packed_msg_lens[0] = 0; mcast_packed_msg_count = 0; fragment_size = 0; max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof(unsigned short)); /* * If the iovec all fit, go to the next iovec */ if ((copy_base + copy_len) == iovec[i].iov_len) { copy_len = 0; copy_base = 0; i++; /* * Continue with the rest of the current iovec. */ } else { copy_base += copy_len; } } } /* * Bump only if we added message data. This may be zero if * the last buffer just fit into the fragmentation_data buffer * and we were at the last iovec. 
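/*
 * Illustrative sketch of the on-wire layout assembled by mcast_msg() and
 * callback_token_received_fn() above: each packed multicast handed to
 * totemsrp_mcast() carries three iovecs - the totempg_mcast header, the
 * array of packed message lengths, and the packed message data.  The
 * helper below is hypothetical and only restates that layout.
 */
static int send_packed (void *srp_context,
	struct totempg_mcast *mcast,
	unsigned short *msg_lens,
	unsigned char *data, int data_len,
	int guarantee)
{
	struct iovec iovecs[3];

	iovecs[0].iov_base = (void *)mcast;
	iovecs[0].iov_len = sizeof (struct totempg_mcast);
	iovecs[1].iov_base = (void *)msg_lens;
	iovecs[1].iov_len = mcast->msg_count * sizeof (unsigned short);
	iovecs[2].iov_base = (void *)data;
	iovecs[2].iov_len = data_len;

	return (totemsrp_mcast (srp_context, iovecs, 3, guarantee));
}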
*/ if (mcast_packed_msg_lens[mcast_packed_msg_count]) { mcast_packed_msg_count++; } error_exit: if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&mcast_msg_mutex); } return (res); } /* * Determine if a message of msg_size could be queued */ static int msg_count_send_ok ( int msg_count) { int avail = 0; avail = totemsrp_avail (totemsrp_context); totempg_stats.msg_queue_avail = avail; return ((avail - totempg_reserved) > msg_count); } static int byte_count_send_ok ( int byte_count) { unsigned int msg_count = 0; int avail = 0; avail = totemsrp_avail (totemsrp_context); msg_count = (byte_count / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1; return (avail >= msg_count); } static int send_reserve ( int msg_size) { unsigned int msg_count = 0; msg_count = (msg_size / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1; totempg_reserved += msg_count; totempg_stats.msg_reserved = totempg_reserved; return (msg_count); } static void send_release ( int msg_count) { totempg_reserved -= msg_count; totempg_stats.msg_reserved = totempg_reserved; } #ifndef HAVE_SMALL_MEMORY_FOOTPRINT #undef MESSAGE_QUEUE_MAX #define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totempg_totem_config->net_mtu) #endif /* HAVE_SMALL_MEMORY_FOOTPRINT */ static uint32_t q_level_precent_used(void) { return (100 - (((totemsrp_avail(totemsrp_context) - totempg_reserved) * 100) / MESSAGE_QUEUE_MAX)); } int totempg_callback_token_create ( void **handle_out, enum totem_callback_token_type type, int delete, int (*callback_fn) (enum totem_callback_token_type type, const void *), const void *data) { unsigned int res; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&callback_token_mutex); } res = totemsrp_callback_token_create (totemsrp_context, handle_out, type, delete, callback_fn, data); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&callback_token_mutex); } return (res); } void totempg_callback_token_destroy ( void *handle_out) { if (totempg_threaded_mode == 1) { pthread_mutex_lock (&callback_token_mutex); } totemsrp_callback_token_destroy (totemsrp_context, handle_out); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&callback_token_mutex); } } /* * vi: set autoindent tabstop=4 shiftwidth=4 : */ int totempg_groups_initialize ( void **totempg_groups_instance, void (*deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required), void (*confchg_fn) ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id)) { struct totempg_group_instance *instance; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); } instance = malloc (sizeof (struct totempg_group_instance)); if (instance == NULL) { goto error_exit; } instance->deliver_fn = deliver_fn; instance->confchg_fn = confchg_fn; instance->groups = 0; instance->groups_cnt = 0; instance->q_level = QB_LOOP_MED; - list_init (&instance->list); - list_add (&instance->list, &totempg_groups_list); + qb_list_init (&instance->list); + qb_list_add (&instance->list, &totempg_groups_list); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } *totempg_groups_instance = instance; return (0); error_exit: if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } return (-1); } int totempg_groups_join ( void 
*totempg_groups_instance, const struct totempg_group *groups, size_t group_cnt) { struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; struct totempg_group *new_groups; unsigned int res = 0; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); } new_groups = realloc (instance->groups, sizeof (struct totempg_group) * (instance->groups_cnt + group_cnt)); if (new_groups == 0) { res = ENOMEM; goto error_exit; } memcpy (&new_groups[instance->groups_cnt], groups, group_cnt * sizeof (struct totempg_group)); instance->groups = new_groups; instance->groups_cnt += group_cnt; error_exit: if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } return (res); } int totempg_groups_leave ( void *totempg_groups_instance, const struct totempg_group *groups, size_t group_cnt) { if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); } if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } return (0); } #define MAX_IOVECS_FROM_APP 32 #define MAX_GROUPS_PER_MSG 32 int totempg_groups_mcast_joined ( void *totempg_groups_instance, const struct iovec *iovec, unsigned int iov_len, int guarantee) { struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; int i; unsigned int res; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); } /* * Build group_len structure and the iovec_mcast structure */ group_len[0] = instance->groups_cnt; for (i = 0; i < instance->groups_cnt; i++) { group_len[i + 1] = instance->groups[i].group_len; iovec_mcast[i + 1].iov_len = instance->groups[i].group_len; iovec_mcast[i + 1].iov_base = (void *) instance->groups[i].group; } iovec_mcast[0].iov_len = (instance->groups_cnt + 1) * sizeof (unsigned short); iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + instance->groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + instance->groups_cnt + 1].iov_base = iovec[i].iov_base; } res = mcast_msg (iovec_mcast, iov_len + instance->groups_cnt + 1, guarantee); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } return (res); } static void check_q_level( void *totempg_groups_instance) { struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; int32_t old_level = instance->q_level; int32_t percent_used = q_level_precent_used(); if (percent_used >= 75 && instance->q_level != TOTEM_Q_LEVEL_CRITICAL) { instance->q_level = TOTEM_Q_LEVEL_CRITICAL; } else if (percent_used < 30 && instance->q_level != TOTEM_Q_LEVEL_LOW) { instance->q_level = TOTEM_Q_LEVEL_LOW; } else if (percent_used > 40 && percent_used < 50 && instance->q_level != TOTEM_Q_LEVEL_GOOD) { instance->q_level = TOTEM_Q_LEVEL_GOOD; } else if (percent_used > 60 && percent_used < 70 && instance->q_level != TOTEM_Q_LEVEL_HIGH) { instance->q_level = TOTEM_Q_LEVEL_HIGH; } if (totem_queue_level_changed && old_level != instance->q_level) { totem_queue_level_changed(instance->q_level); } } void totempg_check_q_level( void *totempg_groups_instance) { struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; check_q_level(instance); } int totempg_groups_joined_reserve ( void *totempg_groups_instance, const struct iovec *iovec, unsigned int iov_len) { struct totempg_group_instance *instance = (struct 
totempg_group_instance *)totempg_groups_instance; unsigned int size = 0; unsigned int i; unsigned int reserved = 0; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); pthread_mutex_lock (&mcast_msg_mutex); } for (i = 0; i < instance->groups_cnt; i++) { size += instance->groups[i].group_len; } for (i = 0; i < iov_len; i++) { size += iovec[i].iov_len; } if (size >= totempg_size_limit) { reserved = -1; goto error_exit; } if (byte_count_send_ok (size)) { reserved = send_reserve (size); } else { reserved = 0; } error_exit: check_q_level(instance); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&mcast_msg_mutex); pthread_mutex_unlock (&totempg_mutex); } return (reserved); } int totempg_groups_joined_release (int msg_count) { if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); pthread_mutex_lock (&mcast_msg_mutex); } send_release (msg_count); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&mcast_msg_mutex); pthread_mutex_unlock (&totempg_mutex); } return 0; } int totempg_groups_mcast_groups ( void *totempg_groups_instance, int guarantee, const struct totempg_group *groups, size_t groups_cnt, const struct iovec *iovec, unsigned int iov_len) { unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; int i; unsigned int res; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); } /* * Build group_len structure and the iovec_mcast structure */ group_len[0] = groups_cnt; for (i = 0; i < groups_cnt; i++) { group_len[i + 1] = groups[i].group_len; iovec_mcast[i + 1].iov_len = groups[i].group_len; iovec_mcast[i + 1].iov_base = (void *) groups[i].group; } iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short); iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base; } res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } return (res); } /* * Returns -1 if error, 0 if can't send, 1 if can send the message */ int totempg_groups_send_ok_groups ( void *totempg_groups_instance, const struct totempg_group *groups, size_t groups_cnt, const struct iovec *iovec, unsigned int iov_len) { unsigned int size = 0; unsigned int i; unsigned int res; if (totempg_threaded_mode == 1) { pthread_mutex_lock (&totempg_mutex); } for (i = 0; i < groups_cnt; i++) { size += groups[i].group_len; } for (i = 0; i < iov_len; i++) { size += iovec[i].iov_len; } res = msg_count_send_ok (size); if (totempg_threaded_mode == 1) { pthread_mutex_unlock (&totempg_mutex); } return (res); } int totempg_ifaces_get ( unsigned int nodeid, struct totem_ip_address *interfaces, unsigned int interfaces_size, char ***status, unsigned int *iface_count) { int res; res = totemsrp_ifaces_get ( totemsrp_context, nodeid, interfaces, interfaces_size, status, iface_count); return (res); } void totempg_event_signal (enum totem_event_type type, int value) { totemsrp_event_signal (totemsrp_context, type, value); } void* totempg_get_stats (void) { return &totempg_stats; } int totempg_crypto_set ( const char *cipher_type, const char *hash_type) { int res; res = totemsrp_crypto_set (totemsrp_context, cipher_type, hash_type); return (res); } int totempg_ring_reenable (void) { int res; res = totemsrp_ring_reenable (totemsrp_context); return (res); } #define ONE_IFACE_LEN 63 const char 
*totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[ONE_IFACE_LEN+1]; struct totem_ip_address interfaces[INTERFACE_MAX]; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, INTERFACE_MAX, NULL, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } res = totempg_ifaces_get (nodeid, interfaces, INTERFACE_MAX, NULL, &iface_count); for (i = 0; i < iface_count; i++) { snprintf (one_iface, ONE_IFACE_LEN, "r(%d) ip(%s) ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); } unsigned int totempg_my_nodeid_get (void) { return (totemsrp_my_nodeid_get(totemsrp_context)); } int totempg_my_family_get (void) { return (totemsrp_my_family_get(totemsrp_context)); } extern void totempg_service_ready_register ( void (*totem_service_ready) (void)) { totemsrp_service_ready_register (totemsrp_context, totem_service_ready); } void totempg_queue_level_register_callback (totem_queue_level_changed_fn fn) { totem_queue_level_changed = fn; } extern int totempg_member_add ( const struct totem_ip_address *member, int ring_no) { return totemsrp_member_add (totemsrp_context, member, ring_no); } extern int totempg_member_remove ( const struct totem_ip_address *member, int ring_no) { return totemsrp_member_remove (totemsrp_context, member, ring_no); } void totempg_threaded_mode_enable (void) { totempg_threaded_mode = 1; totemsrp_threaded_mode_enable (totemsrp_context); } void totempg_trans_ack (void) { totemsrp_trans_ack (totemsrp_context); } diff --git a/exec/totemsrp.c b/exec/totemsrp.c index 3cee4576..34a2105d 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -1,4784 +1,4779 @@ /* * Copyright (c) 2003-2006 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
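/*
 * Illustrative usage sketch of the process-group API exported above.
 * The group name, callbacks and message are hypothetical; the call
 * sequence (totempg_groups_initialize, totempg_groups_join,
 * totempg_groups_mcast_joined) is the one defined in this file.
 */
static void example_deliver_fn (unsigned int nodeid, const void *msg,
	unsigned int msg_len, int endian_conversion_required)
{
	/* handle an ordered message delivered to the group */
}

static void example_confchg_fn (enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	/* handle a membership change for the group */
}

static struct totempg_group example_group = {
	.group		= "example",
	.group_len	= 7,
};

static void example_setup (void)
{
	void *groups_instance;
	struct iovec iov;
	static const char msg[] = "hello";

	totempg_groups_initialize (&groups_instance,
		example_deliver_fn, example_confchg_fn);
	totempg_groups_join (groups_instance, &example_group, 1);

	iov.iov_base = (void *)msg;
	iov.iov_len = sizeof (msg);
	totempg_groups_mcast_joined (groups_instance, &iov, 1, 0);
}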
*/ /* * The first version of this code was based upon Yair Amir's PhD thesis: * http://www.cs.jhu.edu/~yairamir/phd.ps) (ch4,5). * * The current version of totemsrp implements the Totem protocol specified in: * http://citeseer.ist.psu.edu/amir95totem.html * * The deviations from the above published protocols are: * - encryption of message contents with nss * - authentication of meessage contents with SHA1/HMAC * - token hold mode where token doesn't rotate on unused ring - reduces cpu * usage on 1.6ghz xeon from 35% to less then .1 % as measured by top */ #include #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include -#include #define LOGSYS_UTILS_ONLY 1 #include #include "totemsrp.h" #include "totemnet.h" #include "cs_queue.h" #define LOCALHOST_IP inet_addr("127.0.0.1") #define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */ #define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 500 messages to be queued */ #define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */ #define MAXIOVS 5 #define RETRANSMIT_ENTRIES_MAX 30 #define TOKEN_SIZE_MAX 64000 /* bytes */ #define LEAVE_DUMMY_NODEID 0 /* * SRP address. * CC: TODO: Can we remove IP address from this and just use nodeids? */ struct srp_addr { uint8_t no_addrs; struct totem_ip_address addr[INTERFACE_MAX]; }; /* * Rollover handling: * SEQNO_START_MSG is the starting sequence number after a new configuration * This should remain zero, unless testing overflow in which case * 0x7ffff000 and 0xfffff000 are good starting values. * * SEQNO_START_TOKEN is the starting sequence number after a new configuration * for a token. This should remain zero, unless testing overflow in which * case 07fffff00 or 0xffffff00 are good starting values. 
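/*
 * Illustrative note (not part of the protocol code) on the rollover test
 * values mentioned above: sequence numbers are 32-bit unsigned integers,
 * so a configuration that starts near the top of the range wraps back to
 * small values after only a few thousand messages, which is what the
 * alternative SEQNO_START_* settings exercise.  Example, assuming the
 * usual 32-bit unsigned int:
 */
static unsigned int seqno_after (unsigned int start, unsigned int sent)
{
	return (start + sent);	/* e.g. 0xfffff000 + 0x2000 wraps to 0x1000 */
}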
*/ #define SEQNO_START_MSG 0x0 #define SEQNO_START_TOKEN 0x0 /* * These can be used ot test different rollover points * #define SEQNO_START_MSG 0xfffffe00 * #define SEQNO_START_TOKEN 0xfffffe00 */ /* * These can be used to test the error recovery algorithms * #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30 * #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30 * #define TEST_DROP_MCAST_PERCENTAGE 50 * #define TEST_RECOVERY_MSG_COUNT 300 */ /* * we compare incoming messages to determine if their endian is * different - if so convert them * * do not change */ #define ENDIAN_LOCAL 0xff22 enum message_type { MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */ MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */ MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */ MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */ MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */ MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */ }; enum encapsulation_type { MESSAGE_ENCAPSULATED = 1, MESSAGE_NOT_ENCAPSULATED = 2 }; /* * New membership algorithm local variables */ struct consensus_list_item { struct srp_addr addr; int set; }; struct token_callback_instance { - struct list_head list; + struct qb_list_head list; int (*callback_fn) (enum totem_callback_token_type type, const void *); enum totem_callback_token_type callback_type; int delete; void *data; }; struct totemsrp_socket { int mcast; int token; }; struct mcast { struct totem_message_header header; struct srp_addr system_from; unsigned int seq; int this_seqno; struct memb_ring_id ring_id; unsigned int node_id; int guarantee; } __attribute__((packed)); struct rtr_item { struct memb_ring_id ring_id; unsigned int seq; }__attribute__((packed)); struct orf_token { struct totem_message_header header; unsigned int seq; unsigned int token_seq; unsigned int aru; unsigned int aru_addr; struct memb_ring_id ring_id; unsigned int backlog; unsigned int fcc; int retrans_flg; int rtr_list_entries; struct rtr_item rtr_list[0]; }__attribute__((packed)); struct memb_join { struct totem_message_header header; struct srp_addr system_from; unsigned int proc_list_entries; unsigned int failed_list_entries; unsigned long long ring_seq; unsigned char end_of_memb_join[0]; /* * These parts of the data structure are dynamic: * struct srp_addr proc_list[]; * struct srp_addr failed_list[]; */ } __attribute__((packed)); struct memb_merge_detect { struct totem_message_header header; struct srp_addr system_from; struct memb_ring_id ring_id; } __attribute__((packed)); struct token_hold_cancel { struct totem_message_header header; struct memb_ring_id ring_id; } __attribute__((packed)); struct memb_commit_token_memb_entry { struct memb_ring_id ring_id; unsigned int aru; unsigned int high_delivered; unsigned int received_flg; }__attribute__((packed)); struct memb_commit_token { struct totem_message_header header; unsigned int token_seq; struct memb_ring_id ring_id; unsigned int retrans_flg; int memb_index; int addr_entries; unsigned char end_of_commit_token[0]; /* * These parts of the data structure are dynamic: * * struct srp_addr addr[PROCESSOR_COUNT_MAX]; * struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX]; */ }__attribute__((packed)); struct message_item { struct mcast *mcast; unsigned int msg_len; }; struct sort_queue_item { struct mcast *mcast; unsigned int msg_len; }; enum memb_state { MEMB_STATE_OPERATIONAL = 1, MEMB_STATE_GATHER = 2, MEMB_STATE_COMMIT = 3, 
MEMB_STATE_RECOVERY = 4 }; struct totemsrp_instance { int iface_changes; int failed_to_recv; /* * Flow control mcasts and remcasts on last and current orf_token */ int fcc_remcast_last; int fcc_mcast_last; int fcc_remcast_current; struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX]; int consensus_list_entries; struct srp_addr my_id; struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX]; unsigned int my_leave_memb_list[PROCESSOR_COUNT_MAX]; int my_proc_list_entries; int my_failed_list_entries; int my_new_memb_entries; int my_trans_memb_entries; int my_memb_entries; int my_deliver_memb_entries; int my_left_memb_entries; int my_leave_memb_entries; struct memb_ring_id my_ring_id; struct memb_ring_id my_old_ring_id; int my_aru_count; int my_merge_detect_timeout_outstanding; unsigned int my_last_aru; int my_seq_unchanged; int my_received_flg; unsigned int my_high_seq_received; unsigned int my_install_seq; int my_rotation_counter; int my_set_retrans_flg; int my_retrans_flg_count; unsigned int my_high_ring_delivered; int heartbeat_timeout; /* * Queues used to order, deliver, and recover messages */ struct cs_queue new_message_queue; struct cs_queue new_message_queue_trans; struct cs_queue retrans_message_queue; struct sq regular_sort_queue; struct sq recovery_sort_queue; /* * Received up to and including */ unsigned int my_aru; unsigned int my_high_delivered; - struct list_head token_callback_received_listhead; + struct qb_list_head token_callback_received_listhead; - struct list_head token_callback_sent_listhead; + struct qb_list_head token_callback_sent_listhead; char orf_token_retransmit[TOKEN_SIZE_MAX]; int orf_token_retransmit_size; unsigned int my_token_seq; /* * Timers */ qb_loop_timer_handle timer_pause_timeout; qb_loop_timer_handle timer_orf_token_timeout; qb_loop_timer_handle timer_orf_token_retransmit_timeout; qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout; qb_loop_timer_handle timer_merge_detect_timeout; qb_loop_timer_handle memb_timer_state_gather_join_timeout; qb_loop_timer_handle memb_timer_state_gather_consensus_timeout; qb_loop_timer_handle memb_timer_state_commit_timeout; qb_loop_timer_handle timer_heartbeat_timeout; /* * Function and data used to log messages */ int totemsrp_log_level_security; int totemsrp_log_level_error; int totemsrp_log_level_warning; int totemsrp_log_level_notice; int totemsrp_log_level_debug; int totemsrp_log_level_trace; int totemsrp_subsys_id; void (*totemsrp_log_printf) ( int level, int sybsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7)));; enum memb_state memb_state; //TODO struct srp_addr next_memb; qb_loop_t *totemsrp_poll_handle; struct totem_ip_address mcast_address; void (*totemsrp_deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required); void (*totemsrp_confchg_fn) ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); void (*totemsrp_service_ready_fn) (void); void 
(*totemsrp_waiting_trans_ack_cb_fn) ( int waiting_trans_ack); void (*memb_ring_id_create_or_load) ( struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr); void (*memb_ring_id_store) ( const struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr); int global_seqno; int my_token_held; unsigned long long token_ring_id_seq; unsigned int last_released; unsigned int set_aru; int old_ring_state_saved; int old_ring_state_aru; unsigned int old_ring_state_high_seq_received; unsigned int my_last_seq; struct timeval tv_old; void *totemnet_context; struct totem_config *totem_config; unsigned int use_heartbeat; unsigned int my_trc; unsigned int my_pbl; unsigned int my_cbl; uint64_t pause_timestamp; struct memb_commit_token *commit_token; totemsrp_stats_t stats; uint32_t orf_token_discard; uint32_t originated_orf_token; uint32_t threaded_mode_enabled; uint32_t waiting_trans_ack; int flushing; void * token_recv_event_handle; void * token_sent_event_handle; char commit_token_storage[40000]; }; struct message_handlers { int count; int (*handler_functions[6]) ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); }; enum gather_state_from { TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT = 0, TOTEMSRP_GSFROM_GATHER_MISSING1 = 1, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE = 2, TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED = 3, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE = 4, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE = 5, TOTEMSRP_GSFROM_FAILED_TO_RECEIVE = 6, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE = 7, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE = 8, TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE = 9, TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE = 10, TOTEMSRP_GSFROM_MERGE_DURING_JOIN = 11, TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE = 12, TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE = 13, TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY = 14, TOTEMSRP_GSFROM_INTERFACE_CHANGE = 15, TOTEMSRP_GSFROM_MAX = TOTEMSRP_GSFROM_INTERFACE_CHANGE, }; const char* gather_state_from_desc [] = { [TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT] = "consensus timeout", [TOTEMSRP_GSFROM_GATHER_MISSING1] = "MISSING", [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE] = "The token was lost in the OPERATIONAL state.", [TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED] = "The consensus timeout expired.", [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE] = "The token was lost in the COMMIT state.", [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE] = "The token was lost in the RECOVERY state.", [TOTEMSRP_GSFROM_FAILED_TO_RECEIVE] = "failed to receive", [TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE] = "foreign message in operational state", [TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE] = "foreign message in gather state", [TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE] = "merge during operational state", [TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE] = "merge during gather state", [TOTEMSRP_GSFROM_MERGE_DURING_JOIN] = "merge during join", [TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE] = "join during operational state", [TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE] = "join during commit state", [TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY] = "join during recovery", [TOTEMSRP_GSFROM_INTERFACE_CHANGE] = "interface change", }; /* * forward decls */ static int message_handler_orf_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_mcast 
( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_memb_merge_detect ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_memb_join ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_memb_commit_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_token_hold_cancel ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static void totemsrp_instance_initialize (struct totemsrp_instance *instance); static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src); static void srp_addr_to_nodeid ( unsigned int *nodeid_out, struct srp_addr *srp_addr_in, unsigned int entries); static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b); static void memb_leave_message_send (struct totemsrp_instance *instance); static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type); static void memb_state_gather_enter (struct totemsrp_instance *instance, enum gather_state_from gather_from); static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip, unsigned int end_point); static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken, int fcc_mcasts_allowed); static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru); static void memb_ring_id_set (struct totemsrp_instance *instance, const struct memb_ring_id *ring_id); static void target_set_completed (void *context); static void memb_state_commit_token_update (struct totemsrp_instance *instance); static void memb_state_commit_token_target_set (struct totemsrp_instance *instance); static int memb_state_commit_token_send (struct totemsrp_instance *instance); static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token); static void memb_state_commit_token_create (struct totemsrp_instance *instance); static int token_hold_cancel_send (struct totemsrp_instance *instance); static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out); static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out); static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out); static void mcast_endian_convert (const struct mcast *in, struct mcast *out); static void memb_merge_detect_endian_convert ( const struct memb_merge_detect *in, struct memb_merge_detect *out); static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in); static void timer_function_orf_token_timeout (void *data); static void timer_function_pause_timeout (void *data); static void timer_function_heartbeat_timeout (void *data); static void timer_function_token_retransmit_timeout (void *data); static void timer_function_token_hold_retransmit_timeout (void *data); static void timer_function_merge_detect_timeout (void *data); static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance); static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr); static const char* gsfrom_to_msg(enum gather_state_from gsfrom); void main_deliver_fn ( void *context, const void *msg, 
unsigned int msg_len); void main_iface_change_fn ( void *context, const struct totem_ip_address *iface_address, unsigned int iface_no); struct message_handlers totemsrp_message_handlers = { 6, { message_handler_orf_token, /* MESSAGE_TYPE_ORF_TOKEN */ message_handler_mcast, /* MESSAGE_TYPE_MCAST */ message_handler_memb_merge_detect, /* MESSAGE_TYPE_MEMB_MERGE_DETECT */ message_handler_memb_join, /* MESSAGE_TYPE_MEMB_JOIN */ message_handler_memb_commit_token, /* MESSAGE_TYPE_MEMB_COMMIT_TOKEN */ message_handler_token_hold_cancel /* MESSAGE_TYPE_TOKEN_HOLD_CANCEL */ } }; #define log_printf(level, format, args...) \ do { \ instance->totemsrp_log_printf ( \ level, instance->totemsrp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) \ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemsrp_log_printf ( \ level, instance->totemsrp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ } while(0) static const char* gsfrom_to_msg(enum gather_state_from gsfrom) { if (gsfrom <= TOTEMSRP_GSFROM_MAX) { return gather_state_from_desc[gsfrom]; } else { return "UNKNOWN"; } } static void totemsrp_instance_initialize (struct totemsrp_instance *instance) { memset (instance, 0, sizeof (struct totemsrp_instance)); - list_init (&instance->token_callback_received_listhead); + qb_list_init (&instance->token_callback_received_listhead); - list_init (&instance->token_callback_sent_listhead); + qb_list_init (&instance->token_callback_sent_listhead); instance->my_received_flg = 1; instance->my_token_seq = SEQNO_START_TOKEN - 1; instance->memb_state = MEMB_STATE_OPERATIONAL; instance->set_aru = -1; instance->my_aru = SEQNO_START_MSG; instance->my_high_seq_received = SEQNO_START_MSG; instance->my_high_delivered = SEQNO_START_MSG; instance->orf_token_discard = 0; instance->originated_orf_token = 0; instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage; instance->my_id.no_addrs = INTERFACE_MAX; instance->waiting_trans_ack = 1; } static int pause_flush (struct totemsrp_instance *instance) { uint64_t now_msec; uint64_t timestamp_msec; int res = 0; now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC); timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC; if ((now_msec - timestamp_msec) > (instance->totem_config->token_timeout / 2)) { log_printf (instance->totemsrp_log_level_notice, "Process pause detected for %d ms, flushing membership messages.", (unsigned int)(now_msec - timestamp_msec)); /* * -1 indicates an error from recvmsg */ do { res = totemnet_recv_mcast_empty (instance->totemnet_context); } while (res == -1); } return (res); } static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance) { struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance; uint32_t time_now; unsigned long long nano_secs = qb_util_nano_current_get (); time_now = (nano_secs / QB_TIME_NS_IN_MSEC); if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) { /* incr latest token the index */ if (instance->stats.latest_token == (TOTEM_TOKEN_STATS_MAX - 1)) instance->stats.latest_token = 0; else instance->stats.latest_token++; if (instance->stats.earliest_token == instance->stats.latest_token) { /* we have filled up the array, start overwriting */ if (instance->stats.earliest_token == (TOTEM_TOKEN_STATS_MAX - 1)) 
instance->stats.earliest_token = 0; else instance->stats.earliest_token++; instance->stats.token[instance->stats.earliest_token].rx = 0; instance->stats.token[instance->stats.earliest_token].tx = 0; instance->stats.token[instance->stats.earliest_token].backlog_calc = 0; } instance->stats.token[instance->stats.latest_token].rx = time_now; instance->stats.token[instance->stats.latest_token].tx = 0; /* in case we drop the token */ } else { instance->stats.token[instance->stats.latest_token].tx = time_now; } return 0; } static void totempg_mtu_changed(void *context, int net_mtu) { struct totemsrp_instance *instance = context; instance->totem_config->net_mtu = net_mtu - sizeof (struct mcast); log_printf (instance->totemsrp_log_level_debug, "Net MTU changed to %d, new value is %d", net_mtu, instance->totem_config->net_mtu); } /* * Exported interfaces */ int totemsrp_initialize ( qb_loop_t *poll_handle, void **srp_context, struct totem_config *totem_config, totempg_stats_t *stats, void (*deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required), void (*confchg_fn) ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id), void (*waiting_trans_ack_cb_fn) ( int waiting_trans_ack)) { struct totemsrp_instance *instance; instance = malloc (sizeof (struct totemsrp_instance)); if (instance == NULL) { goto error_exit; } totemsrp_instance_initialize (instance); instance->totemsrp_waiting_trans_ack_cb_fn = waiting_trans_ack_cb_fn; instance->totemsrp_waiting_trans_ack_cb_fn (1); stats->srp = &instance->stats; instance->stats.latest_token = 0; instance->stats.earliest_token = 0; instance->totem_config = totem_config; /* * Configure logging */ instance->totemsrp_log_level_security = totem_config->totem_logging_configuration.log_level_security; instance->totemsrp_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemsrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemsrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemsrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemsrp_log_level_trace = totem_config->totem_logging_configuration.log_level_trace; instance->totemsrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemsrp_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Configure totem store and load functions */ instance->memb_ring_id_create_or_load = totem_config->totem_memb_ring_id_create_or_load; instance->memb_ring_id_store = totem_config->totem_memb_ring_id_store; /* * Initialize local variables for totemsrp */ totemip_copy (&instance->mcast_address, &totem_config->interfaces[0].mcast_addr); /* * Display totem configuration */ log_printf (instance->totemsrp_log_level_debug, "Token Timeout (%d ms) retransmit timeout (%d ms)", totem_config->token_timeout, totem_config->token_retransmit_timeout); log_printf (instance->totemsrp_log_level_debug, "token hold (%d ms) retransmits before loss (%d retrans)", totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const); log_printf (instance->totemsrp_log_level_debug, "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)", totem_config->join_timeout, 
totem_config->send_join_timeout, totem_config->consensus_timeout, totem_config->merge_timeout); log_printf (instance->totemsrp_log_level_debug, "downcheck (%d ms) fail to recv const (%d msgs)", totem_config->downcheck_timeout, totem_config->fail_to_recv_const); log_printf (instance->totemsrp_log_level_debug, "seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu); log_printf (instance->totemsrp_log_level_debug, "window size per rotation (%d messages) maximum messages per rotation (%d messages)", totem_config->window_size, totem_config->max_messages); log_printf (instance->totemsrp_log_level_debug, "missed count const (%d messages)", totem_config->miss_count_const); log_printf (instance->totemsrp_log_level_debug, "send threads (%d threads)", totem_config->threads); log_printf (instance->totemsrp_log_level_debug, "heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed); log_printf (instance->totemsrp_log_level_debug, "max_network_delay (%d ms)", totem_config->max_network_delay); cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX, sizeof (struct message_item), instance->threaded_mode_enabled); sq_init (&instance->regular_sort_queue, QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0); sq_init (&instance->recovery_sort_queue, QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0); instance->totemsrp_poll_handle = poll_handle; instance->totemsrp_deliver_fn = deliver_fn; instance->totemsrp_confchg_fn = confchg_fn; instance->use_heartbeat = 1; timer_function_pause_timeout (instance); if ( totem_config->heartbeat_failures_allowed == 0 ) { log_printf (instance->totemsrp_log_level_debug, "HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0"); instance->use_heartbeat = 0; } if (instance->use_heartbeat) { instance->heartbeat_timeout = (totem_config->heartbeat_failures_allowed) * totem_config->token_retransmit_timeout + totem_config->max_network_delay; if (instance->heartbeat_timeout >= totem_config->token_timeout) { log_printf (instance->totemsrp_log_level_debug, "total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)", instance->heartbeat_timeout, totem_config->token_timeout); log_printf (instance->totemsrp_log_level_debug, "heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay"); log_printf (instance->totemsrp_log_level_debug, "heartbeat timeout should be less than the token timeout. 
Heartbeat is disabled!!"); instance->use_heartbeat = 0; } else { log_printf (instance->totemsrp_log_level_debug, "total heartbeat_timeout (%d ms)", instance->heartbeat_timeout); } } totemnet_initialize ( poll_handle, &instance->totemnet_context, totem_config, stats->srp, instance, main_deliver_fn, main_iface_change_fn, totempg_mtu_changed, target_set_completed); /* * Must have net_mtu adjusted by totemnet_initialize first */ cs_queue_init (&instance->new_message_queue, MESSAGE_QUEUE_MAX, sizeof (struct message_item), instance->threaded_mode_enabled); cs_queue_init (&instance->new_message_queue_trans, MESSAGE_QUEUE_MAX, sizeof (struct message_item), instance->threaded_mode_enabled); totemsrp_callback_token_create (instance, &instance->token_recv_event_handle, TOTEM_CALLBACK_TOKEN_RECEIVED, 0, token_event_stats_collector, instance); totemsrp_callback_token_create (instance, &instance->token_sent_event_handle, TOTEM_CALLBACK_TOKEN_SENT, 0, token_event_stats_collector, instance); *srp_context = instance; return (0); error_exit: return (-1); } void totemsrp_finalize ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; memb_leave_message_send (instance); totemnet_finalize (instance->totemnet_context); cs_queue_free (&instance->new_message_queue); cs_queue_free (&instance->new_message_queue_trans); cs_queue_free (&instance->retrans_message_queue); sq_free (&instance->regular_sort_queue); sq_free (&instance->recovery_sort_queue); free (instance); } /* * Return configured interfaces. interfaces is array of totem_ip addresses allocated by caller, * with interaces_size number of items. iface_count is final number of interfaces filled by this * function. * * Function returns 0 on success, otherwise if interfaces array is not big enough, -2 is returned, * and if interface was not found, -1 is returned. 
*/ int totemsrp_ifaces_get ( void *srp_context, unsigned int nodeid, struct totem_ip_address *interfaces, unsigned int interfaces_size, char ***status, unsigned int *iface_count) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res = 0; unsigned int found = 0; unsigned int i; for (i = 0; i < instance->my_memb_entries; i++) { if (instance->my_memb_list[i].addr[0].nodeid == nodeid) { found = 1; break; } } if (found) { *iface_count = instance->totem_config->interface_count; if (interfaces_size >= *iface_count) { memcpy (interfaces, instance->my_memb_list[i].addr, sizeof (struct totem_ip_address) * *iface_count); } else { res = -2; } goto finish; } for (i = 0; i < instance->my_left_memb_entries; i++) { if (instance->my_left_memb_list[i].addr[0].nodeid == nodeid) { found = 1; break; } } if (found) { *iface_count = instance->totem_config->interface_count; if (interfaces_size >= *iface_count) { memcpy (interfaces, instance->my_left_memb_list[i].addr, sizeof (struct totem_ip_address) * *iface_count); } else { res = -2; } } else { res = -1; } finish: totemnet_ifaces_get(instance->totemnet_context, status, iface_count); return (res); } int totemsrp_crypto_set ( void *srp_context, const char *cipher_type, const char *hash_type) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res; res = totemnet_crypto_set(instance->totemnet_context, cipher_type, hash_type); return (res); } unsigned int totemsrp_my_nodeid_get ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; unsigned int res; res = instance->totem_config->interfaces[0].boundto.nodeid; return (res); } int totemsrp_my_family_get ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res; res = instance->totem_config->interfaces[0].boundto.family; return (res); } int totemsrp_ring_reenable ( void *srp_context) { return (0); } /* * Set operations for use by the membership algorithm */ static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b) { unsigned int i; unsigned int res; for (i = 0; i < 1; i++) { res = totemip_equal (&a->addr[i], &b->addr[i]); if (res == 0) { return (0); } } return (1); } static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src) { unsigned int i; dest->no_addrs = src->no_addrs; for (i = 0; i < INTERFACE_MAX; i++) { totemip_copy (&dest->addr[i], &src->addr[i]); } } static void srp_addr_to_nodeid ( unsigned int *nodeid_out, struct srp_addr *srp_addr_in, unsigned int entries) { unsigned int i; for (i = 0; i < entries; i++) { nodeid_out[i] = srp_addr_in[i].addr[0].nodeid; } } static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in) { int i; for (i = 0; i < INTERFACE_MAX; i++) { totemip_copy_endian_convert (&out->addr[i], &in->addr[i]); } } static void memb_consensus_reset (struct totemsrp_instance *instance) { instance->consensus_list_entries = 0; } static void memb_set_subtract ( struct srp_addr *out_list, int *out_list_entries, struct srp_addr *one_list, int one_list_entries, struct srp_addr *two_list, int two_list_entries) { int found = 0; int i; int j; *out_list_entries = 0; for (i = 0; i < one_list_entries; i++) { for (j = 0; j < two_list_entries; j++) { if (srp_addr_equal (&one_list[i], &two_list[j])) { found = 1; break; } } if (found == 0) { srp_addr_copy (&out_list[*out_list_entries], &one_list[i]); *out_list_entries = *out_list_entries + 1; } found = 0; } } /* * Set consensus for a 
specific processor */ static void memb_consensus_set ( struct totemsrp_instance *instance, const struct srp_addr *addr) { int found = 0; int i; if (addr->addr[0].nodeid == LEAVE_DUMMY_NODEID) return; for (i = 0; i < instance->consensus_list_entries; i++) { if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) { found = 1; break; /* found entry */ } } srp_addr_copy (&instance->consensus_list[i].addr, addr); instance->consensus_list[i].set = 1; if (found == 0) { instance->consensus_list_entries++; } return; } /* * Is consensus set for a specific processor */ static int memb_consensus_isset ( struct totemsrp_instance *instance, const struct srp_addr *addr) { int i; for (i = 0; i < instance->consensus_list_entries; i++) { if (srp_addr_equal (addr, &instance->consensus_list[i].addr)) { return (instance->consensus_list[i].set); } } return (0); } /* * Is consensus agreed upon based upon consensus database */ static int memb_consensus_agreed ( struct totemsrp_instance *instance) { struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; int token_memb_entries = 0; int agreed = 1; int i; memb_set_subtract (token_memb, &token_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); for (i = 0; i < token_memb_entries; i++) { if (memb_consensus_isset (instance, &token_memb[i]) == 0) { agreed = 0; break; } } if (agreed && instance->failed_to_recv == 1) { /* * Both nodes agreed on our failure. We don't care how many proc list items left because we * will create single ring anyway. */ return (agreed); } assert (token_memb_entries >= 1); return (agreed); } static void memb_consensus_notset ( struct totemsrp_instance *instance, struct srp_addr *no_consensus_list, int *no_consensus_list_entries, struct srp_addr *comparison_list, int comparison_list_entries) { int i; *no_consensus_list_entries = 0; for (i = 0; i < instance->my_proc_list_entries; i++) { if (memb_consensus_isset (instance, &instance->my_proc_list[i]) == 0) { srp_addr_copy (&no_consensus_list[*no_consensus_list_entries], &instance->my_proc_list[i]); *no_consensus_list_entries = *no_consensus_list_entries + 1; } } } /* * Is set1 equal to set2 Entries can be in different orders */ static int memb_set_equal ( struct srp_addr *set1, int set1_entries, struct srp_addr *set2, int set2_entries) { int i; int j; int found = 0; if (set1_entries != set2_entries) { return (0); } for (i = 0; i < set2_entries; i++) { for (j = 0; j < set1_entries; j++) { if (srp_addr_equal (&set1[j], &set2[i])) { found = 1; break; } } if (found == 0) { return (0); } found = 0; } return (1); } /* * Is subset fully contained in fullset */ static int memb_set_subset ( const struct srp_addr *subset, int subset_entries, const struct srp_addr *fullset, int fullset_entries) { int i; int j; int found = 0; if (subset_entries > fullset_entries) { return (0); } for (i = 0; i < subset_entries; i++) { for (j = 0; j < fullset_entries; j++) { if (srp_addr_equal (&subset[i], &fullset[j])) { found = 1; } } if (found == 0) { return (0); } found = 0; } return (1); } /* * merge subset into fullset taking care not to add duplicates */ static void memb_set_merge ( const struct srp_addr *subset, int subset_entries, struct srp_addr *fullset, int *fullset_entries) { int found = 0; int i; int j; for (i = 0; i < subset_entries; i++) { for (j = 0; j < *fullset_entries; j++) { if (srp_addr_equal (&fullset[j], &subset[i])) { found = 1; break; } } if (found == 0) { srp_addr_copy (&fullset[*fullset_entries], &subset[i]); 
*fullset_entries = *fullset_entries + 1; } found = 0; } return; } static void memb_set_and_with_ring_id ( struct srp_addr *set1, struct memb_ring_id *set1_ring_ids, int set1_entries, struct srp_addr *set2, int set2_entries, struct memb_ring_id *old_ring_id, struct srp_addr *and, int *and_entries) { int i; int j; int found = 0; *and_entries = 0; for (i = 0; i < set2_entries; i++) { for (j = 0; j < set1_entries; j++) { if (srp_addr_equal (&set1[j], &set2[i])) { if (memcmp (&set1_ring_ids[j], old_ring_id, sizeof (struct memb_ring_id)) == 0) { found = 1; } break; } } if (found) { srp_addr_copy (&and[*and_entries], &set1[j]); *and_entries = *and_entries + 1; } found = 0; } return; } #ifdef CODE_COVERAGE static void memb_set_print ( char *string, struct srp_addr *list, int list_entries) { int i; int j; printf ("List '%s' contains %d entries:\n", string, list_entries); for (i = 0; i < list_entries; i++) { printf ("Address %d with %d rings\n", i, list[i].no_addrs); for (j = 0; j < list[i].no_addrs; j++) { printf ("\tiface %d %s\n", j, totemip_print (&list[i].addr[j])); printf ("\tfamily %d\n", list[i].addr[j].family); } } } #endif static void my_leave_memb_clear( struct totemsrp_instance *instance) { memset(instance->my_leave_memb_list, 0, sizeof(instance->my_leave_memb_list)); instance->my_leave_memb_entries = 0; } static unsigned int my_leave_memb_match( struct totemsrp_instance *instance, unsigned int nodeid) { int i; unsigned int ret = 0; for (i = 0; i < instance->my_leave_memb_entries; i++){ if (instance->my_leave_memb_list[i] == nodeid){ ret = nodeid; break; } } return ret; } static void my_leave_memb_set( struct totemsrp_instance *instance, unsigned int nodeid) { int i, found = 0; for (i = 0; i < instance->my_leave_memb_entries; i++){ if (instance->my_leave_memb_list[i] == nodeid){ found = 1; break; } } if (found == 1) { return; } if (instance->my_leave_memb_entries < (PROCESSOR_COUNT_MAX - 1)) { instance->my_leave_memb_list[instance->my_leave_memb_entries] = nodeid; instance->my_leave_memb_entries++; } else { log_printf (instance->totemsrp_log_level_warning, "Cannot set LEAVE nodeid=%d", nodeid); } } static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance) { assert (instance != NULL); return totemnet_buffer_alloc (instance->totemnet_context); } static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr) { assert (instance != NULL); totemnet_buffer_release (instance->totemnet_context, ptr); } static void reset_token_retransmit_timeout (struct totemsrp_instance *instance) { int32_t res; qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout); res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_token_retransmit_timeout, &instance->timer_orf_token_retransmit_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "reset_token_retransmit_timeout - qb_loop_timer_add error : %d", res); } } static void start_merge_detect_timeout (struct totemsrp_instance *instance) { int32_t res; if (instance->my_merge_detect_timeout_outstanding == 0) { res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_merge_detect_timeout, &instance->timer_merge_detect_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "start_merge_detect_timeout - qb_loop_timer_add error : %d", res); } 
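/* Mark the merge detect timer as outstanding so it is not scheduled a second time before it fires or is cancelled */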
instance->my_merge_detect_timeout_outstanding = 1; } } static void cancel_merge_detect_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_merge_detect_timeout); instance->my_merge_detect_timeout_outstanding = 0; } /* * ring_state_* is used to save and restore the sort queue * state when a recovery operation fails (and enters gather) */ static void old_ring_state_save (struct totemsrp_instance *instance) { if (instance->old_ring_state_saved == 0) { instance->old_ring_state_saved = 1; memcpy (&instance->my_old_ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); instance->old_ring_state_aru = instance->my_aru; instance->old_ring_state_high_seq_received = instance->my_high_seq_received; log_printf (instance->totemsrp_log_level_debug, "Saving state aru %x high seq received %x", instance->my_aru, instance->my_high_seq_received); } } static void old_ring_state_restore (struct totemsrp_instance *instance) { instance->my_aru = instance->old_ring_state_aru; instance->my_high_seq_received = instance->old_ring_state_high_seq_received; log_printf (instance->totemsrp_log_level_debug, "Restoring instance->my_aru %x my high seq received %x", instance->my_aru, instance->my_high_seq_received); } static void old_ring_state_reset (struct totemsrp_instance *instance) { log_printf (instance->totemsrp_log_level_debug, "Resetting old ring state"); instance->old_ring_state_saved = 0; } static void reset_pause_timeout (struct totemsrp_instance *instance) { int32_t res; qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_pause_timeout); res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5, (void *)instance, timer_function_pause_timeout, &instance->timer_pause_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "reset_pause_timeout - qb_loop_timer_add error : %d", res); } } static void reset_token_timeout (struct totemsrp_instance *instance) { int32_t res; qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_orf_token_timeout, &instance->timer_orf_token_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "reset_token_timeout - qb_loop_timer_add error : %d", res); } } static void reset_heartbeat_timeout (struct totemsrp_instance *instance) { int32_t res; qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->heartbeat_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_heartbeat_timeout, &instance->timer_heartbeat_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "reset_heartbeat_timeout - qb_loop_timer_add error : %d", res); } } static void cancel_token_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); } static void cancel_heartbeat_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); } static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout); } static void start_token_hold_retransmit_timeout 
(struct totemsrp_instance *instance) { int32_t res; res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_hold_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_token_hold_retransmit_timeout, &instance->timer_orf_token_hold_retransmit_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "start_token_hold_retransmit_timeout - qb_loop_timer_add error : %d", res); } } static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_hold_retransmit_timeout); } static void memb_state_consensus_timeout_expired ( struct totemsrp_instance *instance) { struct srp_addr no_consensus_list[PROCESSOR_COUNT_MAX]; int no_consensus_list_entries; instance->stats.consensus_timeouts++; if (memb_consensus_agreed (instance)) { memb_consensus_reset (instance); memb_consensus_set (instance, &instance->my_id); reset_token_timeout (instance); // REVIEWED } else { memb_consensus_notset ( instance, no_consensus_list, &no_consensus_list_entries, instance->my_proc_list, instance->my_proc_list_entries); memb_set_merge (no_consensus_list, no_consensus_list_entries, instance->my_failed_list, &instance->my_failed_list_entries); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT); } } static void memb_join_message_send (struct totemsrp_instance *instance); static void memb_merge_detect_transmit (struct totemsrp_instance *instance); /* * Timers used for various states of the membership algorithm */ static void timer_function_pause_timeout (void *data) { struct totemsrp_instance *instance = data; instance->pause_timestamp = qb_util_nano_current_get (); reset_pause_timeout (instance); } static void memb_recovery_state_token_loss (struct totemsrp_instance *instance) { old_ring_state_restore (instance); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE); instance->stats.recovery_token_lost++; } static void timer_function_orf_token_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: log_printf (instance->totemsrp_log_level_debug, "The token was lost in the OPERATIONAL state."); log_printf (instance->totemsrp_log_level_notice, "A processor failed, forming new configuration."); totemnet_iface_check (instance->totemnet_context); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE); instance->stats.operational_token_lost++; break; case MEMB_STATE_GATHER: log_printf (instance->totemsrp_log_level_debug, "The consensus timeout expired."); memb_state_consensus_timeout_expired (instance); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED); instance->stats.gather_token_lost++; break; case MEMB_STATE_COMMIT: log_printf (instance->totemsrp_log_level_debug, "The token was lost in the COMMIT state."); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE); instance->stats.commit_token_lost++; break; case MEMB_STATE_RECOVERY: log_printf (instance->totemsrp_log_level_debug, "The token was lost in the RECOVERY state."); memb_recovery_state_token_loss (instance); instance->orf_token_discard = 1; break; } } static void timer_function_heartbeat_timeout (void *data) { struct totemsrp_instance *instance = data; log_printf (instance->totemsrp_log_level_debug, "HeartBeat Timer expired Invoking token loss mechanism in state %d ", 
instance->memb_state); timer_function_orf_token_timeout(data); } static void memb_timer_function_state_gather (void *data) { struct totemsrp_instance *instance = data; int32_t res; switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: assert (0); /* this should never happen */ break; case MEMB_STATE_GATHER: case MEMB_STATE_COMMIT: memb_join_message_send (instance); /* * Restart the join timeout */ qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, memb_timer_function_state_gather, &instance->memb_timer_state_gather_join_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "memb_timer_function_state_gather - qb_loop_timer_add error : %d", res); } break; } } static void memb_timer_function_gather_consensus_timeout (void *data) { struct totemsrp_instance *instance = data; memb_state_consensus_timeout_expired (instance); } static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance) { unsigned int i; struct sort_queue_item *recovery_message_item; struct sort_queue_item regular_message_item; unsigned int range = 0; int res; void *ptr; struct mcast *mcast; log_printf (instance->totemsrp_log_level_debug, "recovery to regular %x-%x", SEQNO_START_MSG + 1, instance->my_aru); range = instance->my_aru - SEQNO_START_MSG; /* * Move messages from recovery to regular sort queue */ // todo should i be initialized to 0 or 1 ? for (i = 1; i <= range; i++) { res = sq_item_get (&instance->recovery_sort_queue, i + SEQNO_START_MSG, &ptr); if (res != 0) { continue; } recovery_message_item = ptr; /* * Convert recovery message into regular message */ mcast = recovery_message_item->mcast; if (mcast->header.encapsulated == MESSAGE_ENCAPSULATED) { /* * Message is a recovery message encapsulated * in a new ring message */ regular_message_item.mcast = (struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast)); regular_message_item.msg_len = recovery_message_item->msg_len - sizeof (struct mcast); mcast = regular_message_item.mcast; } else { /* * TODO this case shouldn't happen */ continue; } log_printf (instance->totemsrp_log_level_debug, "comparing if ring id is for this processor's old ring seqno %d", mcast->seq); /* * Only add this message to the regular sort * queue if it was originated with the same ring * id as the previous ring */ if (memcmp (&instance->my_old_ring_id, &mcast->ring_id, sizeof (struct memb_ring_id)) == 0) { res = sq_item_inuse (&instance->regular_sort_queue, mcast->seq); if (res == 0) { sq_item_add (&instance->regular_sort_queue, &regular_message_item, mcast->seq); if (sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) { instance->old_ring_state_high_seq_received = mcast->seq; } } } else { log_printf (instance->totemsrp_log_level_debug, "-not adding msg with seq no %x", mcast->seq); } } } /* * Change states in the state machine of the membership algorithm */ static void memb_state_operational_enter (struct totemsrp_instance *instance) { struct srp_addr joined_list[PROCESSOR_COUNT_MAX]; int joined_list_entries = 0; unsigned int aru_save; unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX]; unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX]; unsigned int new_memb_list_totemip[PROCESSOR_COUNT_MAX]; unsigned int left_list[PROCESSOR_COUNT_MAX]; unsigned int i; unsigned int res; char
left_node_msg[1024]; char joined_node_msg[1024]; char failed_node_msg[1024]; instance->originated_orf_token = 0; memb_consensus_reset (instance); old_ring_state_reset (instance); deliver_messages_from_recovery_to_regular (instance); log_printf (instance->totemsrp_log_level_trace, "Delivering to app %x to %x", instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received); aru_save = instance->my_aru; instance->my_aru = instance->old_ring_state_aru; messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received); /* * Calculate joined and left list */ memb_set_subtract (instance->my_left_memb_list, &instance->my_left_memb_entries, instance->my_memb_list, instance->my_memb_entries, instance->my_trans_memb_list, instance->my_trans_memb_entries); memb_set_subtract (joined_list, &joined_list_entries, instance->my_new_memb_list, instance->my_new_memb_entries, instance->my_trans_memb_list, instance->my_trans_memb_entries); /* * Install new membership */ instance->my_memb_entries = instance->my_new_memb_entries; memcpy (&instance->my_memb_list, instance->my_new_memb_list, sizeof (struct srp_addr) * instance->my_memb_entries); instance->last_released = 0; instance->my_set_retrans_flg = 0; /* * Deliver transitional configuration to application */ srp_addr_to_nodeid (left_list, instance->my_left_memb_list, instance->my_left_memb_entries); srp_addr_to_nodeid (trans_memb_list_totemip, instance->my_trans_memb_list, instance->my_trans_memb_entries); instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL, trans_memb_list_totemip, instance->my_trans_memb_entries, left_list, instance->my_left_memb_entries, 0, 0, &instance->my_ring_id); instance->waiting_trans_ack = 1; instance->totemsrp_waiting_trans_ack_cb_fn (1); // TODO we need to filter to ensure we only deliver those // messages which are part of instance->my_deliver_memb messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received); instance->my_aru = aru_save; /* * Deliver regular configuration to application */ srp_addr_to_nodeid (new_memb_list_totemip, instance->my_new_memb_list, instance->my_new_memb_entries); srp_addr_to_nodeid (joined_list_totemip, joined_list, joined_list_entries); instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR, new_memb_list_totemip, instance->my_new_memb_entries, 0, 0, joined_list_totemip, joined_list_entries, &instance->my_ring_id); /* * The recovery sort queue now becomes the regular * sort queue. It is necessary to copy the state * into the regular sort queue. */ sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue); instance->my_last_aru = SEQNO_START_MSG; /* When making my_proc_list smaller, ensure that the * now non-used entries are zero-ed out. There are some suspect * asserts that assume that there are always 2 entries in the list. * These fail when my_proc_list is reduced to 1 entry (and the * valid [0] entry is the same as the 'unused' [1] entry). */ memset(instance->my_proc_list, 0, sizeof (struct srp_addr) * instance->my_proc_list_entries); instance->my_proc_list_entries = instance->my_new_memb_entries; memcpy (instance->my_proc_list, instance->my_new_memb_list, sizeof (struct srp_addr) * instance->my_memb_entries); instance->my_failed_list_entries = 0; /* * TODO Not exactly to spec * * At the entry to this function all messages without a gap are * delivered.
* * This code throws away messages from the last gap in the sort queue * to my_high_seq_received * * What should really happen is we should deliver all messages up to * a gap, then deliver the transitional configuration, then deliver * the messages between the first gap and my_high_seq_received, then * deliver a regular configuration, then deliver the regular * configuration * * Unfortunately totempg doesn't appear to like this operating mode * which needs more inspection */ i = instance->my_high_seq_received + 1; do { void *ptr; i -= 1; res = sq_item_get (&instance->regular_sort_queue, i, &ptr); if (i == 0) { break; } } while (res); instance->my_high_delivered = i; for (i = 0; i <= instance->my_high_delivered; i++) { void *ptr; res = sq_item_get (&instance->regular_sort_queue, i, &ptr); if (res == 0) { struct sort_queue_item *regular_message; regular_message = ptr; free (regular_message->mcast); } } sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered); instance->last_released = instance->my_high_delivered; if (joined_list_entries) { int sptr = 0; sptr += snprintf(joined_node_msg, sizeof(joined_node_msg)-sptr, " joined:"); for (i=0; i< joined_list_entries; i++) { sptr += snprintf(joined_node_msg+sptr, sizeof(joined_node_msg)-sptr, " %u", joined_list_totemip[i]); } } else { joined_node_msg[0] = '\0'; } if (instance->my_left_memb_entries) { int sptr = 0; int sptr2 = 0; sptr += snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:"); for (i=0; i< instance->my_left_memb_entries; i++) { sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " %u", left_list[i]); } for (i=0; i< instance->my_left_memb_entries; i++) { if (my_leave_memb_match(instance, left_list[i]) == 0) { if (sptr2 == 0) { sptr2 += snprintf(failed_node_msg, sizeof(failed_node_msg)-sptr2, " failed:"); } sptr2 += snprintf(failed_node_msg+sptr2, sizeof(failed_node_msg)-sptr2, " %u", left_list[i]); } } if (sptr2 == 0) { failed_node_msg[0] = '\0'; } } else { left_node_msg[0] = '\0'; failed_node_msg[0] = '\0'; } my_leave_memb_clear(instance); log_printf (instance->totemsrp_log_level_debug, "entering OPERATIONAL state."); log_printf (instance->totemsrp_log_level_notice, "A new membership (%s:%lld) was formed.
Members%s%s", totemip_print (&instance->my_ring_id.rep), instance->my_ring_id.seq, joined_node_msg, left_node_msg); if (strlen(failed_node_msg)) { log_printf (instance->totemsrp_log_level_notice, "Failed to receive the leave message.%s", failed_node_msg); } instance->memb_state = MEMB_STATE_OPERATIONAL; instance->stats.operational_entered++; instance->stats.continuous_gather = 0; instance->my_received_flg = 1; reset_pause_timeout (instance); /* * Save ring id information from this configuration to determine * which processors are transitioning from old regular configuration * in to new regular configuration on the next configuration change */ memcpy (&instance->my_old_ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); return; } static void memb_state_gather_enter ( struct totemsrp_instance *instance, enum gather_state_from gather_from) { int32_t res; instance->orf_token_discard = 1; instance->originated_orf_token = 0; memb_set_merge ( &instance->my_id, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_join_message_send (instance); /* * Restart the join timeout */ qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, memb_timer_function_state_gather, &instance->memb_timer_state_gather_join_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(1) : %d", res); } /* * Restart the consensus timeout */ qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout); res = qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->consensus_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, memb_timer_function_gather_consensus_timeout, &instance->memb_timer_state_gather_consensus_timeout); if (res != 0) { log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(2) : %d", res); } /* * Cancel the token loss and token retransmission timeouts */ cancel_token_retransmit_timeout (instance); // REVIEWED cancel_token_timeout (instance); // REVIEWED cancel_merge_detect_timeout (instance); memb_consensus_reset (instance); memb_consensus_set (instance, &instance->my_id); log_printf (instance->totemsrp_log_level_debug, "entering GATHER state from %d(%s).", gather_from, gsfrom_to_msg(gather_from)); instance->memb_state = MEMB_STATE_GATHER; instance->stats.gather_entered++; if (gather_from == TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED) { /* * State 3 means gather, so we are continuously gathering. 
*/ instance->stats.continuous_gather++; } return; } static void timer_function_token_retransmit_timeout (void *data); static void target_set_completed ( void *context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; memb_state_commit_token_send (instance); } static void memb_state_commit_enter ( struct totemsrp_instance *instance) { old_ring_state_save (instance); memb_state_commit_token_update (instance); memb_state_commit_token_target_set (instance); qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); instance->memb_timer_state_gather_join_timeout = 0; qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout); instance->memb_timer_state_gather_consensus_timeout = 0; memb_ring_id_set (instance, &instance->commit_token->ring_id); instance->memb_ring_id_store (&instance->my_ring_id, &instance->my_id.addr[0]); instance->token_ring_id_seq = instance->my_ring_id.seq; log_printf (instance->totemsrp_log_level_debug, "entering COMMIT state."); instance->memb_state = MEMB_STATE_COMMIT; reset_token_retransmit_timeout (instance); // REVIEWED reset_token_timeout (instance); // REVIEWED instance->stats.commit_entered++; instance->stats.continuous_gather = 0; /* * reset all flow control variables since we are starting a new ring */ instance->my_trc = 0; instance->my_pbl = 0; instance->my_cbl = 0; /* * commit token sent after callback that token target has been set */ } static void memb_state_recovery_enter ( struct totemsrp_instance *instance, struct memb_commit_token *commit_token) { int i; int local_received_flg = 1; unsigned int low_ring_aru; unsigned int range = 0; unsigned int messages_originated = 0; const struct srp_addr *addr; struct memb_commit_token_memb_entry *memb_list; struct memb_ring_id my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX]; addr = (const struct srp_addr *)commit_token->end_of_commit_token; memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries); log_printf (instance->totemsrp_log_level_debug, "entering RECOVERY state."); instance->orf_token_discard = 0; instance->my_high_ring_delivered = 0; sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG); cs_queue_reinit (&instance->retrans_message_queue); low_ring_aru = instance->old_ring_state_high_seq_received; memb_state_commit_token_send_recovery (instance, commit_token); instance->my_token_seq = SEQNO_START_TOKEN - 1; /* * Build regular configuration */ totemnet_processor_count_set ( instance->totemnet_context, commit_token->addr_entries); /* * Build transitional configuration */ for (i = 0; i < instance->my_new_memb_entries; i++) { memcpy (&my_new_memb_ring_id_list[i], &memb_list[i].ring_id, sizeof (struct memb_ring_id)); } memb_set_and_with_ring_id ( instance->my_new_memb_list, my_new_memb_ring_id_list, instance->my_new_memb_entries, instance->my_memb_list, instance->my_memb_entries, &instance->my_old_ring_id, instance->my_trans_memb_list, &instance->my_trans_memb_entries); for (i = 0; i < instance->my_trans_memb_entries; i++) { log_printf (instance->totemsrp_log_level_debug, "TRANS [%d] member %s:", i, totemip_print (&instance->my_trans_memb_list[i].addr[0])); } for (i = 0; i < instance->my_new_memb_entries; i++) { log_printf (instance->totemsrp_log_level_debug, "position [%d] member %s:", i, totemip_print (&addr[i].addr[0])); log_printf (instance->totemsrp_log_level_debug, "previous ring seq %llx rep %s", memb_list[i].ring_id.seq, totemip_print (&memb_list[i].ring_id.rep)); 
log_printf (instance->totemsrp_log_level_debug, "aru %x high delivered %x received flag %d", memb_list[i].aru, memb_list[i].high_delivered, memb_list[i].received_flg); // assert (totemip_print (&memb_list[i].ring_id.rep) != 0); } /* * Determine if any received flag is false */ for (i = 0; i < commit_token->addr_entries; i++) { if (memb_set_subset (&instance->my_new_memb_list[i], 1, instance->my_trans_memb_list, instance->my_trans_memb_entries) && memb_list[i].received_flg == 0) { instance->my_deliver_memb_entries = instance->my_trans_memb_entries; memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list, sizeof (struct srp_addr) * instance->my_trans_memb_entries); local_received_flg = 0; break; } } if (local_received_flg == 1) { goto no_originate; } /* Else originate messages if we should */ /* * Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership */ for (i = 0; i < commit_token->addr_entries; i++) { if (memb_set_subset (&instance->my_new_memb_list[i], 1, instance->my_deliver_memb_list, instance->my_deliver_memb_entries) && memcmp (&instance->my_old_ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) { low_ring_aru = memb_list[i].aru; } if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) { instance->my_high_ring_delivered = memb_list[i].high_delivered; } } } /* * Copy all old ring messages to instance->retrans_message_queue */ range = instance->old_ring_state_high_seq_received - low_ring_aru; if (range == 0) { /* * No messages to copy */ goto no_originate; } assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); log_printf (instance->totemsrp_log_level_debug, "copying all old ring messages from %x-%x.", low_ring_aru + 1, instance->old_ring_state_high_seq_received); for (i = 1; i <= range; i++) { struct sort_queue_item *sort_queue_item; struct message_item message_item; void *ptr; int res; res = sq_item_get (&instance->regular_sort_queue, low_ring_aru + i, &ptr); if (res != 0) { continue; } sort_queue_item = ptr; messages_originated++; memset (&message_item, 0, sizeof (struct message_item)); // TODO LEAK message_item.mcast = totemsrp_buffer_alloc (instance); assert (message_item.mcast); message_item.mcast->header.type = MESSAGE_TYPE_MCAST; srp_addr_copy (&message_item.mcast->system_from, &instance->my_id); message_item.mcast->header.encapsulated = MESSAGE_ENCAPSULATED; message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid; assert (message_item.mcast->header.nodeid); message_item.mcast->header.endian_detector = ENDIAN_LOCAL; memcpy (&message_item.mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast); memcpy (((char *)message_item.mcast) + sizeof (struct mcast), sort_queue_item->mcast, sort_queue_item->msg_len); cs_queue_item_add (&instance->retrans_message_queue, &message_item); } log_printf (instance->totemsrp_log_level_debug, "Originated %d messages in RECOVERY.", messages_originated); goto originated; no_originate: log_printf (instance->totemsrp_log_level_debug, "Did not need to originate any messages in recovery."); originated: instance->my_aru = SEQNO_START_MSG; instance->my_aru_count = 0; instance->my_seq_unchanged = 0; instance->my_high_seq_received = SEQNO_START_MSG; instance->my_install_seq = SEQNO_START_MSG; instance->last_released = SEQNO_START_MSG; reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // 
REVIEWED instance->memb_state = MEMB_STATE_RECOVERY; instance->stats.recovery_entered++; instance->stats.continuous_gather = 0; return; } void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; token_hold_cancel_send (instance); return; } int totemsrp_mcast ( void *srp_context, struct iovec *iovec, unsigned int iov_len, int guarantee) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int i; struct message_item message_item; char *addr; unsigned int addr_idx; struct cs_queue *queue_use; if (instance->waiting_trans_ack) { queue_use = &instance->new_message_queue_trans; } else { queue_use = &instance->new_message_queue; } if (cs_queue_is_full (queue_use)) { log_printf (instance->totemsrp_log_level_debug, "queue full"); return (-1); } memset (&message_item, 0, sizeof (struct message_item)); /* * Allocate pending item */ message_item.mcast = totemsrp_buffer_alloc (instance); if (message_item.mcast == 0) { goto error_mcast; } /* * Set mcast header */ memset(message_item.mcast, 0, sizeof (struct mcast)); message_item.mcast->header.type = MESSAGE_TYPE_MCAST; message_item.mcast->header.endian_detector = ENDIAN_LOCAL; message_item.mcast->header.encapsulated = MESSAGE_NOT_ENCAPSULATED; message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid; assert (message_item.mcast->header.nodeid); message_item.mcast->guarantee = guarantee; srp_addr_copy (&message_item.mcast->system_from, &instance->my_id); addr = (char *)message_item.mcast; addr_idx = sizeof (struct mcast); for (i = 0; i < iov_len; i++) { memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len); addr_idx += iovec[i].iov_len; } message_item.msg_len = addr_idx; log_printf (instance->totemsrp_log_level_trace, "mcasted message added to pending queue"); instance->stats.mcast_tx++; cs_queue_item_add (queue_use, &message_item); return (0); error_mcast: return (-1); } /* * Determine if there is room to queue a new message */ int totemsrp_avail (void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int avail; struct cs_queue *queue_use; if (instance->waiting_trans_ack) { queue_use = &instance->new_message_queue_trans; } else { queue_use = &instance->new_message_queue; } cs_queue_avail (queue_use, &avail); return (avail); } /* * ORF Token Management */ /* * Recast message to mcast group if it is available */ static int orf_token_remcast ( struct totemsrp_instance *instance, int seq) { struct sort_queue_item *sort_queue_item; int res; void *ptr; struct sq *sort_queue; if (instance->memb_state == MEMB_STATE_RECOVERY) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } res = sq_in_range (sort_queue, seq); if (res == 0) { log_printf (instance->totemsrp_log_level_debug, "sq not in range"); return (-1); } /* * Get RTR item at seq, if not available, return */ res = sq_item_get (sort_queue, seq, &ptr); if (res != 0) { return -1; } sort_queue_item = ptr; totemnet_mcast_noflush_send ( instance->totemnet_context, sort_queue_item->mcast, sort_queue_item->msg_len); return (0); } /* * Free all freeable messages from ring */ static void messages_free ( struct totemsrp_instance *instance, unsigned int token_aru) { struct sort_queue_item *regular_message; unsigned int i; int res; int log_release = 0; unsigned int release_to; unsigned int range = 0; release_to = token_aru; if (sq_lt_compare (instance->my_last_aru, release_to)) 
{ release_to = instance->my_last_aru; } if (sq_lt_compare (instance->my_high_delivered, release_to)) { release_to = instance->my_high_delivered; } /* * Ensure we dont try release before an already released point */ if (sq_lt_compare (release_to, instance->last_released)) { return; } range = release_to - instance->last_released; assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); /* * Release retransmit list items if group aru indicates they are transmitted */ for (i = 1; i <= range; i++) { void *ptr; res = sq_item_get (&instance->regular_sort_queue, instance->last_released + i, &ptr); if (res == 0) { regular_message = ptr; totemsrp_buffer_release (instance, regular_message->mcast); } sq_items_release (&instance->regular_sort_queue, instance->last_released + i); log_release = 1; } instance->last_released += range; if (log_release) { log_printf (instance->totemsrp_log_level_trace, "releasing messages up to and including %x", release_to); } } static void update_aru ( struct totemsrp_instance *instance) { unsigned int i; int res; struct sq *sort_queue; unsigned int range; unsigned int my_aru_saved = 0; if (instance->memb_state == MEMB_STATE_RECOVERY) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } range = instance->my_high_seq_received - instance->my_aru; my_aru_saved = instance->my_aru; for (i = 1; i <= range; i++) { void *ptr; res = sq_item_get (sort_queue, my_aru_saved + i, &ptr); /* * If hole, stop updating aru */ if (res != 0) { break; } } instance->my_aru += i - 1; } /* * Multicasts pending messages onto the ring (requires orf_token possession) */ static int orf_token_mcast ( struct totemsrp_instance *instance, struct orf_token *token, int fcc_mcasts_allowed) { struct message_item *message_item = 0; struct cs_queue *mcast_queue; struct sq *sort_queue; struct sort_queue_item sort_queue_item; struct mcast *mcast; unsigned int fcc_mcast_current; if (instance->memb_state == MEMB_STATE_RECOVERY) { mcast_queue = &instance->retrans_message_queue; sort_queue = &instance->recovery_sort_queue; reset_token_retransmit_timeout (instance); // REVIEWED } else { if (instance->waiting_trans_ack) { mcast_queue = &instance->new_message_queue_trans; } else { mcast_queue = &instance->new_message_queue; } sort_queue = &instance->regular_sort_queue; } for (fcc_mcast_current = 0; fcc_mcast_current < fcc_mcasts_allowed; fcc_mcast_current++) { if (cs_queue_is_empty (mcast_queue)) { break; } message_item = (struct message_item *)cs_queue_item_get (mcast_queue); message_item->mcast->seq = ++token->seq; message_item->mcast->this_seqno = instance->global_seqno++; /* * Build IO vector */ memset (&sort_queue_item, 0, sizeof (struct sort_queue_item)); sort_queue_item.mcast = message_item->mcast; sort_queue_item.msg_len = message_item->msg_len; mcast = sort_queue_item.mcast; memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); /* * Add message to retransmit queue */ sq_item_add (sort_queue, &sort_queue_item, message_item->mcast->seq); totemnet_mcast_noflush_send ( instance->totemnet_context, message_item->mcast, message_item->msg_len); /* * Delete item from pending queue */ cs_queue_item_remove (mcast_queue); /* * If messages mcasted, deliver any new messages to totempg */ instance->my_high_seq_received = token->seq; } update_aru (instance); /* * Return 1 if more messages are available for single node clusters */ return (fcc_mcast_current); } /* * Remulticasts messages in orf_token's retransmit list (requires orf_token) * Modify's orf_token's rtr to 
include retransmits required by this process */ static int orf_token_rtr ( struct totemsrp_instance *instance, struct orf_token *orf_token, unsigned int *fcc_allowed) { unsigned int res; unsigned int i, j; unsigned int found; struct sq *sort_queue; struct rtr_item *rtr_list; unsigned int range = 0; char retransmit_msg[1024]; char value[64]; if (instance->memb_state == MEMB_STATE_RECOVERY) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } rtr_list = &orf_token->rtr_list[0]; strcpy (retransmit_msg, "Retransmit List: "); if (orf_token->rtr_list_entries) { log_printf (instance->totemsrp_log_level_debug, "Retransmit List %d", orf_token->rtr_list_entries); for (i = 0; i < orf_token->rtr_list_entries; i++) { sprintf (value, "%x ", rtr_list[i].seq); strcat (retransmit_msg, value); } strcat (retransmit_msg, ""); log_printf (instance->totemsrp_log_level_notice, "%s", retransmit_msg); } /* * Retransmit messages on orf_token's RTR list from RTR queue */ for (instance->fcc_remcast_current = 0, i = 0; instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) { /* * If this retransmit request isn't from this configuration, * try next rtr entry */ if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) != 0) { i += 1; continue; } res = orf_token_remcast (instance, rtr_list[i].seq); if (res == 0) { /* * Multicasted message, so no need to copy to new retransmit list */ orf_token->rtr_list_entries -= 1; assert (orf_token->rtr_list_entries >= 0); memmove (&rtr_list[i], &rtr_list[i + 1], sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i)); instance->stats.mcast_retx++; instance->fcc_remcast_current++; } else { i += 1; } } *fcc_allowed = *fcc_allowed - instance->fcc_remcast_current; /* * Add messages to retransmit to RTR list * but only retry if there is room in the retransmit list */ range = orf_token->seq - instance->my_aru; assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); for (i = 1; (orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) && (i <= range); i++) { /* * Ensure message is within the sort queue range */ res = sq_in_range (sort_queue, instance->my_aru + i); if (res == 0) { break; } /* * Find if a message is missing from this processor */ res = sq_item_inuse (sort_queue, instance->my_aru + i); if (res == 0) { /* * Determine how many times we have missed receiving * this sequence number. sq_item_miss_count increments * a counter for the sequence number. The miss count * will be returned and compared. This allows time for * delayed multicast messages to be received before * declaring the message is missing and requesting a * retransmit. 
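 * Only once the miss count reaches totem_config->miss_count_const is the sequence number added to the retransmit list below.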
*/ res = sq_item_miss_count (sort_queue, instance->my_aru + i); if (res < instance->totem_config->miss_count_const) { continue; } /* * Determine if missing message is already in retransmit list */ found = 0; for (j = 0; j < orf_token->rtr_list_entries; j++) { if (instance->my_aru + i == rtr_list[j].seq) { found = 1; } } if (found == 0) { /* * Missing message not found in current retransmit list so add it */ memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i; orf_token->rtr_list_entries++; } } } return (instance->fcc_remcast_current); } static void token_retransmit (struct totemsrp_instance *instance) { totemnet_token_send (instance->totemnet_context, instance->orf_token_retransmit, instance->orf_token_retransmit_size); } /* * Retransmit the regular token if no mcast or token has * been received in retransmit token period retransmit * the token to the next processor */ static void timer_function_token_retransmit_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_GATHER: break; case MEMB_STATE_COMMIT: case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: token_retransmit (instance); reset_token_retransmit_timeout (instance); // REVIEWED break; } } static void timer_function_token_hold_retransmit_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_GATHER: break; case MEMB_STATE_COMMIT: break; case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: token_retransmit (instance); break; } } static void timer_function_merge_detect_timeout(void *data) { struct totemsrp_instance *instance = data; instance->my_merge_detect_timeout_outstanding = 0; switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { memb_merge_detect_transmit (instance); } break; case MEMB_STATE_GATHER: case MEMB_STATE_COMMIT: case MEMB_STATE_RECOVERY: break; } } /* * Send orf_token to next member (requires orf_token) */ static int token_send ( struct totemsrp_instance *instance, struct orf_token *orf_token, int forward_token) { int res = 0; unsigned int orf_token_size; orf_token_size = sizeof (struct orf_token) + (orf_token->rtr_list_entries * sizeof (struct rtr_item)); orf_token->header.nodeid = instance->my_id.addr[0].nodeid; memcpy (instance->orf_token_retransmit, orf_token, orf_token_size); instance->orf_token_retransmit_size = orf_token_size; assert (orf_token->header.nodeid); if (forward_token == 0) { return (0); } totemnet_token_send (instance->totemnet_context, orf_token, orf_token_size); return (res); } static int token_hold_cancel_send (struct totemsrp_instance *instance) { struct token_hold_cancel token_hold_cancel; /* * Only cancel if the token is currently held */ if (instance->my_token_held == 0) { return (0); } instance->my_token_held = 0; /* * Build message */ token_hold_cancel.header.type = MESSAGE_TYPE_TOKEN_HOLD_CANCEL; token_hold_cancel.header.endian_detector = ENDIAN_LOCAL; token_hold_cancel.header.encapsulated = 0; token_hold_cancel.header.nodeid = instance->my_id.addr[0].nodeid; memcpy (&token_hold_cancel.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); assert (token_hold_cancel.header.nodeid); instance->stats.token_hold_cancel_tx++; totemnet_mcast_flush_send (instance->totemnet_context, &token_hold_cancel, sizeof (struct token_hold_cancel)); return (0); } static int 
orf_token_send_initial (struct totemsrp_instance *instance)
{
	struct orf_token orf_token;
	int res;

	orf_token.header.type = MESSAGE_TYPE_ORF_TOKEN;
	orf_token.header.endian_detector = ENDIAN_LOCAL;
	orf_token.header.encapsulated = 0;
	orf_token.header.nodeid = instance->my_id.addr[0].nodeid;
	assert (orf_token.header.nodeid);
	orf_token.seq = SEQNO_START_MSG;
	orf_token.token_seq = SEQNO_START_TOKEN;
	orf_token.retrans_flg = 1;
	instance->my_set_retrans_flg = 1;
	instance->stats.orf_token_tx++;

	if (cs_queue_is_empty (&instance->retrans_message_queue) == 1) {
		orf_token.retrans_flg = 0;
		instance->my_set_retrans_flg = 0;
	} else {
		orf_token.retrans_flg = 1;
		instance->my_set_retrans_flg = 1;
	}

	orf_token.aru = 0;
	orf_token.aru = SEQNO_START_MSG - 1;
	orf_token.aru_addr = instance->my_id.addr[0].nodeid;

	memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
	orf_token.fcc = 0;
	orf_token.backlog = 0;

	orf_token.rtr_list_entries = 0;

	res = token_send (instance, &orf_token, 1);

	return (res);
}

static void memb_state_commit_token_update (
	struct totemsrp_instance *instance)
{
	struct srp_addr *addr;
	struct memb_commit_token_memb_entry *memb_list;
	unsigned int high_aru;
	unsigned int i;

	addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
	memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);

	memcpy (instance->my_new_memb_list, addr,
		sizeof (struct srp_addr) * instance->commit_token->addr_entries);

	instance->my_new_memb_entries = instance->commit_token->addr_entries;

	memcpy (&memb_list[instance->commit_token->memb_index].ring_id,
		&instance->my_old_ring_id, sizeof (struct memb_ring_id));

	memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru;
	/*
	 * TODO high delivered is really instance->my_aru, but with safe this
	 * could change?
*/ instance->my_received_flg = (instance->my_aru == instance->my_high_seq_received); memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg; memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered; /* * find high aru up to current memb_index for all matching ring ids * if any ring id matching memb_index has aru less then high aru set * received flag for that entry to false */ high_aru = memb_list[instance->commit_token->memb_index].aru; for (i = 0; i <= instance->commit_token->memb_index; i++) { if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (high_aru, memb_list[i].aru)) { high_aru = memb_list[i].aru; } } } for (i = 0; i <= instance->commit_token->memb_index; i++) { if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (memb_list[i].aru, high_aru)) { memb_list[i].received_flg = 0; if (i == instance->commit_token->memb_index) { instance->my_received_flg = 0; } } } } instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid; instance->commit_token->memb_index += 1; assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries); assert (instance->commit_token->header.nodeid); } static void memb_state_commit_token_target_set ( struct totemsrp_instance *instance) { struct srp_addr *addr; addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; /* Totemnet just looks at the node id */ totemnet_token_target_set ( instance->totemnet_context, &addr[instance->commit_token->memb_index % instance->commit_token->addr_entries].addr[0]); } static int memb_state_commit_token_send_recovery ( struct totemsrp_instance *instance, struct memb_commit_token *commit_token) { unsigned int commit_token_size; commit_token->token_seq++; commit_token->header.nodeid = instance->my_id.addr[0].nodeid; commit_token_size = sizeof (struct memb_commit_token) + ((sizeof (struct srp_addr) + sizeof (struct memb_commit_token_memb_entry)) * commit_token->addr_entries); /* * Make a copy for retransmission if necessary */ memcpy (instance->orf_token_retransmit, commit_token, commit_token_size); instance->orf_token_retransmit_size = commit_token_size; instance->stats.memb_commit_token_tx++; totemnet_token_send (instance->totemnet_context, commit_token, commit_token_size); /* * Request retransmission of the commit token in case it is lost */ reset_token_retransmit_timeout (instance); return (0); } static int memb_state_commit_token_send ( struct totemsrp_instance *instance) { unsigned int commit_token_size; instance->commit_token->token_seq++; instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid; commit_token_size = sizeof (struct memb_commit_token) + ((sizeof (struct srp_addr) + sizeof (struct memb_commit_token_memb_entry)) * instance->commit_token->addr_entries); /* * Make a copy for retransmission if necessary */ memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size); instance->orf_token_retransmit_size = commit_token_size; instance->stats.memb_commit_token_tx++; totemnet_token_send (instance->totemnet_context, instance->commit_token, commit_token_size); /* * Request retransmission of the commit token in case it is lost */ reset_token_retransmit_timeout (instance); return (0); } static int memb_lowest_in_config (struct totemsrp_instance *instance) { struct srp_addr 
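/*
 * Note: memb_lowest_in_config() answers "am I the representative?".  It
 * subtracts the failed list from the proc list and returns nonzero only
 * when this processor's first address is the smallest one remaining, in
 * which case this node creates the commit token (see memb_join_process).
 */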
token_memb[PROCESSOR_COUNT_MAX]; int token_memb_entries = 0; int i; struct totem_ip_address *lowest_addr; memb_set_subtract (token_memb, &token_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); /* * find representative by searching for smallest identifier */ lowest_addr = &token_memb[0].addr[0]; for (i = 1; i < token_memb_entries; i++) { if (totemip_compare(lowest_addr, &token_memb[i].addr[0]) > 0) { totemip_copy (lowest_addr, &token_memb[i].addr[0]); } } return (totemip_compare (lowest_addr, &instance->my_id.addr[0]) == 0); } static int srp_addr_compare (const void *a, const void *b) { const struct srp_addr *srp_a = (const struct srp_addr *)a; const struct srp_addr *srp_b = (const struct srp_addr *)b; return (totemip_compare (&srp_a->addr[0], &srp_b->addr[0])); } static void memb_state_commit_token_create ( struct totemsrp_instance *instance) { struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; struct srp_addr *addr; struct memb_commit_token_memb_entry *memb_list; int token_memb_entries = 0; log_printf (instance->totemsrp_log_level_debug, "Creating commit token because I am the rep."); memb_set_subtract (token_memb, &token_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); memset (instance->commit_token, 0, sizeof (struct memb_commit_token)); instance->commit_token->header.type = MESSAGE_TYPE_MEMB_COMMIT_TOKEN; instance->commit_token->header.endian_detector = ENDIAN_LOCAL; instance->commit_token->header.encapsulated = 0; instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid; assert (instance->commit_token->header.nodeid); totemip_copy(&instance->commit_token->ring_id.rep, &instance->my_id.addr[0]); instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4; /* * This qsort is necessary to ensure the commit token traverses * the ring in the proper order */ qsort (token_memb, token_memb_entries, sizeof (struct srp_addr), srp_addr_compare); instance->commit_token->memb_index = 0; instance->commit_token->addr_entries = token_memb_entries; addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries); memcpy (addr, token_memb, token_memb_entries * sizeof (struct srp_addr)); memset (memb_list, 0, sizeof (struct memb_commit_token_memb_entry) * token_memb_entries); } static void memb_join_message_send (struct totemsrp_instance *instance) { char memb_join_data[40000]; struct memb_join *memb_join = (struct memb_join *)memb_join_data; char *addr; unsigned int addr_idx; memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; memb_join->header.endian_detector = ENDIAN_LOCAL; memb_join->header.encapsulated = 0; memb_join->header.nodeid = instance->my_id.addr[0].nodeid; assert (memb_join->header.nodeid); memb_join->ring_seq = instance->my_ring_id.seq; memb_join->proc_list_entries = instance->my_proc_list_entries; memb_join->failed_list_entries = instance->my_failed_list_entries; srp_addr_copy (&memb_join->system_from, &instance->my_id); /* * This mess adds the joined and failed processor lists into the join * message */ addr = (char *)memb_join; addr_idx = sizeof (struct memb_join); memcpy (&addr[addr_idx], instance->my_proc_list, instance->my_proc_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_proc_list_entries * sizeof (struct srp_addr); memcpy (&addr[addr_idx], instance->my_failed_list, 
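/*
 * Note: the join message is laid out as
 *   struct memb_join | proc_list[proc_list_entries] | failed_list[failed_list_entries]
 * with addr_idx tracking the running offset; this memcpy appends the
 * failed list directly after the proc list.
 */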
instance->my_failed_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_failed_list_entries * sizeof (struct srp_addr); if (instance->totem_config->send_join_timeout) { usleep (random() % (instance->totem_config->send_join_timeout * 1000)); } instance->stats.memb_join_tx++; totemnet_mcast_flush_send ( instance->totemnet_context, memb_join, addr_idx); } static void memb_leave_message_send (struct totemsrp_instance *instance) { char memb_join_data[40000]; struct memb_join *memb_join = (struct memb_join *)memb_join_data; char *addr; unsigned int addr_idx; int active_memb_entries; struct srp_addr active_memb[PROCESSOR_COUNT_MAX]; log_printf (instance->totemsrp_log_level_debug, "sending join/leave message"); /* * add us to the failed list, and remove us from * the members list */ memb_set_merge( &instance->my_id, 1, instance->my_failed_list, &instance->my_failed_list_entries); memb_set_subtract (active_memb, &active_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, &instance->my_id, 1); memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; memb_join->header.endian_detector = ENDIAN_LOCAL; memb_join->header.encapsulated = 0; memb_join->header.nodeid = LEAVE_DUMMY_NODEID; memb_join->ring_seq = instance->my_ring_id.seq; memb_join->proc_list_entries = active_memb_entries; memb_join->failed_list_entries = instance->my_failed_list_entries; srp_addr_copy (&memb_join->system_from, &instance->my_id); memb_join->system_from.addr[0].nodeid = LEAVE_DUMMY_NODEID; // TODO: CC Maybe use the actual join send routine. /* * This mess adds the joined and failed processor lists into the join * message */ addr = (char *)memb_join; addr_idx = sizeof (struct memb_join); memcpy (&addr[addr_idx], active_memb, active_memb_entries * sizeof (struct srp_addr)); addr_idx += active_memb_entries * sizeof (struct srp_addr); memcpy (&addr[addr_idx], instance->my_failed_list, instance->my_failed_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_failed_list_entries * sizeof (struct srp_addr); if (instance->totem_config->send_join_timeout) { usleep (random() % (instance->totem_config->send_join_timeout * 1000)); } instance->stats.memb_join_tx++; totemnet_mcast_flush_send ( instance->totemnet_context, memb_join, addr_idx); } static void memb_merge_detect_transmit (struct totemsrp_instance *instance) { struct memb_merge_detect memb_merge_detect; memb_merge_detect.header.type = MESSAGE_TYPE_MEMB_MERGE_DETECT; memb_merge_detect.header.endian_detector = ENDIAN_LOCAL; memb_merge_detect.header.encapsulated = 0; memb_merge_detect.header.nodeid = instance->my_id.addr[0].nodeid; srp_addr_copy (&memb_merge_detect.system_from, &instance->my_id); memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); assert (memb_merge_detect.header.nodeid); instance->stats.memb_merge_detect_tx++; totemnet_mcast_flush_send (instance->totemnet_context, &memb_merge_detect, sizeof (struct memb_merge_detect)); } static void memb_ring_id_set ( struct totemsrp_instance *instance, const struct memb_ring_id *ring_id) { memcpy (&instance->my_ring_id, ring_id, sizeof (struct memb_ring_id)); } int totemsrp_callback_token_create ( void *srp_context, void **handle_out, enum totem_callback_token_type type, int delete, int (*callback_fn) (enum totem_callback_token_type type, const void *), const void *data) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; struct token_callback_instance *callback_handle; token_hold_cancel_send (instance); callback_handle = malloc 
(sizeof (struct token_callback_instance)); if (callback_handle == 0) { return (-1); } *handle_out = (void *)callback_handle; - list_init (&callback_handle->list); + qb_list_init (&callback_handle->list); callback_handle->callback_fn = callback_fn; callback_handle->data = (void *) data; callback_handle->callback_type = type; callback_handle->delete = delete; switch (type) { case TOTEM_CALLBACK_TOKEN_RECEIVED: - list_add (&callback_handle->list, &instance->token_callback_received_listhead); + qb_list_add (&callback_handle->list, &instance->token_callback_received_listhead); break; case TOTEM_CALLBACK_TOKEN_SENT: - list_add (&callback_handle->list, &instance->token_callback_sent_listhead); + qb_list_add (&callback_handle->list, &instance->token_callback_sent_listhead); break; } return (0); } void totemsrp_callback_token_destroy (void *srp_context, void **handle_out) { struct token_callback_instance *h; if (*handle_out) { h = (struct token_callback_instance *)*handle_out; - list_del (&h->list); + qb_list_del (&h->list); free (h); h = NULL; *handle_out = 0; } } static void token_callbacks_execute ( struct totemsrp_instance *instance, enum totem_callback_token_type type) { - struct list_head *list; - struct list_head *list_next; - struct list_head *callback_listhead = 0; + struct qb_list_head *list; + struct qb_list_head *callback_listhead = 0; struct token_callback_instance *token_callback_instance; int res; int del; switch (type) { case TOTEM_CALLBACK_TOKEN_RECEIVED: callback_listhead = &instance->token_callback_received_listhead; break; case TOTEM_CALLBACK_TOKEN_SENT: callback_listhead = &instance->token_callback_sent_listhead; break; default: assert (0); } - for (list = callback_listhead->next; list != callback_listhead; - list = list_next) { - - token_callback_instance = list_entry (list, struct token_callback_instance, list); - - list_next = list->next; + qb_list_for_each(list, callback_listhead) { + token_callback_instance = qb_list_entry (list, struct token_callback_instance, list); del = token_callback_instance->delete; if (del == 1) { - list_del (list); + qb_list_del (list); } res = token_callback_instance->callback_fn ( token_callback_instance->callback_type, token_callback_instance->data); /* * This callback failed to execute, try it again on the next token */ if (res == -1 && del == 1) { - list_add (list, callback_listhead); + qb_list_add (list, callback_listhead); } else if (del) { free (token_callback_instance); } } } /* * Flow control functions */ static unsigned int backlog_get (struct totemsrp_instance *instance) { unsigned int backlog = 0; struct cs_queue *queue_use = NULL; if (instance->memb_state == MEMB_STATE_OPERATIONAL) { if (instance->waiting_trans_ack) { queue_use = &instance->new_message_queue_trans; } else { queue_use = &instance->new_message_queue; } } else if (instance->memb_state == MEMB_STATE_RECOVERY) { queue_use = &instance->retrans_message_queue; } if (queue_use != NULL) { backlog = cs_queue_used (queue_use); } instance->stats.token[instance->stats.latest_token].backlog_calc = backlog; return (backlog); } static int fcc_calculate ( struct totemsrp_instance *instance, struct orf_token *token) { unsigned int transmits_allowed; unsigned int backlog_calc; transmits_allowed = instance->totem_config->max_messages; if (transmits_allowed > instance->totem_config->window_size - token->fcc) { transmits_allowed = instance->totem_config->window_size - token->fcc; } instance->my_cbl = backlog_get (instance); /* * Only do backlog calculation if there is a backlog otherwise * 
we would result in div by zero */ if (token->backlog + instance->my_cbl - instance->my_pbl) { backlog_calc = (instance->totem_config->window_size * instance->my_pbl) / (token->backlog + instance->my_cbl - instance->my_pbl); if (backlog_calc > 0 && transmits_allowed > backlog_calc) { transmits_allowed = backlog_calc; } } return (transmits_allowed); } /* * don't overflow the RTR sort queue */ static void fcc_rtr_limit ( struct totemsrp_instance *instance, struct orf_token *token, unsigned int *transmits_allowed) { int check = QUEUE_RTR_ITEMS_SIZE_MAX; check -= (*transmits_allowed + instance->totem_config->window_size); assert (check >= 0); if (sq_lt_compare (instance->last_released + QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed - instance->totem_config->window_size, token->seq)) { *transmits_allowed = 0; } } static void fcc_token_update ( struct totemsrp_instance *instance, struct orf_token *token, unsigned int msgs_transmitted) { token->fcc += msgs_transmitted - instance->my_trc; token->backlog += instance->my_cbl - instance->my_pbl; instance->my_trc = msgs_transmitted; instance->my_pbl = instance->my_cbl; } /* * Message Handlers */ unsigned long long int tv_old; /* * message handler called when TOKEN message type received */ static int message_handler_orf_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { char token_storage[1500]; char token_convert[1500]; struct orf_token *token = NULL; int forward_token; unsigned int transmits_allowed; unsigned int mcasted_retransmit; unsigned int mcasted_regular; unsigned int last_aru; #ifdef GIVEINFO unsigned long long tv_current; unsigned long long tv_diff; tv_current = qb_util_nano_current_get (); tv_diff = tv_current - tv_old; tv_old = tv_current; log_printf (instance->totemsrp_log_level_debug, "Time since last token %0.4f ms", ((float)tv_diff) / 1000000.0); #endif if (instance->orf_token_discard) { return (0); } #ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) { return (0); } #endif if (endian_conversion_needed) { orf_token_endian_convert ((struct orf_token *)msg, (struct orf_token *)token_convert); msg = (struct orf_token *)token_convert; } /* * Make copy of token and retransmit list in case we have * to flush incoming messages from the kernel queue */ token = (struct orf_token *)token_storage; memcpy (token, msg, sizeof (struct orf_token)); memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token), sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX); /* * Handle merge detection timeout */ if (token->seq == instance->my_last_seq) { start_merge_detect_timeout (instance); instance->my_seq_unchanged += 1; } else { cancel_merge_detect_timeout (instance); cancel_token_hold_retransmit_timeout (instance); instance->my_seq_unchanged = 0; } instance->my_last_seq = token->seq; #ifdef TEST_RECOVERY_MSG_COUNT if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) { return (0); } #endif instance->flushing = 1; totemnet_recv_flush (instance->totemnet_context); instance->flushing = 0; /* * Determine if we should hold (in reality drop) the token */ instance->my_token_held = 0; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) && instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) { instance->my_token_held = 1; } else if (!totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) && instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) { 
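/*
 * Note: processors other than the ring representative also mark the token
 * as held once the highest sequence number has stayed unchanged for
 * seqno_unchanged_const rotations.  Only the representative uses the flag
 * to stop forwarding the token (forward_token below); it re-sends the held
 * token later from timer_function_token_hold_retransmit_timeout.
 */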
instance->my_token_held = 1; } /* * Hold onto token when there is no activity on ring and * this processor is the ring rep */ forward_token = 1; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { if (instance->my_token_held) { forward_token = 0; } } token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED); switch (instance->memb_state) { case MEMB_STATE_COMMIT: /* Discard token */ break; case MEMB_STATE_OPERATIONAL: messages_free (instance, token->aru); /* * Do NOT add break, this case should also execute code in gather case. */ case MEMB_STATE_GATHER: /* * DO NOT add break, we use different free mechanism in recovery state */ case MEMB_STATE_RECOVERY: /* * Discard tokens from another configuration */ if (memcmp (&token->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) != 0) { if ((forward_token) && instance->use_heartbeat) { reset_heartbeat_timeout(instance); } else { cancel_heartbeat_timeout(instance); } return (0); /* discard token */ } /* * Discard retransmitted tokens */ if (sq_lte_compare (token->token_seq, instance->my_token_seq)) { return (0); /* discard token */ } last_aru = instance->my_last_aru; instance->my_last_aru = token->aru; transmits_allowed = fcc_calculate (instance, token); mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed); if (instance->my_token_held == 1 && (token->rtr_list_entries > 0 || mcasted_retransmit > 0)) { instance->my_token_held = 0; forward_token = 1; } fcc_rtr_limit (instance, token, &transmits_allowed); mcasted_regular = orf_token_mcast (instance, token, transmits_allowed); /* if (mcasted_regular) { printf ("mcasted regular %d\n", mcasted_regular); printf ("token seq %d\n", token->seq); } */ fcc_token_update (instance, token, mcasted_retransmit + mcasted_regular); if (sq_lt_compare (instance->my_aru, token->aru) || instance->my_id.addr[0].nodeid == token->aru_addr || token->aru_addr == 0) { token->aru = instance->my_aru; if (token->aru == token->seq) { token->aru_addr = 0; } else { token->aru_addr = instance->my_id.addr[0].nodeid; } } if (token->aru == last_aru && token->aru_addr != 0) { instance->my_aru_count += 1; } else { instance->my_aru_count = 0; } /* * We really don't follow specification there. 
In specification, OTHER nodes * detect failure of one node (based on aru_count) and my_id IS NEVER added * to failed list (so node never mark itself as failed) */ if (instance->my_aru_count > instance->totem_config->fail_to_recv_const && token->aru_addr == instance->my_id.addr[0].nodeid) { log_printf (instance->totemsrp_log_level_error, "FAILED TO RECEIVE"); instance->failed_to_recv = 1; memb_set_merge (&instance->my_id, 1, instance->my_failed_list, &instance->my_failed_list_entries); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FAILED_TO_RECEIVE); } else { instance->my_token_seq = token->token_seq; token->token_seq += 1; if (instance->memb_state == MEMB_STATE_RECOVERY) { /* * instance->my_aru == instance->my_high_seq_received means this processor * has recovered all messages it can recover * (ie: its retrans queue is empty) */ if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) { if (token->retrans_flg == 0) { token->retrans_flg = 1; instance->my_set_retrans_flg = 1; } } else if (token->retrans_flg == 1 && instance->my_set_retrans_flg) { token->retrans_flg = 0; instance->my_set_retrans_flg = 0; } log_printf (instance->totemsrp_log_level_debug, "token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x", token->retrans_flg, instance->my_set_retrans_flg, cs_queue_is_empty (&instance->retrans_message_queue), instance->my_retrans_flg_count, token->aru); if (token->retrans_flg == 0) { instance->my_retrans_flg_count += 1; } else { instance->my_retrans_flg_count = 0; } if (instance->my_retrans_flg_count == 2) { instance->my_install_seq = token->seq; } log_printf (instance->totemsrp_log_level_debug, "install seq %x aru %x high seq received %x", instance->my_install_seq, instance->my_aru, instance->my_high_seq_received); if (instance->my_retrans_flg_count >= 2 && instance->my_received_flg == 0 && sq_lte_compare (instance->my_install_seq, instance->my_aru)) { instance->my_received_flg = 1; instance->my_deliver_memb_entries = instance->my_trans_memb_entries; memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list, sizeof (struct totem_ip_address) * instance->my_trans_memb_entries); } if (instance->my_retrans_flg_count >= 3 && sq_lte_compare (instance->my_install_seq, token->aru)) { instance->my_rotation_counter += 1; } else { instance->my_rotation_counter = 0; } if (instance->my_rotation_counter == 2) { log_printf (instance->totemsrp_log_level_debug, "retrans flag count %x token aru %x install seq %x aru %x %x", instance->my_retrans_flg_count, token->aru, instance->my_install_seq, instance->my_aru, token->seq); memb_state_operational_enter (instance); instance->my_rotation_counter = 0; instance->my_retrans_flg_count = 0; } } totemnet_send_flush (instance->totemnet_context); token_send (instance, token, forward_token); #ifdef GIVEINFO tv_current = qb_util_nano_current_get (); tv_diff = tv_current - tv_old; tv_old = tv_current; log_printf (instance->totemsrp_log_level_debug, "I held %0.4f ms", ((float)tv_diff) / 1000000.0); #endif if (instance->memb_state == MEMB_STATE_OPERATIONAL) { messages_deliver_to_app (instance, 0, instance->my_high_seq_received); } /* * Deliver messages after token has been transmitted * to improve performance */ reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // REVIEWED if (totemip_equal(&instance->my_id.addr[0], &instance->my_ring_id.rep) && instance->my_token_held == 1) { start_token_hold_retransmit_timeout (instance); } token_callbacks_execute (instance, 
TOTEM_CALLBACK_TOKEN_SENT); } break; } if ((forward_token) && instance->use_heartbeat) { reset_heartbeat_timeout(instance); } else { cancel_heartbeat_timeout(instance); } return (0); } static void messages_deliver_to_app ( struct totemsrp_instance *instance, int skip, unsigned int end_point) { struct sort_queue_item *sort_queue_item_p; unsigned int i; int res; struct mcast *mcast_in; struct mcast mcast_header; unsigned int range = 0; int endian_conversion_required; unsigned int my_high_delivered_stored = 0; range = end_point - instance->my_high_delivered; if (range) { log_printf (instance->totemsrp_log_level_trace, "Delivering %x to %x", instance->my_high_delivered, end_point); } assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); my_high_delivered_stored = instance->my_high_delivered; /* * Deliver messages in order from rtr queue to pending delivery queue */ for (i = 1; i <= range; i++) { void *ptr = 0; /* * If out of range of sort queue, stop assembly */ res = sq_in_range (&instance->regular_sort_queue, my_high_delivered_stored + i); if (res == 0) { break; } res = sq_item_get (&instance->regular_sort_queue, my_high_delivered_stored + i, &ptr); /* * If hole, stop assembly */ if (res != 0 && skip == 0) { break; } instance->my_high_delivered = my_high_delivered_stored + i; if (res != 0) { continue; } sort_queue_item_p = ptr; mcast_in = sort_queue_item_p->mcast; assert (mcast_in != (struct mcast *)0xdeadbeef); endian_conversion_required = 0; if (mcast_in->header.endian_detector != ENDIAN_LOCAL) { endian_conversion_required = 1; mcast_endian_convert (mcast_in, &mcast_header); } else { memcpy (&mcast_header, mcast_in, sizeof (struct mcast)); } /* * Skip messages not originated in instance->my_deliver_memb */ if (skip && memb_set_subset (&mcast_header.system_from, 1, instance->my_deliver_memb_list, instance->my_deliver_memb_entries) == 0) { instance->my_high_delivered = my_high_delivered_stored + i; continue; } /* * Message found */ log_printf (instance->totemsrp_log_level_trace, "Delivering MCAST message with seq %x to pending delivery queue", mcast_header.seq); /* * Message is locally originated multicast */ instance->totemsrp_deliver_fn ( mcast_header.header.nodeid, ((char *)sort_queue_item_p->mcast) + sizeof (struct mcast), sort_queue_item_p->msg_len - sizeof (struct mcast), endian_conversion_required); } } /* * recv message handler called when MCAST message type received */ static int message_handler_mcast ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct sort_queue_item sort_queue_item; struct sq *sort_queue; struct mcast mcast_header; if (endian_conversion_needed) { mcast_endian_convert (msg, &mcast_header); } else { memcpy (&mcast_header, msg, sizeof (struct mcast)); } if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } assert (msg_len <= FRAME_SIZE_MAX); #ifdef TEST_DROP_MCAST_PERCENTAGE if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) { return (0); } #endif /* * If the message is foreign execute the switch below */ if (memcmp (&instance->my_ring_id, &mcast_header.ring_id, sizeof (struct memb_ring_id)) != 0) { switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: memb_set_merge ( &mcast_header.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE); break; case MEMB_STATE_GATHER: if (!memb_set_subset ( 
&mcast_header.system_from, 1, instance->my_proc_list, instance->my_proc_list_entries)) { memb_set_merge (&mcast_header.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE); return (0); } break; case MEMB_STATE_COMMIT: /* discard message */ instance->stats.rx_msg_dropped++; break; case MEMB_STATE_RECOVERY: /* discard message */ instance->stats.rx_msg_dropped++; break; } return (0); } log_printf (instance->totemsrp_log_level_trace, "Received ringid(%s:%lld) seq %x", totemip_print (&mcast_header.ring_id.rep), mcast_header.ring_id.seq, mcast_header.seq); /* * Add mcast message to rtr queue if not already in rtr queue * otherwise free io vectors */ if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX && sq_in_range (sort_queue, mcast_header.seq) && sq_item_inuse (sort_queue, mcast_header.seq) == 0) { /* * Allocate new multicast memory block */ // TODO LEAK sort_queue_item.mcast = totemsrp_buffer_alloc (instance); if (sort_queue_item.mcast == NULL) { return (-1); /* error here is corrected by the algorithm */ } memcpy (sort_queue_item.mcast, msg, msg_len); sort_queue_item.msg_len = msg_len; if (sq_lt_compare (instance->my_high_seq_received, mcast_header.seq)) { instance->my_high_seq_received = mcast_header.seq; } sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq); } update_aru (instance); if (instance->memb_state == MEMB_STATE_OPERATIONAL) { messages_deliver_to_app (instance, 0, instance->my_high_seq_received); } /* TODO remove from retrans message queue for old ring in recovery state */ return (0); } static int message_handler_memb_merge_detect ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct memb_merge_detect memb_merge_detect; if (endian_conversion_needed) { memb_merge_detect_endian_convert (msg, &memb_merge_detect); } else { memcpy (&memb_merge_detect, msg, sizeof (struct memb_merge_detect)); } /* * do nothing if this is a merge detect from this configuration */ if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id, sizeof (struct memb_ring_id)) == 0) { return (0); } /* * Execute merge operation */ switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: memb_set_merge (&memb_merge_detect.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE); break; case MEMB_STATE_GATHER: if (!memb_set_subset ( &memb_merge_detect.system_from, 1, instance->my_proc_list, instance->my_proc_list_entries)) { memb_set_merge (&memb_merge_detect.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE); return (0); } break; case MEMB_STATE_COMMIT: /* do nothing in commit */ break; case MEMB_STATE_RECOVERY: /* do nothing in recovery */ break; } return (0); } static void memb_join_process ( struct totemsrp_instance *instance, const struct memb_join *memb_join) { struct srp_addr *proc_list; struct srp_addr *failed_list; int gather_entered = 0; int fail_minus_memb_entries = 0; struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX]; proc_list = (struct srp_addr *)memb_join->end_of_memb_join; failed_list = proc_list + memb_join->proc_list_entries; /* memb_set_print ("proclist", proc_list, memb_join->proc_list_entries); memb_set_print ("faillist", failed_list, memb_join->failed_list_entries); memb_set_print ("my_proclist", instance->my_proc_list, 
instance->my_proc_list_entries); memb_set_print ("my_faillist", instance->my_failed_list, instance->my_failed_list_entries); -*/ if (memb_join->header.type == MESSAGE_TYPE_MEMB_JOIN) { if (instance->flushing) { if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) { log_printf (instance->totemsrp_log_level_warning, "Discarding LEAVE message during flush, nodeid=%u", memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID); if (memb_join->failed_list_entries > 0) { my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid); } } else { log_printf (instance->totemsrp_log_level_warning, "Discarding JOIN message during flush, nodeid=%d", memb_join->header.nodeid); } return; } else { if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) { log_printf (instance->totemsrp_log_level_debug, "Received LEAVE message from %u", memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID); if (memb_join->failed_list_entries > 0) { my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid); } } } } if (memb_set_equal (proc_list, memb_join->proc_list_entries, instance->my_proc_list, instance->my_proc_list_entries) && memb_set_equal (failed_list, memb_join->failed_list_entries, instance->my_failed_list, instance->my_failed_list_entries)) { memb_consensus_set (instance, &memb_join->system_from); if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) { instance->failed_to_recv = 0; srp_addr_copy (&instance->my_proc_list[0], &instance->my_id); instance->my_proc_list_entries = 1; instance->my_failed_list_entries = 0; memb_state_commit_token_create (instance); memb_state_commit_enter (instance); return; } if (memb_consensus_agreed (instance) && memb_lowest_in_config (instance)) { memb_state_commit_token_create (instance); memb_state_commit_enter (instance); } else { goto out; } } else if (memb_set_subset (proc_list, memb_join->proc_list_entries, instance->my_proc_list, instance->my_proc_list_entries) && memb_set_subset (failed_list, memb_join->failed_list_entries, instance->my_failed_list, instance->my_failed_list_entries)) { goto out; } else if (memb_set_subset (&memb_join->system_from, 1, instance->my_failed_list, instance->my_failed_list_entries)) { goto out; } else { memb_set_merge (proc_list, memb_join->proc_list_entries, instance->my_proc_list, &instance->my_proc_list_entries); if (memb_set_subset ( &instance->my_id, 1, failed_list, memb_join->failed_list_entries)) { memb_set_merge ( &memb_join->system_from, 1, instance->my_failed_list, &instance->my_failed_list_entries); } else { if (memb_set_subset ( &memb_join->system_from, 1, instance->my_memb_list, instance->my_memb_entries)) { if (memb_set_subset ( &memb_join->system_from, 1, instance->my_failed_list, instance->my_failed_list_entries) == 0) { memb_set_merge (failed_list, memb_join->failed_list_entries, instance->my_failed_list, &instance->my_failed_list_entries); } else { memb_set_subtract (fail_minus_memb, &fail_minus_memb_entries, failed_list, memb_join->failed_list_entries, instance->my_memb_list, instance->my_memb_entries); memb_set_merge (fail_minus_memb, fail_minus_memb_entries, instance->my_failed_list, &instance->my_failed_list_entries); } } } memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_JOIN); gather_entered = 1; } out: if (gather_entered == 0 && instance->memb_state == MEMB_STATE_OPERATIONAL) { memb_state_gather_enter 
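/*
 * Note: reaching "out" without having entered gather means the join either
 * matched what we already know, was a subset of it, or came from a
 * processor we consider failed.  If we are still operational it
 * nevertheless forces a transition to gather, since a join seen in
 * operational state means a membership change is being negotiated.
 */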
(instance, TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE); } } static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out) { int i; struct srp_addr *in_proc_list; struct srp_addr *in_failed_list; struct srp_addr *out_proc_list; struct srp_addr *out_failed_list; out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); out->proc_list_entries = swab32 (in->proc_list_entries); out->failed_list_entries = swab32 (in->failed_list_entries); out->ring_seq = swab64 (in->ring_seq); in_proc_list = (struct srp_addr *)in->end_of_memb_join; in_failed_list = in_proc_list + out->proc_list_entries; out_proc_list = (struct srp_addr *)out->end_of_memb_join; out_failed_list = out_proc_list + out->proc_list_entries; for (i = 0; i < out->proc_list_entries; i++) { srp_addr_copy_endian_convert (&out_proc_list[i], &in_proc_list[i]); } for (i = 0; i < out->failed_list_entries; i++) { srp_addr_copy_endian_convert (&out_failed_list[i], &in_failed_list[i]); } } static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out) { int i; struct srp_addr *in_addr = (struct srp_addr *)in->end_of_commit_token; struct srp_addr *out_addr = (struct srp_addr *)out->end_of_commit_token; struct memb_commit_token_memb_entry *in_memb_list; struct memb_commit_token_memb_entry *out_memb_list; out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->token_seq = swab32 (in->token_seq); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); out->retrans_flg = swab32 (in->retrans_flg); out->memb_index = swab32 (in->memb_index); out->addr_entries = swab32 (in->addr_entries); in_memb_list = (struct memb_commit_token_memb_entry *)(in_addr + out->addr_entries); out_memb_list = (struct memb_commit_token_memb_entry *)(out_addr + out->addr_entries); for (i = 0; i < out->addr_entries; i++) { srp_addr_copy_endian_convert (&out_addr[i], &in_addr[i]); /* * Only convert the memb entry if it has been set */ if (in_memb_list[i].ring_id.rep.family != 0) { totemip_copy_endian_convert (&out_memb_list[i].ring_id.rep, &in_memb_list[i].ring_id.rep); out_memb_list[i].ring_id.seq = swab64 (in_memb_list[i].ring_id.seq); out_memb_list[i].aru = swab32 (in_memb_list[i].aru); out_memb_list[i].high_delivered = swab32 (in_memb_list[i].high_delivered); out_memb_list[i].received_flg = swab32 (in_memb_list[i].received_flg); } } } static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out) { int i; out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->seq = swab32 (in->seq); out->token_seq = swab32 (in->token_seq); out->aru = swab32 (in->aru); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->aru_addr = swab32(in->aru_addr); out->ring_id.seq = swab64 (in->ring_id.seq); out->fcc = swab32 (in->fcc); out->backlog = swab32 (in->backlog); out->retrans_flg = swab32 (in->retrans_flg); out->rtr_list_entries = swab32 (in->rtr_list_entries); for (i = 0; i < out->rtr_list_entries; i++) { totemip_copy_endian_convert(&out->rtr_list[i].ring_id.rep, &in->rtr_list[i].ring_id.rep); out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq); out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq); } } static void 
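/*
 * Note: every on-wire message carries header.endian_detector; when it does
 * not match ENDIAN_LOCAL the handlers convert a copy of the message field
 * by field (swab32/swab64 plus the totemip/srp_addr helpers) before acting
 * on it, as mcast_endian_convert() does below.
 */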
mcast_endian_convert (const struct mcast *in, struct mcast *out) { out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->header.encapsulated = in->header.encapsulated; out->seq = swab32 (in->seq); out->this_seqno = swab32 (in->this_seqno); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); out->node_id = swab32 (in->node_id); out->guarantee = swab32 (in->guarantee); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); } static void memb_merge_detect_endian_convert ( const struct memb_merge_detect *in, struct memb_merge_detect *out) { out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); } static int ignore_join_under_operational ( struct totemsrp_instance *instance, const struct memb_join *memb_join) { struct srp_addr *proc_list; struct srp_addr *failed_list; unsigned long long ring_seq; proc_list = (struct srp_addr *)memb_join->end_of_memb_join; failed_list = proc_list + memb_join->proc_list_entries; ring_seq = memb_join->ring_seq; if (memb_set_subset (&instance->my_id, 1, failed_list, memb_join->failed_list_entries)) { return (1); } /* * In operational state, my_proc_list is exactly the same as * my_memb_list. */ if ((memb_set_subset (&memb_join->system_from, 1, instance->my_memb_list, instance->my_memb_entries)) && (ring_seq < instance->my_ring_id.seq)) { return (1); } return (0); } static int message_handler_memb_join ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { const struct memb_join *memb_join; struct memb_join *memb_join_convert = alloca (msg_len); if (endian_conversion_needed) { memb_join = memb_join_convert; memb_join_endian_convert (msg, memb_join_convert); } else { memb_join = msg; } /* * If the process paused because it wasn't scheduled in a timely * fashion, flush the join messages because they may be queued * entries */ if (pause_flush (instance)) { return (0); } if (instance->token_ring_id_seq < memb_join->ring_seq) { instance->token_ring_id_seq = memb_join->ring_seq; } switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: if (!ignore_join_under_operational (instance, memb_join)) { memb_join_process (instance, memb_join); } break; case MEMB_STATE_GATHER: memb_join_process (instance, memb_join); break; case MEMB_STATE_COMMIT: if (memb_set_subset (&memb_join->system_from, 1, instance->my_new_memb_list, instance->my_new_memb_entries) && memb_join->ring_seq >= instance->my_ring_id.seq) { memb_join_process (instance, memb_join); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE); } break; case MEMB_STATE_RECOVERY: if (memb_set_subset (&memb_join->system_from, 1, instance->my_new_memb_list, instance->my_new_memb_entries) && memb_join->ring_seq >= instance->my_ring_id.seq) { memb_join_process (instance, memb_join); memb_recovery_state_token_loss (instance); memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY); } break; } return (0); } static int message_handler_memb_commit_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct memb_commit_token *memb_commit_token_convert = alloca (msg_len); struct 
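/*
 * Note: handling of an incoming commit token depends on state.  In GATHER
 * it is accepted only if its member list equals our agreed set (proc list
 * minus failed list) and its ring seq is newer.  In COMMIT we wait until
 * memb_index == addr_entries, i.e. the token has visited every member,
 * before entering recovery.  In RECOVERY the ring representative
 * originates the initial ORF token exactly once.
 */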
memb_commit_token *memb_commit_token; struct srp_addr sub[PROCESSOR_COUNT_MAX]; int sub_entries; struct srp_addr *addr; log_printf (instance->totemsrp_log_level_debug, "got commit token"); if (endian_conversion_needed) { memb_commit_token_endian_convert (msg, memb_commit_token_convert); } else { memcpy (memb_commit_token_convert, msg, msg_len); } memb_commit_token = memb_commit_token_convert; addr = (struct srp_addr *)memb_commit_token->end_of_commit_token; #ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) { return (0); } #endif switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: /* discard token */ break; case MEMB_STATE_GATHER: memb_set_subtract (sub, &sub_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); if (memb_set_equal (addr, memb_commit_token->addr_entries, sub, sub_entries) && memb_commit_token->ring_id.seq > instance->my_ring_id.seq) { memcpy (instance->commit_token, memb_commit_token, msg_len); memb_state_commit_enter (instance); } break; case MEMB_STATE_COMMIT: /* * If retransmitted commit tokens are sent on this ring * filter them out and only enter recovery once the * commit token has traversed the array. This is * determined by : * memb_commit_token->memb_index == memb_commit_token->addr_entries) { */ if (memb_commit_token->ring_id.seq == instance->my_ring_id.seq && memb_commit_token->memb_index == memb_commit_token->addr_entries) { memb_state_recovery_enter (instance, memb_commit_token); } break; case MEMB_STATE_RECOVERY: if (totemip_equal (&instance->my_id.addr[0], &instance->my_ring_id.rep)) { /* Filter out duplicated tokens */ if (instance->originated_orf_token) { break; } instance->originated_orf_token = 1; log_printf (instance->totemsrp_log_level_debug, "Sending initial ORF token"); // TODO convert instead of initiate orf_token_send_initial (instance); reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // REVIEWED } break; } return (0); } static int message_handler_token_hold_cancel ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { const struct token_hold_cancel *token_hold_cancel = msg; if (memcmp (&token_hold_cancel->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) == 0) { instance->my_seq_unchanged = 0; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { timer_function_token_retransmit_timeout (instance); } } return (0); } void main_deliver_fn ( void *context, const void *msg, unsigned int msg_len) { struct totemsrp_instance *instance = context; const struct totem_message_header *message_header = msg; if (msg_len < sizeof (struct totem_message_header)) { log_printf (instance->totemsrp_log_level_security, "Received message is too short... ignoring %u.", (unsigned int)msg_len); return; } switch (message_header->type) { case MESSAGE_TYPE_ORF_TOKEN: instance->stats.orf_token_rx++; break; case MESSAGE_TYPE_MCAST: instance->stats.mcast_rx++; break; case MESSAGE_TYPE_MEMB_MERGE_DETECT: instance->stats.memb_merge_detect_rx++; break; case MESSAGE_TYPE_MEMB_JOIN: instance->stats.memb_join_rx++; break; case MESSAGE_TYPE_MEMB_COMMIT_TOKEN: instance->stats.memb_commit_token_rx++; break; case MESSAGE_TYPE_TOKEN_HOLD_CANCEL: instance->stats.token_hold_cancel_rx++; break; default: log_printf (instance->totemsrp_log_level_security, "Type of received message is wrong... 
ignoring %d.\n", (int)message_header->type); printf ("wrong message type\n"); instance->stats.rx_msg_dropped++; return; } /* * Handle incoming message */ totemsrp_message_handlers.handler_functions[(int)message_header->type] ( instance, msg, msg_len, message_header->endian_detector != ENDIAN_LOCAL); } void main_iface_change_fn ( void *context, const struct totem_ip_address *iface_addr, unsigned int iface_no) { struct totemsrp_instance *instance = context; int i; totemip_copy (&instance->my_id.addr[iface_no], iface_addr); assert (instance->my_id.addr[iface_no].nodeid); totemip_copy (&instance->my_memb_list[0].addr[iface_no], iface_addr); if (instance->iface_changes++ == 0) { instance->memb_ring_id_create_or_load (&instance->my_ring_id, &instance->my_id.addr[0]); instance->token_ring_id_seq = instance->my_ring_id.seq; log_printf ( instance->totemsrp_log_level_debug, "Created or loaded sequence id %llx.%s for this ring.", instance->my_ring_id.seq, totemip_print (&instance->my_ring_id.rep)); if (instance->totemsrp_service_ready_fn) { instance->totemsrp_service_ready_fn (); } } for (i = 0; i < instance->totem_config->interfaces[iface_no].member_count; i++) { totemsrp_member_add (instance, &instance->totem_config->interfaces[iface_no].member_list[i], iface_no); } if (instance->iface_changes >= instance->totem_config->interface_count) { memb_state_gather_enter (instance, TOTEMSRP_GSFROM_INTERFACE_CHANGE); } } void totemsrp_net_mtu_adjust (struct totem_config *totem_config) { totem_config->net_mtu -= sizeof (struct mcast); } void totemsrp_service_ready_register ( void *context, void (*totem_service_ready) (void)) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; instance->totemsrp_service_ready_fn = totem_service_ready; } int totemsrp_member_add ( void *context, const struct totem_ip_address *member, int link_no) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; int res; res = totemnet_member_add (instance->totemnet_context, &instance->my_id.addr[link_no], member, link_no); return (res); } int totemsrp_member_remove ( void *context, const struct totem_ip_address *member, int link_no) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; int res; res = totemnet_member_remove (instance->totemnet_context, member, link_no); return (res); } void totemsrp_threaded_mode_enable (void *context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; instance->threaded_mode_enabled = 1; } void totemsrp_trans_ack (void *context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; instance->waiting_trans_ack = 0; instance->totemsrp_waiting_trans_ack_cb_fn (0); } diff --git a/exec/totemudp.c b/exec/totemudp.c index c103007d..96965b70 100644 --- a/exec/totemudp.c +++ b/exec/totemudp.c @@ -1,1366 +1,1365 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudp.h" #include "util.h" #include #include #include #include #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 #define MESSAGE_TYPE_MEMB_JOIN 3 struct totemudp_socket { int mcast_recv; int mcast_send; int token; /* * Socket used for local multicast delivery. We don't rely on multicast * loop and rather this UNIX DGRAM socket is used. 
Socket is created by * socketpair call and they are used in same way as pipe (so [0] is read * end and [1] is write end) */ int local_mcast_loop[2]; }; struct totemudp_instance { qb_loop_t *totemudp_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudp_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemudp_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address, unsigned int ring_no); void (*totemudp_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudp_log_level_security; int totemudp_log_level_error; int totemudp_log_level_warning; int totemudp_log_level_notice; int totemudp_log_level_debug; int totemudp_subsys_id; void (*totemudp_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); void *udp_context; char iov_buffer[FRAME_SIZE_MAX]; char iov_buffer_flush[FRAME_SIZE_MAX]; struct iovec totemudp_iov_recv; struct iovec totemudp_iov_recv_flush; struct totemudp_socket totemudp_sockets; struct totem_ip_address mcast_address; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; int flushing; struct totem_config *totem_config; totemsrp_stats_t *stats; struct totem_ip_address token_target; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudp_instance *instance; }; static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *mcastaddress, struct totemudp_socket *sockets, struct totem_ip_address *bound_to); static struct totem_ip_address localhost; static void totemudp_instance_initialize (struct totemudp_instance *instance) { memset (instance, 0, sizeof (struct totemudp_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudp_iov_recv.iov_base = instance->iov_buffer; instance->totemudp_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush; instance->totemudp_iov_recv_flush.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; } #define log_printf(level, format, args...) \ do { \ instance->totemudp_log_printf ( \ level, instance->totemudp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) 
\ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemudp_log_printf ( \ level, instance->totemudp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ } while(0) int totemudp_crypto_set ( void *udp_context, const char *cipher_type, const char *hash_type) { return (0); } static inline void ucast_sendmsg ( struct totemudp_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; struct sockaddr_storage sockaddr; struct iovec iovec; int addrlen; iovec.iov_base = (void*)msg; iovec.iov_len = msg_len; /* * Build unicast message */ memset(&msg_ucast, 0, sizeof(msg_ucast)); totemip_totemip_to_sockaddr_convert(system_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *)&iovec; msg_ucast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_ucast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_ucast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_ucast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_ucast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudp_instance *instance, const void *msg, unsigned int msg_len) { struct msghdr msg_mcast; int res = 0; struct iovec iovec; struct sockaddr_storage sockaddr; int addrlen; iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; /* * Build multicast message */ totemip_totemip_to_sockaddr_convert(&instance->mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); memset(&msg_mcast, 0, sizeof(msg_mcast)); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *)&iovec; msg_mcast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_mcast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_mcast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_mcast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_mcast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(mcast) failed (non-critical)"); instance->stats->continuous_sendmsg_failures++; } else { instance->stats->continuous_sendmsg_failures = 0; } /* * Transmit multicast message to local unix mcast loop * An error here is recovered by totemsrp */ msg_mcast.msg_name = NULL; msg_mcast.msg_namelen = 0; res = sendmsg (instance->totemudp_sockets.local_mcast_loop[1], &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(local mcast loop) failed (non-critical)"); } } int totemudp_finalize ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; if (instance->totemudp_sockets.mcast_recv > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, 
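/*
 * Note: finalize removes the poll entry for each socket that was
 * registered with the loop (mcast_recv, local_mcast_loop[0], token) before
 * closing it; mcast_send and the write end of the local mcast loop
 * socketpair are simply closed.
 */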
instance->totemudp_sockets.mcast_recv); close (instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.local_mcast_loop[0] > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[1]); } if (instance->totemudp_sockets.token > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); close (instance->totemudp_sockets.token); } return (res); } /* * Only designed to work with a message with one iov */ static int net_deliver_fn ( int fd, int revents, void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; char *message_type; if (instance->flushing == 1) { iovec = &instance->totemudp_iov_recv_flush; } else { iovec = &instance->totemudp_iov_recv; } /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } iovec->iov_len = bytes_received; /* * Drop all non-mcast messages (more specifically join * messages should be dropped) */ message_type = (char *)iovec->iov_base; if (instance->flushing == 1 && *message_type == MESSAGE_TYPE_MEMB_JOIN) { log_printf(instance->totemudp_log_level_warning, "JOIN or LEAVE message was thrown away during flush operation."); iovec->iov_len = FRAME_SIZE_MAX; return (0); } /* * Handle incoming message */ instance->totemudp_deliver_fn ( instance->context, iovec->iov_base, iovec->iov_len); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudp_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. 
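For readers not familiar with the receive path above: net_deliver_fn reads one datagram per call through a single-element iovec, and the local multicast loop created by socketpair() is consumed the same way as the real UDP sockets. The following minimal sketch is not part of the patch; it only illustrates that recvmsg()-into-iovec pattern, with an AF_UNIX datagram socketpair standing in for the totem sockets and all names chosen for illustration.

/* Illustrative sketch, not corosync code. */
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char buf[1500];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg;
	ssize_t n;

	/* sv[0] is the read end, sv[1] the write end, as with the
	 * local_mcast_loop pair described in the struct comment. */
	assert(socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) == 0);
	assert(send(sv[1], "hello", 5, 0) == 5);

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	/* MSG_DONTWAIT: return immediately instead of blocking when the
	 * queue is empty; the caller treats -1 as "nothing to deliver". */
	n = recvmsg(sv[0], &msg, MSG_DONTWAIT);
	printf("received %zd bytes\n", n);

	close(sv[0]);
	close(sv[1]);
	return 0;
}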
*/ static void timer_function_netif_check_timeout ( void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->totemudp_sockets.mcast_recv > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.mcast_recv); close (instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.local_mcast_loop[0] > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[1]); } if (instance->totemudp_sockets.token > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); close (instance->totemudp_sockets.token); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = &instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ (void)totemudp_build_sockets (instance, &instance->mcast_address, bind_address, &instance->totemudp_sockets, &instance->totem_interface->boundto); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.mcast_recv, POLLIN, instance, net_deliver_fn); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.local_mcast_loop[0], POLLIN, instance, net_deliver_fn); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.token, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, &instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudp_log_level_notice, "The network interface [%s] is now up.", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { 
qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudp_log_level_notice, "The network interface is down."); instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority"); } #endif } static int totemudp_build_sockets_ip ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; struct ipv6_mreq mreq6; struct ip_mreq mreq; struct sockaddr_storage mcast_ss, boundto_ss; struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss; struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss; struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss; unsigned int sendbuf_size; unsigned int recvbuf_size; unsigned int optlen = sizeof (sendbuf_size); int addrlen; int res; int flag; uint8_t sflag; int i; /* * Create multicast recv socket */ sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_recv == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_recv); res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Create local multicast loop socket */ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets->local_mcast_loop) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } for (i = 0; i < 2; i++) { totemip_nosigpipe (sockets->local_mcast_loop[i]); res = fcntl (sockets->local_mcast_loop[i], F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } } /* * Setup mcast send socket */ sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_send == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_send); res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, 
instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1, &sockaddr, &addrlen); res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to send multicast packets"); return (-1); } /* * Setup unicast socket */ sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->token == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->token); res = fcntl (sockets->token, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind UDP unicast socket"); return (-1); } recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; /* * Set buffer sizes to avoid overruns */ res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_RCVBUF size on UDP mcast socket"); return (-1); } res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_SNDBUF size on UDP mcast socket"); return (-1); } res = setsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_RCVBUF size on UDP local mcast loop socket"); return (-1); } res = setsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_SNDBUF size on UDP local mcast loop socket"); return (-1); } res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Receive multicast socket recv buffer size (%d bytes).", recvbuf_size); } res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Transmit multicast socket send buffer size (%d bytes).", sendbuf_size); } res = getsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Local receive multicast loop socket recv buffer size (%d bytes).", recvbuf_size); } res = getsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Local transmit multicast loop socket send buffer size (%d bytes).", sendbuf_size); 
} /* * Join group membership on socket */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen); totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen); if (instance->totem_config->broadcast_use == 1) { unsigned int broadcast = 1; if ((setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } if ((setsockopt(sockets->mcast_send, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } } else { switch (bindnet_address->family) { case AF_INET: memset(&mreq, 0, sizeof(mreq)); mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr; mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr; res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof (mreq)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "join ipv4 multicast group failed"); return (-1); } break; case AF_INET6: memset(&mreq6, 0, sizeof(mreq6)); memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr)); mreq6.ipv6mr_interface = interface_num; res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq6, sizeof (mreq6)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "join ipv6 multicast group failed"); return (-1); } break; } } /* * Turn off multicast loopback */ flag = 0; switch ( bindnet_address->family ) { case AF_INET: sflag = 0; res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP, &sflag, sizeof (sflag)); break; case AF_INET6: res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &flag, sizeof (flag)); } if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to turn off multicast loopback"); return (-1); } /* * Set multicast packets TTL */ flag = instance->totem_interface->ttl; if (bindnet_address->family == AF_INET6) { res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &flag, sizeof (flag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v6 TTL failed"); return (-1); } } else { sflag = flag; res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL, &sflag, sizeof(sflag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v4 TTL failed"); return (-1); } } /* * Bind to a specific interface for multicast send and receive */ switch ( bindnet_address->family ) { case AF_INET: if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send)"); return (-1); } if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv)"); return (-1); } break; case AF_INET6: if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send v6)"); return (-1); } if 
(setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv v6)"); return (-1); } break; } /* * Bind to multicast socket used for multicast receives * This needs to happen after all of the multicast setsockopt() calls * as the kernel seems to only put them into effect (for IPV6) when bind() * is called. */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to receive multicast packets"); return (-1); } return 0; } static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudp_build_sockets_ip (instance, mcast_address, bindnet_address, sockets, bound_to, interface_num); /* We only send out of the token socket */ totemudp_traffic_control_set(instance, sockets->token); return res; } /* * Totem Network interface * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemudp_initialize ( qb_loop_t *poll_handle, void **udp_context, struct totem_config *totem_config, totemsrp_stats_t *stats, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address, unsigned int ring_no), void (*mtu_changed) ( void *context, int net_mtu), void (*target_set_completed) ( void *context)) { struct totemudp_instance *instance; instance = malloc (sizeof (struct totemudp_instance)); if (instance == NULL) { return (-1); } totemudp_instance_initialize (instance); instance->totem_config = totem_config; instance->stats = stats; /* * Configure logging */ instance->totemudp_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize local variables for totemudp */ instance->totem_interface = &totem_config->interfaces[0]; totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr); memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); instance->totemudp_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; instance->totemudp_deliver_fn = deliver_fn; instance->totemudp_iface_change_fn = iface_change_fn; instance->totemudp_target_set_completed = 
target_set_completed; totemip_localhost (instance->mcast_address.family, &localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); *udp_context = instance; return (0); } void *totemudp_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudp_buffer_release (void *ptr) { return free (ptr); } int totemudp_processor_count_set ( void *udp_context, int processor_count) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudp_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudp_recv_flush (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; struct pollfd ufd; int nfds; int res = 0; int i; int sock; instance->flushing = 1; for (i = 0; i < 2; i++) { sock = -1; if (i == 0) { sock = instance->totemudp_sockets.mcast_recv; } if (i == 1) { sock = instance->totemudp_sockets.local_mcast_loop[0]; } assert(sock != -1); do { ufd.fd = sock; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { net_deliver_fn (sock, ufd.revents, instance); } } while (nfds == 1); } instance->flushing = 0; return (res); } int totemudp_send_flush (void *udp_context) { return 0; } int totemudp_token_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudp_mcast_flush_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } int totemudp_mcast_noflush_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } extern int totemudp_iface_check (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } int totemudp_ifaces_get ( void *net_context, char ***status, unsigned int *iface_count) { static char *statuses[INTERFACE_MAX] = {(char*)"OK"}; if (status) { *status = statuses; } *iface_count = 1; return (0); } extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config) { assert(totem_config->interface_count > 0); totem_config->net_mtu -= totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family); } int totemudp_token_target_set ( void *udp_context, const struct totem_ip_address *token_target) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); 
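totemudp_recv_flush above (and totemudp_recv_mcast_empty below) drain a non-blocking socket by polling with a zero timeout and reading until poll() reports nothing pending. A minimal, self-contained sketch of that drain loop follows; it is not part of the patch, and the socketpair and payloads are stand-ins for the real multicast and local-loop sockets.

/* Illustrative sketch, not corosync code. */
#include <assert.h>
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char buf[64];
	struct pollfd ufd;
	int nfds, drained = 0;

	assert(socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) == 0);
	send(sv[1], "a", 1, 0);
	send(sv[1], "b", 1, 0);

	do {
		ufd.fd = sv[0];
		ufd.events = POLLIN;
		nfds = poll(&ufd, 1, 0);	/* 0 ms timeout: never block */
		if (nfds == 1 && (ufd.revents & POLLIN)) {
			recv(sv[0], buf, sizeof(buf), MSG_DONTWAIT);
			drained++;
		}
	} while (nfds == 1);	/* stop once nothing is pending */

	printf("drained %d datagrams\n", drained);
	close(sv[0]);
	close(sv[1]);
	return 0;
}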
instance->totemudp_target_set_completed (instance->context); return (res); } extern int totemudp_recv_mcast_empty ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int msg_processed = 0; int i; int sock; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = &instance->totemudp_iov_recv_flush; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif for (i = 0; i < 2; i++) { sock = -1; if (i == 0) { sock = instance->totemudp_sockets.mcast_recv; } if (i == 1) { sock = instance->totemudp_sockets.local_mcast_loop[0]; } assert(sock != -1); do { ufd.fd = sock; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); } return (msg_processed); } diff --git a/exec/totemudpu.c b/exec/totemudpu.c index 8c0c074c..9e4864f9 100644 --- a/exec/totemudpu.c +++ b/exec/totemudpu.c @@ -1,1187 +1,1178 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include -#include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudpu.h" #include "util.h" #include #include #include #include #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 struct totemudpu_member { - struct list_head list; + struct qb_list_head list; struct totem_ip_address member; int fd; int active; }; struct totemudpu_instance { qb_loop_t *totemudpu_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudpu_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemudpu_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address, unsigned int ring_no); void (*totemudpu_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudpu_log_level_security; int totemudpu_log_level_error; int totemudpu_log_level_warning; int totemudpu_log_level_notice; int totemudpu_log_level_debug; int totemudpu_subsys_id; void (*totemudpu_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); void *udpu_context; char iov_buffer[FRAME_SIZE_MAX]; struct iovec totemudpu_iov_recv; - struct list_head member_list; + struct qb_list_head member_list; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; struct totem_config *totem_config; totemsrp_stats_t *stats; struct totem_ip_address token_target; int token_socket; qb_loop_timer_handle timer_merge_detect_timeout; int send_merge_detect_message; unsigned int merge_detect_messages_sent_before_timeout; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudpu_instance *instance; }; static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to); static int totemudpu_create_sending_socket( void *udpu_context, const struct totem_ip_address *member); int totemudpu_member_list_rebind_ip ( void *udpu_context); static void totemudpu_start_merge_detect_timeout( void *udpu_context); static void totemudpu_stop_merge_detect_timeout( void *udpu_context); static struct totem_ip_address localhost; static void totemudpu_instance_initialize (struct totemudpu_instance *instance) { memset (instance, 0, sizeof (struct totemudpu_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudpu_iov_recv.iov_base = instance->iov_buffer; instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; - list_init (&instance->member_list); + qb_list_init (&instance->member_list); } #define log_printf(level, format, args...) 
\ do { \ instance->totemudpu_log_printf ( \ level, instance->totemudpu_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) \ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemudpu_log_printf ( \ level, instance->totemudpu_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)", ##args, _error_ptr, err_num); \ } while(0) int totemudpu_crypto_set ( void *udpu_context, const char *cipher_type, const char *hash_type) { return (0); } static inline void ucast_sendmsg ( struct totemudpu_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; struct sockaddr_storage sockaddr; struct iovec iovec; int addrlen; iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; /* * Build unicast message */ totemip_totemip_to_sockaddr_convert(system_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); memset(&msg_ucast, 0, sizeof(msg_ucast)); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *)&iovec; msg_ucast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_ucast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_ucast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_ucast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_ucast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudpu_instance *instance, const void *msg, unsigned int msg_len, int only_active) { struct msghdr msg_mcast; int res = 0; struct iovec iovec; struct sockaddr_storage sockaddr; int addrlen; - struct list_head *list; + struct qb_list_head *list; struct totemudpu_member *member; iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; memset(&msg_mcast, 0, sizeof(msg_mcast)); /* * Build multicast message */ - for (list = instance->member_list.next; - list != &instance->member_list; - list = list->next) { - - member = list_entry (list, + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, struct totemudpu_member, list); /* * Do not send multicast message if message is not "flush", member * is inactive and timeout for sending merge message didn't expired. 
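Because UDPU has no real multicast, mcast_sendmsg above loops over the member list and sends the same buffer to each peer by filling msghdr.msg_name with that peer's address. The sketch below, which is not part of the patch, shows a single sendmsg() with an explicit destination; the loopback address and port are stand-ins.

/* Illustrative sketch, not corosync code. */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in dst;
	struct iovec iov;
	struct msghdr msg;
	char payload[] = "token";

	if (fd < 0)
		return 1;

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(5405);		/* stand-in port */
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);

	iov.iov_base = payload;
	iov.iov_len = sizeof(payload);

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &dst;			/* destination goes in msg_name */
	msg.msg_namelen = sizeof(dst);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(fd, &msg, 0) < 0)
		perror("sendmsg");		/* non-critical here, as in the transport */
	close(fd);
	return 0;
}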
*/ if (only_active && !member->active && !instance->send_merge_detect_message) continue ; totemip_totemip_to_sockaddr_convert(&member->member, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *)&iovec; msg_mcast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_mcast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_mcast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_mcast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_mcast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(mcast) failed (non-critical)"); } } if (!only_active || instance->send_merge_detect_message) { /* * Current message was sent to all nodes */ instance->merge_detect_messages_sent_before_timeout++; instance->send_merge_detect_message = 0; } } int totemudpu_finalize ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; if (instance->token_socket > 0) { qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); close (instance->token_socket); } totemudpu_stop_merge_detect_timeout(instance); return (res); } static int net_deliver_fn ( int fd, int revents, void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; iovec = &instance->totemudpu_iov_recv; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } iovec->iov_len = bytes_received; /* * Handle incoming message */ instance->totemudpu_deliver_fn ( instance->context, iovec->iov_base, iovec->iov_len); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. 
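The netif check logic below re-arms its own timer with qb_loop_timer_add() whenever the interface still needs to be re-checked. The sketch that follows is not part of the patch and assumes only the libqb loop calls already used here plus qb_loop_create/qb_loop_run/qb_loop_stop from the same library; the interval and repeat count are illustrative.

/* Illustrative sketch, not corosync code; link with -lqb. */
#include <stdio.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>

static qb_loop_t *loop;
static qb_loop_timer_handle th;
static int checks_left = 3;	/* illustrative count */

static void netif_check(void *data)
{
	printf("checking interface, %d checks left\n", checks_left);
	if (--checks_left > 0) {
		/* Re-add ourselves, as timer_function_netif_check_timeout does
		 * with downcheck_timeout. */
		qb_loop_timer_add(loop, QB_LOOP_MED,
			100 * QB_TIME_NS_IN_MSEC, data, netif_check, &th);
	} else {
		qb_loop_stop(loop);
	}
}

int main(void)
{
	loop = qb_loop_create();
	qb_loop_timer_add(loop, QB_LOOP_MED, 100 * QB_TIME_NS_IN_MSEC,
		NULL, netif_check, &th);
	qb_loop_run(loop);	/* returns after qb_loop_stop() */
	return 0;
}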
*/ static void timer_function_netif_check_timeout ( void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->token_socket > 0) { qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); close (instance->token_socket); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = &instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ totemudpu_build_sockets (instance, bind_address, &instance->totem_interface->boundto); qb_loop_poll_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->token_socket, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, &instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudpu_log_level_notice, "The network interface [%s] is now up.", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudpu_log_level_notice, "The network interface is down."); instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set traffic 
priority"); } #endif } static int totemudpu_build_sockets_ip ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; int addrlen; int res; unsigned int recvbuf_size; unsigned int optlen = sizeof (recvbuf_size); /* * Setup unicast socket */ instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0); if (instance->token_socket == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (instance->token_socket); res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "bind token socket failed"); return (-1); } /* * the token_socket can receive many messages. Allow a large number * of receive messages on this socket */ recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set recvbuf size"); } return 0; } int totemudpu_ifaces_get ( void *net_context, char ***status, unsigned int *iface_count) { static char *statuses[INTERFACE_MAX] = {(char*)"OK"}; if (status) { *status = statuses; } *iface_count = 1; return (0); } static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudpu_build_sockets_ip (instance, bindnet_address, bound_to, interface_num); /* We only send out of the token socket */ totemudpu_traffic_control_set(instance, instance->token_socket); /* * Rebind all members to new ips */ totemudpu_member_list_rebind_ip(instance); return res; } /* * Totem Network interface * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemudpu_initialize ( qb_loop_t *poll_handle, void **udpu_context, struct totem_config *totem_config, totemsrp_stats_t *stats, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address, unsigned int ring_no), void (*mtu_changed) ( void *context, int net_mtu), void (*target_set_completed) ( void *context)) { struct totemudpu_instance *instance; instance = malloc (sizeof (struct totemudpu_instance)); if (instance == NULL) { return (-1); } totemudpu_instance_initialize (instance); instance->totem_config = totem_config; instance->stats = stats; /* * Configure logging */ instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error; 
instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize local variables for totemudpu */ instance->totem_interface = &totem_config->interfaces[0]; memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); instance->totemudpu_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; instance->totemudpu_deliver_fn = deliver_fn; instance->totemudpu_iface_change_fn = iface_change_fn; instance->totemudpu_target_set_completed = target_set_completed; totemip_localhost (AF_INET, &localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); totemudpu_start_merge_detect_timeout(instance); *udpu_context = instance; return (0); } void *totemudpu_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudpu_buffer_release (void *ptr) { return free (ptr); } int totemudpu_processor_count_set ( void *udpu_context, int processor_count) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudpu_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudpu_recv_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_send_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_token_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudpu_mcast_flush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg (instance, msg, msg_len, 0); return (res); } int totemudpu_mcast_noflush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg (instance, msg, msg_len, 1); return (res); } extern int totemudpu_iface_check (void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config) { assert(totem_config->interface_count > 0); totem_config->net_mtu -= totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family); } int totemudpu_token_target_set ( void 
*udpu_context, const struct totem_ip_address *token_target) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); instance->totemudpu_target_set_completed (instance->context); return (res); } extern int totemudpu_recv_mcast_empty ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int msg_processed = 0; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = &instance->totemudpu_iov_recv; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif do { ufd.fd = instance->token_socket; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); return (msg_processed); } static int totemudpu_create_sending_socket( void *udpu_context, const struct totem_ip_address *member) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int fd; int res; unsigned int sendbuf_size; unsigned int optlen = sizeof (sendbuf_size); struct sockaddr_storage sockaddr; int addrlen; fd = socket (member->family, SOCK_DGRAM, 0); if (fd == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not create socket for new member"); return (-1); } totemip_nosigpipe (fd); res = fcntl (fd, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); goto error_close_fd; } /* * These sockets are used to send multicast messages, so their buffers * should be large */ sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set sendbuf size"); /* * Fail in setting sendbuf size is not fatal -> don't exit */ } /* * Bind to sending interface */ totemip_totemip_to_sockaddr_convert(&instance->my_id, 0, &sockaddr, &addrlen); res = bind (fd, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "bind token socket failed"); goto error_close_fd; } return (fd); error_close_fd: close(fd); return (-1); } int totemudpu_member_add ( void *udpu_context, const struct totem_ip_address *local, const struct totem_ip_address *member, int ring_no) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; struct totemudpu_member *new_member; new_member = malloc (sizeof (struct totemudpu_member)); if (new_member == NULL) { return (-1); } memset(new_member, 0, sizeof(*new_member)); log_printf (LOGSYS_LEVEL_NOTICE, "adding new UDPU member {%s}", totemip_print(member)); - list_init (&new_member->list); - list_add_tail (&new_member->list, &instance->member_list); + qb_list_init (&new_member->list); + qb_list_add_tail (&new_member->list, 
&instance->member_list); memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); new_member->fd = totemudpu_create_sending_socket(udpu_context, member); new_member->active = 1; return (0); } int totemudpu_member_remove ( void *udpu_context, const struct totem_ip_address *token_target, int ring_no) { int found = 0; - struct list_head *list; + struct qb_list_head *list; struct totemudpu_member *member; struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; /* * Find the member to remove and close its socket */ - for (list = instance->member_list.next; - list != &instance->member_list; - list = list->next) { - - member = list_entry (list, + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, struct totemudpu_member, list); if (totemip_compare (token_target, &member->member)==0) { log_printf(LOGSYS_LEVEL_NOTICE, "removing UDPU member {%s}", totemip_print(&member->member)); if (member->fd > 0) { log_printf(LOGSYS_LEVEL_DEBUG, "Closing socket to: {%s}", totemip_print(&member->member)); qb_loop_poll_del (instance->totemudpu_poll_handle, member->fd); close (member->fd); } found = 1; break; } } /* * Delete the member from the list */ if (found) { - list_del (list); + qb_list_del (list); } instance = NULL; return (0); } int totemudpu_member_list_rebind_ip ( void *udpu_context) { - struct list_head *list; + struct qb_list_head *list; struct totemudpu_member *member; struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; - for (list = instance->member_list.next; - list != &instance->member_list; - list = list->next) { - - member = list_entry (list, + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, struct totemudpu_member, list); if (member->fd > 0) { close (member->fd); } member->fd = totemudpu_create_sending_socket(udpu_context, &member->member); } return (0); } static void timer_function_merge_detect_timeout ( void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; if (instance->merge_detect_messages_sent_before_timeout == 0) { instance->send_merge_detect_message = 1; } instance->merge_detect_messages_sent_before_timeout = 0; totemudpu_start_merge_detect_timeout(instance); } static void totemudpu_start_merge_detect_timeout( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; qb_loop_timer_add(instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_merge_detect_timeout, &instance->timer_merge_detect_timeout); } static void totemudpu_stop_merge_detect_timeout( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; qb_loop_timer_del(instance->totemudpu_poll_handle, instance->timer_merge_detect_timeout); } diff --git a/exec/util.c b/exec/util.c index c7d561e7..4872376a 100644 --- a/exec/util.c +++ b/exec/util.c @@ -1,193 +1,192 @@ /* * Copyright (c) 2002-2004 MontaVista Software, Inc. * Copyright (c) 2004 Open Source Development Lab * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. 
* * Author: Steven Dake (sdake@redhat.com), Mark Haverkamp (markh@osdl.org) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include -#include #include #include "util.h" LOGSYS_DECLARE_SUBSYS ("MAIN"); struct service_names { const char *c_name; int32_t c_val; }; static struct service_names servicenames[] = { { "CFG", CFG_SERVICE }, { "CPG", CPG_SERVICE }, { "QUORUM", QUORUM_SERVICE }, { "PLOAD", PLOAD_SERVICE }, { "VOTEQUORUM", VOTEQUORUM_SERVICE }, { "MON", MON_SERVICE }, { "WD", WD_SERVICE }, { "CMAP", CMAP_SERVICE }, { NULL, -1 } }; const char * short_service_name_get(uint32_t service_id, char *buf, size_t buf_size) { uint32_t i; for (i = 0; servicenames[i].c_name != NULL; i++) { if (service_id == servicenames[i].c_val) { return (servicenames[i].c_name); } } snprintf(buf, buf_size, "%d", service_id); return buf; } /* * Compare two names. returns non-zero on match. 
*/ int name_match(cs_name_t *name1, cs_name_t *name2) { if (name1->length == name2->length) { return ((strncmp ((char *)name1->value, (char *)name2->value, name1->length)) == 0); } return 0; } /* * Get the time of day and convert to nanoseconds */ cs_time_t clust_time_now(void) { struct timeval tv; cs_time_t time_now; if (gettimeofday(&tv, 0)) { return 0ULL; } time_now = (cs_time_t)(tv.tv_sec) * 1000000000ULL; time_now += (cs_time_t)(tv.tv_usec) * 1000ULL; return time_now; } void _corosync_out_of_memory_error (void) __attribute__((noreturn)); void _corosync_out_of_memory_error (void) { assert (0==1); exit (EXIT_FAILURE); } void _corosync_exit_error ( enum e_corosync_done err, const char *file, unsigned int line) __attribute__((noreturn)); void _corosync_exit_error ( enum e_corosync_done err, const char *file, unsigned int line) { if (err == COROSYNC_DONE_EXIT) { log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine exiting normally"); } else { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Cluster Engine exiting " "with status %d at %s:%u.", err, file, line); } logsys_system_fini (); exit (err); } #define min(a,b) ((a) < (b) ? (a) : (b)) char *getcs_name_t (cs_name_t *name) { static char ret_name[CS_MAX_NAME_LENGTH]; /* if string is corrupt (non-terminated), ensure it's displayed safely */ if (name->length >= CS_MAX_NAME_LENGTH || name->value[name->length] != '\0') { memset (ret_name, 0, sizeof (ret_name)); memcpy (ret_name, name->value, min(name->length, CS_MAX_NAME_LENGTH -1)); return (ret_name); } return ((char *)name->value); } void setcs_name_t (cs_name_t *name, char *str) { strncpy ((char *)name->value, str, sizeof (name->value)); ((char *)name->value)[sizeof (name->value) - 1] = '\0'; if (strlen ((char *)name->value) > CS_MAX_NAME_LENGTH) { name->length = CS_MAX_NAME_LENGTH; } else { name->length = strlen (str); } } int cs_name_tisEqual (cs_name_t *str1, char *str2) { if (str1->length == strlen (str2)) { return ((strncmp ((char *)str1->value, (char *)str2, str1->length)) == 0); } else { return 0; } } const char *get_run_dir(void) { static char path[PATH_MAX] = {'\0'}; char *env_run_dir; int res; if (path[0] == '\0') { env_run_dir = getenv("COROSYNC_RUN_DIR"); if (env_run_dir != NULL && env_run_dir[0] != '\0') { res = snprintf(path, PATH_MAX, "%s", getenv("COROSYNC_RUN_DIR")); } else { res = snprintf(path, PATH_MAX, "%s/%s", LOCALSTATEDIR, "lib/corosync"); } assert(res < PATH_MAX); } return (path); } diff --git a/exec/votequorum.c b/exec/votequorum.c index f61aa81e..d05f7b5d 100644 --- a/exec/votequorum.c +++ b/exec/votequorum.c @@ -1,3031 +1,3027 @@ /* * Copyright (c) 2009-2015 Red Hat, Inc. * * All rights reserved. * * Authors: Christine Caulfield (ccaulfie@redhat.com) * Fabio M. Di Nitto (fdinitto@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
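clust_time_now() above folds gettimeofday() into a single nanosecond count: seconds are scaled by 10^9 and microseconds by 10^3. The short sketch below is not part of the patch; it repeats that arithmetic with a plain uint64_t standing in for cs_time_t.

/* Illustrative sketch, not corosync code. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	struct timeval tv;
	uint64_t now_ns;

	if (gettimeofday(&tv, 0) != 0)
		return 1;

	/* seconds -> ns, microseconds -> ns, as in clust_time_now() */
	now_ns  = (uint64_t)tv.tv_sec * 1000000000ULL;
	now_ns += (uint64_t)tv.tv_usec * 1000ULL;

	printf("now = %" PRIu64 " ns since the epoch\n", now_ns);
	return 0;
}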
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include +#include #include #include "quorum.h" #include -#include #include #include #include #include #include #include "service.h" #include "util.h" LOGSYS_DECLARE_SUBSYS ("VOTEQ"); /* * interface with corosync */ static struct corosync_api_v1 *corosync_api; /* * votequorum global config vars */ static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; static struct cluster_node *qdevice = NULL; static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT; static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT; static uint8_t qdevice_can_operate = 1; static void *qdevice_reg_conn = NULL; static uint8_t qdevice_master_wins = 0; static uint8_t two_node = 0; static uint8_t wait_for_all = 0; static uint8_t wait_for_all_status = 0; static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE; static int lowest_node_id = -1; static int highest_node_id = -1; #define DEFAULT_LMS_WIN 10000 static uint8_t last_man_standing = 0; static uint32_t last_man_standing_window = DEFAULT_LMS_WIN; static uint8_t allow_downscale = 0; static uint32_t ev_barrier = 0; static uint8_t ev_tracking = 0; static uint32_t ev_tracking_barrier = 0; static int ev_tracking_fd = -1; /* * votequorum_exec defines/structs/forward definitions */ struct req_exec_quorum_nodeinfo { struct qb_ipc_request_header header __attribute__((aligned(8))); uint32_t nodeid; uint32_t votes; uint32_t expected_votes; uint32_t flags; } __attribute__((packed)); struct req_exec_quorum_reconfigure { struct qb_ipc_request_header header __attribute__((aligned(8))); uint32_t nodeid; uint32_t value; uint8_t param; uint8_t _pad0; uint8_t _pad1; uint8_t _pad2; } __attribute__((packed)); struct req_exec_quorum_qdevice_reg { struct qb_ipc_request_header header __attribute__((aligned(8))); uint32_t operation; char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; } __attribute__((packed)); struct req_exec_quorum_qdevice_reconfigure { struct qb_ipc_request_header header __attribute__((aligned(8))); char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; } __attribute__((packed)); /* * votequorum_exec onwire version (via totem) */ #include "votequorum.h" /* * votequorum_exec onwire messages (via totem) */ #define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0 #define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3 static void votequorum_exec_send_expectedvotes_notification(void); static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context); static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context); #define 
VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1 #define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2 #define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value); /* * used by req_exec_quorum_qdevice_reg */ #define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0 #define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1 /* * votequorum internal node status/view */ #define NODE_FLAGS_QUORATE 1 #define NODE_FLAGS_LEAVING 2 #define NODE_FLAGS_WFASTATUS 4 #define NODE_FLAGS_FIRST 8 #define NODE_FLAGS_QDEVICE_REGISTERED 16 #define NODE_FLAGS_QDEVICE_ALIVE 32 #define NODE_FLAGS_QDEVICE_CAST_VOTE 64 #define NODE_FLAGS_QDEVICE_MASTER_WINS 128 typedef enum { NODESTATE_MEMBER=1, NODESTATE_DEAD, NODESTATE_LEAVING } nodestate_t; struct cluster_node { int node_id; nodestate_t state; uint32_t votes; uint32_t expected_votes; uint32_t flags; - struct list_head list; + struct qb_list_head list; }; /* * votequorum internal quorum status */ static uint8_t quorum; static uint8_t cluster_is_quorate; /* * votequorum membership data */ static struct cluster_node *us; -static struct list_head cluster_members_list; +static struct qb_list_head cluster_members_list; static unsigned int quorum_members[PROCESSOR_COUNT_MAX]; static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX]; static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX]; static int quorum_members_entries = 0; static int previous_quorum_members_entries = 0; static int atb_nodelist_entries = 0; static struct memb_ring_id quorum_ringid; /* * pre allocate all cluster_nodes + one for qdevice */ static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2]; static int cluster_nodes_entries = 0; /* * votequorum tracking */ struct quorum_pd { unsigned char track_flags; int tracking_enabled; uint64_t tracking_context; - struct list_head list; + struct qb_list_head list; void *conn; }; -static struct list_head trackers_list; +static struct qb_list_head trackers_list; /* * votequorum timers */ static corosync_timer_handle_t qdevice_timer; static int qdevice_timer_set = 0; static corosync_timer_handle_t last_man_standing_timer; static int last_man_standing_timer_set = 0; static int sync_nodeinfo_sent = 0; static int sync_wait_for_poll_or_timeout = 0; /* * Service Interfaces required by service_message_handler struct */ static int sync_in_progress = 0; static void votequorum_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int votequorum_sync_process (void); static void votequorum_sync_activate (void); static void votequorum_sync_abort (void); static quorum_set_quorate_fn_t quorum_callback; /* * votequorum_exec handler and definitions */ static char *votequorum_exec_init_fn (struct corosync_api_v1 *api); static int votequorum_exec_exit_fn (void); static int votequorum_exec_send_nodeinfo(uint32_t nodeid); static void message_handler_req_exec_votequorum_nodeinfo ( const void *message, unsigned int nodeid); static void exec_votequorum_nodeinfo_endian_convert (void *message); static void message_handler_req_exec_votequorum_reconfigure ( const void *message, unsigned int nodeid); static void exec_votequorum_reconfigure_endian_convert (void *message); static void message_handler_req_exec_votequorum_qdevice_reg ( const void *message, unsigned int nodeid); static void exec_votequorum_qdevice_reg_endian_convert (void *message); static void 
message_handler_req_exec_votequorum_qdevice_reconfigure ( const void *message, unsigned int nodeid); static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message); static struct corosync_exec_handler votequorum_exec_engine[] = { { /* 0 */ .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo, .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert }, { /* 1 */ .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure, .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert }, { /* 2 */ .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg, .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert }, { /* 3 */ .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure, .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert }, }; /* * Library Handler and Functions Definitions */ static int quorum_lib_init_fn (void *conn); static int quorum_lib_exit_fn (void *conn); static void qdevice_timer_fn(void *arg); static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message); static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message); static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message); static void message_handler_req_lib_votequorum_trackstart (void *conn, const void *message); static void message_handler_req_lib_votequorum_trackstop (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_register (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_update (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_poll (void *conn, const void *message); static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn, const void *message); static struct corosync_lib_handler quorum_lib_service[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_votequorum_getinfo, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_votequorum_setexpected, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_votequorum_setvotes, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_votequorum_trackstart, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 */ .lib_handler_fn = message_handler_req_lib_votequorum_trackstop, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 9 */ .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED } }; static struct corosync_service_engine votequorum_service_engine = { .name = "corosync vote quorum service v1.0", .id = VOTEQUORUM_SERVICE, 
.priority = 2, .private_data_size = sizeof (struct quorum_pd), .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED, .lib_init_fn = quorum_lib_init_fn, .lib_exit_fn = quorum_lib_exit_fn, .lib_engine = quorum_lib_service, .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler), .exec_init_fn = votequorum_exec_init_fn, .exec_exit_fn = votequorum_exec_exit_fn, .exec_engine = votequorum_exec_engine, .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler), .sync_init = votequorum_sync_init, .sync_process = votequorum_sync_process, .sync_activate = votequorum_sync_activate, .sync_abort = votequorum_sync_abort }; struct corosync_service_engine *votequorum_get_service_engine_ver0 (void) { return (&votequorum_service_engine); } static struct default_service votequorum_service[] = { { .name = "corosync_votequorum", .ver = 0, .loader = votequorum_get_service_engine_ver0 }, }; /* * common/utility macros/functions */ #define max(a,b) (((a) > (b)) ? (a) : (b)) -#define list_iterate(v, head) \ - for (v = (head)->next; v != head; v = v->next) - static void node_add_ordered(struct cluster_node *newnode) { struct cluster_node *node = NULL; - struct list_head *tmp; - struct list_head *newlist = &newnode->list; + struct qb_list_head *tmp; + struct qb_list_head *newlist = &newnode->list; ENTER(); - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); if (newnode->node_id < node->node_id) { break; } } if (!node) { - list_add(&newnode->list, &cluster_members_list); + qb_list_add(&newnode->list, &cluster_members_list); } else { newlist->prev = tmp->prev; newlist->next = tmp; tmp->prev->next = newlist; tmp->prev = newlist; } LEAVE(); } static struct cluster_node *allocate_node(unsigned int nodeid) { struct cluster_node *cl = NULL; - struct list_head *tmp; + struct qb_list_head *tmp; ENTER(); if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) { cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries]; cluster_nodes_entries++; } else { - list_iterate(tmp, &cluster_members_list) { - cl = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + cl = qb_list_entry(tmp, struct cluster_node, list); if (cl->state == NODESTATE_DEAD) { break; } } /* * this should never happen */ if (!cl) { log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node %u data!!", nodeid); goto out; } - list_del(tmp); + qb_list_del(tmp); } memset(cl, 0, sizeof(struct cluster_node)); cl->node_id = nodeid; if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { node_add_ordered(cl); } out: LEAVE(); return cl; } static struct cluster_node *find_node_by_nodeid(unsigned int nodeid) { struct cluster_node *node; - struct list_head *tmp; + struct qb_list_head *tmp; ENTER(); if (nodeid == us->node_id) { LEAVE(); return us; } if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { LEAVE(); return qdevice; } - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); if (node->node_id == nodeid) { LEAVE(); return node; } } LEAVE(); return NULL; } static void get_lowest_node_id(void) { struct cluster_node *node = NULL; - struct list_head *tmp; + struct qb_list_head *tmp; ENTER(); lowest_node_id = us->node_id; - list_iterate(tmp, 
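/*
 * [Illustrative sketch, not part of the original patch] The hunks around this
 * point all apply the same mechanical conversion: corosync's private list
 * implementation (struct list_head, list_init, list_add/list_add_tail,
 * list_del, list_entry and the local list_iterate macro removed above) is
 * replaced by the libqb equivalents (struct qb_list_head, qb_list_init,
 * qb_list_add/qb_list_add_tail, qb_list_del, qb_list_entry, qb_list_for_each).
 * A minimal example of the new idiom; demo_item/demo_head are made-up names
 * and the header path is assumed to be the one the patch switches to:
 */
#include <qb/qblist.h>

struct demo_item {
	int votes;
	struct qb_list_head list;	/* embedded node, like struct cluster_node */
};

static struct qb_list_head demo_head;

static void demo_setup(void)
{
	static struct demo_item a = { .votes = 1 };

	qb_list_init(&demo_head);		/* head must be initialised before use */
	qb_list_add_tail(&a.list, &demo_head);	/* same call pattern as the patch */
}

static int demo_total_votes(void)
{
	struct qb_list_head *tmp;
	struct demo_item *item;
	int total = 0;

	/* previously: list_iterate(tmp, &demo_head) + list_entry(...) */
	qb_list_for_each(tmp, &demo_head) {
		item = qb_list_entry(tmp, struct demo_item, list);
		total += item->votes;
	}
	return total;
}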
&cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id < lowest_node_id)) { lowest_node_id = node->node_id; } } log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: %d us: %d", lowest_node_id, us->node_id); icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id); LEAVE(); } static void get_highest_node_id(void) { struct cluster_node *node = NULL; - struct list_head *tmp; + struct qb_list_head *tmp; ENTER(); highest_node_id = us->node_id; - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id > highest_node_id)) { highest_node_id = node->node_id; } } log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: %d us: %d", highest_node_id, us->node_id); icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id); LEAVE(); } static int check_low_node_id_partition(void) { struct cluster_node *node = NULL; - struct list_head *tmp; + struct qb_list_head *tmp; int found = 0; ENTER(); - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id == lowest_node_id)) { found = 1; } } LEAVE(); return found; } static int check_high_node_id_partition(void) { struct cluster_node *node = NULL; - struct list_head *tmp; + struct qb_list_head *tmp; int found = 0; ENTER(); - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->node_id == highest_node_id)) { found = 1; } } LEAVE(); return found; } static int is_in_nodelist(int nodeid, unsigned int *members, int entries) { int i; ENTER(); for (i=0; istate == NODESTATE_MEMBER) && (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) && (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE)) { found = 1; } } LEAVE(); return found; } static void decode_flags(uint32_t flags) { ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s", (flags & NODE_FLAGS_QUORATE)?"Yes":"No", (flags & NODE_FLAGS_LEAVING)?"Yes":"No", (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No", (flags & NODE_FLAGS_FIRST)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No", (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No"); LEAVE(); } /* * load/save are copied almost pristine from totemsrp,c */ static int load_ev_tracking_barrier(void) { int res = 0; char filename[PATH_MAX]; ENTER(); snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_run_dir()); ev_tracking_fd = open(filename, O_RDWR, 0700); if (ev_tracking_fd != -1) { res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t)); close(ev_tracking_fd); if (res == sizeof (uint32_t)) { LEAVE(); return 0; } } ev_tracking_barrier = 0; umask(0); ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700); if (ev_tracking_fd != -1) { res = write (ev_tracking_fd, 
&ev_tracking_barrier, sizeof (uint32_t)); if ((res == -1) || (res != sizeof (uint32_t))) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to write to %s", filename); } close(ev_tracking_fd); LEAVE(); return 0; } log_printf(LOGSYS_LEVEL_WARNING, "Unable to create %s file", filename); LEAVE(); return -1; } static void update_wait_for_all_status(uint8_t wfa_status) { ENTER(); wait_for_all_status = wfa_status; if (wait_for_all_status) { us->flags |= NODE_FLAGS_WFASTATUS; } else { us->flags &= ~NODE_FLAGS_WFASTATUS; } icmap_set_uint8("runtime.votequorum.wait_for_all_status", wait_for_all_status); LEAVE(); } static void update_two_node(void) { ENTER(); icmap_set_uint8("runtime.votequorum.two_node", two_node); LEAVE(); } static void update_ev_barrier(uint32_t expected_votes) { ENTER(); ev_barrier = expected_votes; icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier); LEAVE(); } static void update_qdevice_can_operate(uint8_t status) { ENTER(); qdevice_can_operate = status; icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate); LEAVE(); } static void update_qdevice_master_wins(uint8_t allow) { ENTER(); qdevice_master_wins = allow; icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins); LEAVE(); } static void update_ev_tracking_barrier(uint32_t ev_t_barrier) { int res; ENTER(); ev_tracking_barrier = ev_t_barrier; icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier); if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to update ev_tracking_barrier on disk data!!!"); LEAVE(); return; } res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t)); if (res != sizeof (uint32_t)) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to update ev_tracking_barrier on disk data!!!"); } #ifdef HAVE_FDATASYNC fdatasync(ev_tracking_fd); #else fsync(ev_tracking_fd); #endif LEAVE(); } /* * quorum calculation core bits */ static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes) { - struct list_head *nodelist; + struct qb_list_head *nodelist; struct cluster_node *node; unsigned int total_votes = 0; unsigned int highest_expected = 0; unsigned int newquorum, q1, q2; unsigned int total_nodes = 0; ENTER(); if ((allow_downscale) && (allow_decrease) && (max_expected)) { max_expected = max(ev_barrier, max_expected); } - list_iterate(nodelist, &cluster_members_list) { - node = list_entry(nodelist, struct cluster_node, list); + qb_list_for_each(nodelist, &cluster_members_list) { + node = qb_list_entry(nodelist, struct cluster_node, list); log_printf(LOGSYS_LEVEL_DEBUG, "node %u state=%d, votes=%u, expected=%u", node->node_id, node->state, node->votes, node->expected_votes); if (node->state == NODESTATE_MEMBER) { highest_expected = max(highest_expected, node->expected_votes); total_votes += node->votes; total_nodes++; } } if (us->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes); total_votes += qdevice->votes; total_nodes++; } if (max_expected > 0) { highest_expected = max_expected; } /* * This quorum calculation is taken from the OpenVMS Cluster Systems * manual, but, then, you guessed that didn't you */ q1 = (highest_expected + 2) / 2; q2 = (total_votes + 2) / 2; newquorum = max(q1, q2); /* * Normally quorum never decreases but the system administrator can * force it down by setting expected votes to a maximum value */ if (!allow_decrease) { newquorum = max(quorum, newquorum); } /* * The special 
two_node mode allows each of the two nodes to retain * quorum if the other fails. Only one of the two should live past * fencing (as both nodes try to fence each other in split-brain.) * Also: if there are more than two nodes, force us inquorate to avoid * any damage or confusion. */ if (two_node && total_nodes <= 2) { newquorum = 1; } if (ret_total_votes) { *ret_total_votes = total_votes; } LEAVE(); return newquorum; } static void update_node_expected_votes(int new_expected_votes) { - struct list_head *nodelist; + struct qb_list_head *nodelist; struct cluster_node *node; if (new_expected_votes) { - list_iterate(nodelist, &cluster_members_list) { - node = list_entry(nodelist, struct cluster_node, list); + qb_list_for_each(nodelist, &cluster_members_list) { + node = qb_list_entry(nodelist, struct cluster_node, list); if (node->state == NODESTATE_MEMBER) { node->expected_votes = new_expected_votes; } } } } static void are_we_quorate(unsigned int total_votes) { int quorate; int quorum_change = 0; ENTER(); /* * wait for all nodes to show up before granting quorum */ if ((wait_for_all) && (wait_for_all_status)) { if (total_votes != us->expected_votes) { log_printf(LOGSYS_LEVEL_NOTICE, "Waiting for all cluster members. " "Current votes: %d expected_votes: %d", total_votes, us->expected_votes); cluster_is_quorate = 0; return; } update_wait_for_all_status(0); } if (quorum > total_votes) { quorate = 0; } else { quorate = 1; get_lowest_node_id(); get_highest_node_id(); } if ((auto_tie_breaker != ATB_NONE) && /* Must be a half (or half-1) split */ (total_votes == (us->expected_votes / 2)) && /* If the 'other' partition in a split might have quorum then we can't run ATB */ (previous_quorum_members_entries - quorum_members_entries < quorum) && (check_auto_tie_breaker() == 1)) { quorate = 1; } if ((qdevice_master_wins) && (!quorate) && (check_qdevice_master() == 1)) { log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition"); quorate = 1; } if (cluster_is_quorate && !quorate) { quorum_change = 1; log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity"); } if (!cluster_is_quorate && quorate) { quorum_change = 1; log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity"); } cluster_is_quorate = quorate; if (cluster_is_quorate) { us->flags |= NODE_FLAGS_QUORATE; } else { us->flags &= ~NODE_FLAGS_QUORATE; } if (wait_for_all) { if (quorate) { update_wait_for_all_status(0); } else { update_wait_for_all_status(1); } } if ((quorum_change) && (sync_in_progress == 0)) { quorum_callback(quorum_members, quorum_members_entries, cluster_is_quorate, &quorum_ringid); votequorum_exec_send_quorum_notification(NULL, 0L); } LEAVE(); } static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members) { unsigned int total_votes = 0; unsigned int cluster_members = 0; - struct list_head *nodelist; + struct qb_list_head *nodelist; struct cluster_node *node; ENTER(); - list_iterate(nodelist, &cluster_members_list) { - node = list_entry(nodelist, struct cluster_node, list); + qb_list_for_each(nodelist, &cluster_members_list) { + node = qb_list_entry(nodelist, struct cluster_node, list); if (node->state == NODESTATE_MEMBER) { cluster_members++; total_votes += node->votes; } } if (qdevice->votes) { total_votes += qdevice->votes; cluster_members++; } *totalvotes = total_votes; *current_members = cluster_members; LEAVE(); } /* * Recalculate cluster quorum, set quorate and notify changes */ static void recalculate_quorum(int allow_decrease, int by_current_nodes) { unsigned 
int total_votes = 0; unsigned int cluster_members = 0; ENTER(); get_total_votes(&total_votes, &cluster_members); if (!by_current_nodes) { cluster_members = 0; } /* * Keep expected_votes at the highest number of votes in the cluster */ log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes); if (total_votes > us->expected_votes) { us->expected_votes = total_votes; votequorum_exec_send_expectedvotes_notification(); } if ((ev_tracking) && (us->expected_votes > ev_tracking_barrier)) { update_ev_tracking_barrier(us->expected_votes); } quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes); update_node_expected_votes(cluster_members); are_we_quorate(total_votes); LEAVE(); } /* * configuration bits and pieces */ static int votequorum_read_nodelist_configuration(uint32_t *votes, uint32_t *nodes, uint32_t *expected_votes) { icmap_iter_t iter; const char *iter_key; char tmp_key[ICMAP_KEYNAME_MAXLEN]; uint32_t our_pos, node_pos; uint32_t nodecount = 0; uint32_t nodelist_expected_votes = 0; uint32_t node_votes = 0; int res = 0; ENTER(); if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) { log_printf(LOGSYS_LEVEL_DEBUG, "No nodelist defined or our node is not in the nodelist"); return 0; } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } nodecount++; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos); if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) { node_votes = 1; } nodelist_expected_votes = nodelist_expected_votes + node_votes; if (node_pos == our_pos) { *votes = node_votes; } } *expected_votes = nodelist_expected_votes; *nodes = nodecount; icmap_iter_finalize(iter); LEAVE(); return 1; } static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes) { char *qdevice_model = NULL; int ret = 0; ENTER(); if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) { if (strlen(qdevice_model)) { if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) { *qdevice_votes = -1; } if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) { qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT; } if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) { qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT; } update_qdevice_can_operate(1); ret = 1; } free(qdevice_model); } LEAVE(); return ret; } #define VOTEQUORUM_READCONFIG_STARTUP 0 #define VOTEQUORUM_READCONFIG_RUNTIME 1 static char *votequorum_readconfig(int runtime) { uint32_t node_votes = 0, qdevice_votes = 0; uint32_t node_expected_votes = 0, expected_votes = 0; uint32_t node_count = 0; uint8_t atb = 0; int have_nodelist, have_qdevice; char *atb_string = NULL; char *error = NULL; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime); /* * Set the few things we re-read at runtime back to their defaults */ if (runtime) { two_node = 0; expected_votes = 0; } /* * gather basic data here */ icmap_get_uint32("quorum.expected_votes", &expected_votes); have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes); have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes); icmap_get_uint8("quorum.two_node", &two_node); /* * do config verification and enablement */ if ((!have_nodelist) && 
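/*
 * [Illustrative sketch, not part of the original patch]
 * votequorum_read_nodelist_configuration() above walks the icmap keys
 * "nodelist.node.<pos>.ring0_addr" and "nodelist.node.<pos>.quorum_votes"
 * (defaulting quorum_votes to 1), while quorum.expected_votes and
 * quorum.two_node are read directly. Those keys usually come from a
 * corosync.conf fragment along these lines (addresses and values are made up):
 *
 *   nodelist {
 *       node {
 *           ring0_addr: 10.0.0.1
 *           nodeid: 1
 *           quorum_votes: 1
 *       }
 *       node {
 *           ring0_addr: 10.0.0.2
 *           nodeid: 2
 *           quorum_votes: 1
 *       }
 *   }
 *
 *   quorum {
 *       provider: corosync_votequorum
 *       two_node: 1
 *   }
 */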
(!expected_votes)) { if (!runtime) { error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!"; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!"); log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data"); } goto out; } /* * two_node and qdevice are not compatible in the same config. * try to make an educated guess of what to do */ if ((two_node) && (have_qdevice)) { if (!runtime) { error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!"); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node"); two_node = 0; } else { log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node"); update_qdevice_can_operate(0); } } } /* * Enable special features */ if (!runtime) { if (two_node) { wait_for_all = 1; } icmap_get_uint8("quorum.allow_downscale", &allow_downscale); icmap_get_uint8("quorum.wait_for_all", &wait_for_all); icmap_get_uint8("quorum.last_man_standing", &last_man_standing); icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window); icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking); icmap_get_uint8("quorum.auto_tie_breaker", &atb); icmap_get_string("quorum.auto_tie_breaker_node", &atb_string); /* auto_tie_breaker defaults to LOWEST */ if (atb) { auto_tie_breaker = ATB_LOWEST; icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); } else { auto_tie_breaker = ATB_NONE; if (atb_string) { log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0"); } } if (atb && atb_string) { parse_atb_string(atb_string); } free(atb_string); /* allow_downscale requires ev_tracking */ if (allow_downscale) { ev_tracking = 1; } if (ev_tracking) { if (load_ev_tracking_barrier() < 0) { LEAVE(); return ((char *)"Unable to load ev_tracking file!"); } update_ev_tracking_barrier(ev_tracking_barrier); } } /* two_node and auto_tie_breaker are not compatible as two_node uses * a fence race to decide quorum whereas ATB decides based on node id */ if (two_node && auto_tie_breaker != ATB_NONE) { log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible."); log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf"); two_node = 0; } /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs * to be set so that an isolated half+1 without the tie breaker node * does not have quorum on reboot. */ if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) && (!wait_for_all)) { if (last_man_standing) { /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what * they might want so we'll just quit. */ log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n"); log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n"); log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n"); log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n"); log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. 
Please fix your corosync.conf\n"); error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n"); log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n"); log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n"); auto_tie_breaker = ATB_NONE; icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); } } /* * quorum device is not compatible with last_man_standing and auto_tie_breaker * neither lms or atb can be set at runtime, so there is no need to check for * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have * been enabled at startup. */ if ((have_qdevice) && (last_man_standing)) { if (!runtime) { error = (char *)"configuration error: quorum.device is not compatible with last_man_standing"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) { if (!runtime) { error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } if ((have_qdevice) && (allow_downscale)) { if (!runtime) { error = (char *)"configuration error: quorum.device is not compatible with allow_downscale"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } /* * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes * we don't know what the quorum device should vote. 
*/ if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) { if (!runtime) { error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } /* * if user specifies a node list with uneven votes and no device.votes * we cannot autocalculate the votes */ if ((have_qdevice) && (qdevice_votes == -1) && (have_nodelist) && (node_count != node_expected_votes)) { if (!runtime) { error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } /* * validate quorum device votes vs expected_votes */ if ((qdevice_votes > 0) && (expected_votes)) { int delta = expected_votes - qdevice_votes; if (delta < 2) { if (!runtime) { error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low"; goto out; } else { log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low"); log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); update_qdevice_can_operate(0); } } } /* * automatically calculate device votes and adjust expected_votes from nodelist */ if ((have_qdevice) && (qdevice_votes == -1) && (!expected_votes) && (have_nodelist) && (node_count == node_expected_votes)) { qdevice_votes = node_expected_votes - 1; node_expected_votes = node_expected_votes + qdevice_votes; } /* * set this node votes and expected_votes */ log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes); if (ev_tracking) { expected_votes = ev_tracking_barrier; } if (have_nodelist) { us->votes = node_votes; us->expected_votes = node_expected_votes; } else { us->votes = 1; icmap_get_uint32("quorum.votes", &us->votes); } if (expected_votes) { us->expected_votes = expected_votes; } /* * set qdevice votes */ if (!have_qdevice) { qdevice->votes = 0; } if (qdevice_votes != -1) { qdevice->votes = qdevice_votes; } update_ev_barrier(us->expected_votes); update_two_node(); if (wait_for_all) { update_wait_for_all_status(1); } out: LEAVE(); return error; } static void votequorum_refresh_config( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { int old_votes, old_expected_votes; uint8_t reloading; uint8_t cancel_wfa; ENTER(); /* * If a full reload is in progress then don't do anything until it's done and * can reconfigure it all atomically */ if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) { return ; } icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa); if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 && cancel_wfa >= 1) { icmap_set_uint8("quorum.cancel_wait_for_all", 0); votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA, us->node_id, 0); return; } old_votes = us->votes; old_expected_votes = us->expected_votes; /* * Reload the configuration */ votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME); /* * activate new config */ 
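/*
 * [Illustrative worked example, not part of the original patch] With three
 * nodes of one vote each, no explicit quorum.expected_votes and no
 * quorum.device.votes, the auto-calculation in votequorum_readconfig() above
 * gives the qdevice node_expected_votes - 1 = 2 votes and raises
 * expected_votes to 3 + 2 = 5. Plugging that into calculate_quorum() earlier
 * in this file: q1 = (5 + 2) / 2 = 3 and, with every vote present,
 * q2 = (5 + 2) / 2 = 3, so quorum = 3. In any split of the three nodes, the
 * side holding the qdevice vote reaches at least 1 + 2 = 3 and stays quorate,
 * while the other side can muster at most 2 votes.
 */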
votequorum_exec_send_nodeinfo(us->node_id); votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); if (us->votes != old_votes) { votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, us->node_id, us->votes); } if (us->expected_votes != old_expected_votes) { votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id, us->expected_votes); } LEAVE(); } static void votequorum_exec_add_config_notification(void) { icmap_track_t icmap_track_nodelist = NULL; icmap_track_t icmap_track_quorum = NULL; icmap_track_t icmap_track_reload = NULL; ENTER(); icmap_track_add("nodelist.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, votequorum_refresh_config, NULL, &icmap_track_nodelist); icmap_track_add("quorum.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, votequorum_refresh_config, NULL, &icmap_track_quorum); icmap_track_add("config.totemconfig_reload_in_progress", ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY, votequorum_refresh_config, NULL, &icmap_track_reload); LEAVE(); } /* * votequorum_exec core */ static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value) { struct req_exec_quorum_reconfigure req_exec_quorum_reconfigure; struct iovec iov[1]; int ret; ENTER(); req_exec_quorum_reconfigure.nodeid = nodeid; req_exec_quorum_reconfigure.value = value; req_exec_quorum_reconfigure.param = param; req_exec_quorum_reconfigure._pad0 = 0; req_exec_quorum_reconfigure._pad1 = 0; req_exec_quorum_reconfigure._pad2 = 0; req_exec_quorum_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE); req_exec_quorum_reconfigure.header.size = sizeof(req_exec_quorum_reconfigure); iov[0].iov_base = (void *)&req_exec_quorum_reconfigure; iov[0].iov_len = sizeof(req_exec_quorum_reconfigure); ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_nodeinfo(uint32_t nodeid) { struct req_exec_quorum_nodeinfo req_exec_quorum_nodeinfo; struct iovec iov[1]; struct cluster_node *node; int ret; ENTER(); node = find_node_by_nodeid(nodeid); if (!node) { return -1; } req_exec_quorum_nodeinfo.nodeid = nodeid; req_exec_quorum_nodeinfo.votes = node->votes; req_exec_quorum_nodeinfo.expected_votes = node->expected_votes; req_exec_quorum_nodeinfo.flags = node->flags; if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { decode_flags(node->flags); } req_exec_quorum_nodeinfo.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO); req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo); iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo; iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo); ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname) { struct req_exec_quorum_qdevice_reconfigure req_exec_quorum_qdevice_reconfigure; struct iovec iov[1]; int ret; ENTER(); req_exec_quorum_qdevice_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE); req_exec_quorum_qdevice_reconfigure.header.size = sizeof(req_exec_quorum_qdevice_reconfigure); strcpy(req_exec_quorum_qdevice_reconfigure.oldname, oldname); strcpy(req_exec_quorum_qdevice_reconfigure.newname, newname); iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure; iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure); ret = 
corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req) { struct req_exec_quorum_qdevice_reg req_exec_quorum_qdevice_reg; struct iovec iov[1]; int ret; ENTER(); req_exec_quorum_qdevice_reg.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG); req_exec_quorum_qdevice_reg.header.size = sizeof(req_exec_quorum_qdevice_reg); req_exec_quorum_qdevice_reg.operation = operation; strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req); iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg; iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg); ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); LEAVE(); return ret; } static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context) { struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification; - struct list_head *tmp; + struct qb_list_head *tmp; struct cluster_node *node; int i = 0; int cluster_members = 0; int size; char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)]; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate); - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); cluster_members++; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { cluster_members++; } size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members; res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf; res_lib_votequorum_notification->quorate = cluster_is_quorate; res_lib_votequorum_notification->context = context; res_lib_votequorum_notification->node_list_entries = cluster_members; res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION; res_lib_votequorum_notification->header.size = size; res_lib_votequorum_notification->header.error = CS_OK; /* Send all known nodes and their states */ - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); res_lib_votequorum_notification->node_list[i].nodeid = node->node_id; res_lib_votequorum_notification->node_list[i++].state = node->state; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID; res_lib_votequorum_notification->node_list[i++].state = qdevice->state; } /* Send it to all interested parties */ if (conn) { int ret = corosync_api->ipc_dispatch_send(conn, &buf, size); LEAVE(); return ret; } else { struct quorum_pd *qpd; - list_iterate(tmp, &trackers_list) { - qpd = list_entry(tmp, struct quorum_pd, list); + qb_list_for_each(tmp, &trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); res_lib_votequorum_notification->context = qpd->tracking_context; corosync_api->ipc_dispatch_send(qpd->conn, &buf, size); } } LEAVE(); return 0; } static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context) { struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification; int i = 0; int size; - struct list_head *tmp; + struct qb_list_head *tmp; char buf[sizeof(struct 
res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries]; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. ring_id = %d/%lld", quorum_ringid.rep.nodeid, quorum_ringid.seq); size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries; res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf; res_lib_votequorum_notification->node_list_entries = quorum_members_entries; res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.rep.nodeid; res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq; res_lib_votequorum_notification->context = context; for (i=0; i<quorum_members_entries; i++) { res_lib_votequorum_notification->node_list[i] = quorum_members[i]; } res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION; res_lib_votequorum_notification->header.size = size; res_lib_votequorum_notification->header.error = CS_OK; /* Send it to all interested parties */ if (conn) { int ret = corosync_api->ipc_dispatch_send(conn, &buf, size); LEAVE(); return ret; } else { struct quorum_pd *qpd; - list_iterate(tmp, &trackers_list) { - qpd = list_entry(tmp, struct quorum_pd, list); + qb_list_for_each(tmp, &trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); res_lib_votequorum_notification->context = qpd->tracking_context; corosync_api->ipc_dispatch_send(qpd->conn, &buf, size); } } LEAVE(); return 0; } static void votequorum_exec_send_expectedvotes_notification(void) { struct res_lib_votequorum_expectedvotes_notification res_lib_votequorum_expectedvotes_notification; struct quorum_pd *qpd; - struct list_head *tmp; + struct qb_list_head *tmp; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback"); res_lib_votequorum_expectedvotes_notification.header.id = MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION; res_lib_votequorum_expectedvotes_notification.header.size = sizeof(res_lib_votequorum_expectedvotes_notification); res_lib_votequorum_expectedvotes_notification.header.error = CS_OK; res_lib_votequorum_expectedvotes_notification.expected_votes = us->expected_votes; - list_iterate(tmp, &trackers_list) { - qpd = list_entry(tmp, struct quorum_pd, list); + qb_list_for_each(tmp, &trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); res_lib_votequorum_expectedvotes_notification.context = qpd->tracking_context; corosync_api->ipc_dispatch_send(qpd->conn, &res_lib_votequorum_expectedvotes_notification, sizeof(struct res_lib_votequorum_expectedvotes_notification)); } LEAVE(); } static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message) { ENTER(); LEAVE(); } static void message_handler_req_exec_votequorum_qdevice_reconfigure ( const void *message, unsigned int nodeid) { const struct req_exec_quorum_qdevice_reconfigure *req_exec_quorum_qdevice_reconfigure = message; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node %u [from: %s to: %s]", nodeid, req_exec_quorum_qdevice_reconfigure->oldname, req_exec_quorum_qdevice_reconfigure->newname); if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) { log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename"); memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname); /* * TODO: notify qdevices about name change?
* this is not relevant for now and can wait later on since * qdevices are local only and libvotequorum is not final */ } LEAVE(); } static void exec_votequorum_qdevice_reg_endian_convert (void *message) { struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message; ENTER(); req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation); LEAVE(); } static void message_handler_req_exec_votequorum_qdevice_reg ( const void *message, unsigned int nodeid) { const struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message; struct res_lib_votequorum_status res_lib_votequorum_status; int wipe_qdevice_name = 1; struct cluster_node *node = NULL; - struct list_head *tmp; + struct qb_list_head *tmp; cs_error_t error = CS_OK; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node %u [%s]", req_exec_quorum_qdevice_reg->operation, nodeid, req_exec_quorum_qdevice_reg->qdevice_name); switch(req_exec_quorum_qdevice_reg->operation) { case VOTEQUORUM_QDEVICE_OPERATION_REGISTER: if (nodeid != us->node_id) { if (!strlen(qdevice_name)) { log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded"); strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name); } LEAVE(); return; } /* * protect against the case where we broadcast qdevice registration * to new memebers, we receive the message back, but there is no registration * connection in progress */ if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { LEAVE(); return; } /* * this should NEVER happen */ if (!qdevice_reg_conn) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!"); LEAVE(); return; } /* * registering our own device in this case */ if (!strlen(qdevice_name)) { strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name); } /* * check if it is our device or something else */ if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) { us->flags |= NODE_FLAGS_QDEVICE_REGISTERED; votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); votequorum_exec_send_nodeinfo(us->node_id); } else { log_printf(LOGSYS_LEVEL_WARNING, "A new qdevice with different name (new: %s old: %s) is trying to register!", req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name); error = CS_ERR_EXIST; } res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); qdevice_reg_conn = NULL; break; case VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER: - list_iterate(tmp, &cluster_members_list) { - node = list_entry(tmp, struct cluster_node, list); + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); if ((node->state == NODESTATE_MEMBER) && (node->flags & NODE_FLAGS_QDEVICE_REGISTERED)) { wipe_qdevice_name = 0; } } if (wipe_qdevice_name) { memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); } break; } LEAVE(); } static void exec_votequorum_nodeinfo_endian_convert (void *message) { struct req_exec_quorum_nodeinfo *nodeinfo = message; ENTER(); nodeinfo->nodeid = swab32(nodeinfo->nodeid); nodeinfo->votes = swab32(nodeinfo->votes); nodeinfo->expected_votes = swab32(nodeinfo->expected_votes); nodeinfo->flags = swab32(nodeinfo->flags); LEAVE(); } static void message_handler_req_exec_votequorum_nodeinfo ( const void *message, 
unsigned int sender_nodeid) { const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message; struct cluster_node *node = NULL; int old_votes; int old_expected; uint32_t old_flags; nodestate_t old_state; int new_node = 0; int allow_downgrade = 0; int by_node = 0; unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", sender_nodeid); log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d flags: %d", nodeid, req_exec_quorum_nodeinfo->votes, req_exec_quorum_nodeinfo->expected_votes, req_exec_quorum_nodeinfo->flags); if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { decode_flags(req_exec_quorum_nodeinfo->flags); } node = find_node_by_nodeid(nodeid); if (!node) { node = allocate_node(nodeid); new_node = 1; } if (!node) { corosync_api->error_memory_failure(); LEAVE(); return; } if (new_node) { old_votes = 0; old_expected = 0; old_state = NODESTATE_DEAD; old_flags = 0; } else { old_votes = node->votes; old_expected = node->expected_votes; old_state = node->state; old_flags = node->flags; } if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid); assert(sender_node != NULL); if ((!cluster_is_quorate) && (sender_node->flags & NODE_FLAGS_QUORATE)) { node->votes = req_exec_quorum_nodeinfo->votes; } else { node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes); } goto recalculate; } /* Update node state */ node->flags = req_exec_quorum_nodeinfo->flags; node->votes = req_exec_quorum_nodeinfo->votes; node->state = NODESTATE_MEMBER; if (node->flags & NODE_FLAGS_LEAVING) { node->state = NODESTATE_LEAVING; allow_downgrade = 1; by_node = 1; } if ((!cluster_is_quorate) && (node->flags & NODE_FLAGS_QUORATE)) { allow_downgrade = 1; us->expected_votes = req_exec_quorum_nodeinfo->expected_votes; } if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) { node->expected_votes = req_exec_quorum_nodeinfo->expected_votes; } else { node->expected_votes = us->expected_votes; } if ((last_man_standing) && (node->votes > 1)) { log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all" "cluster nodes votes are set to 1. 
Disabling LMS."); last_man_standing = 0; if (last_man_standing_timer_set) { corosync_api->timer_delete(last_man_standing_timer); last_man_standing_timer_set = 0; } } recalculate: if ((new_node) || (nodeid == us->node_id) || (node->flags & NODE_FLAGS_FIRST) || (old_votes != node->votes) || (old_expected != node->expected_votes) || (old_flags != node->flags) || (old_state != node->state)) { recalculate_quorum(allow_downgrade, by_node); } if ((wait_for_all) && (!(node->flags & NODE_FLAGS_WFASTATUS)) && (node->flags & NODE_FLAGS_QUORATE)) { update_wait_for_all_status(0); } LEAVE(); } static void exec_votequorum_reconfigure_endian_convert (void *message) { struct req_exec_quorum_reconfigure *reconfigure = message; ENTER(); reconfigure->nodeid = swab32(reconfigure->nodeid); reconfigure->value = swab32(reconfigure->value); LEAVE(); } static void message_handler_req_exec_votequorum_reconfigure ( const void *message, unsigned int nodeid) { const struct req_exec_quorum_reconfigure *req_exec_quorum_reconfigure = message; struct cluster_node *node; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node %u for %u", nodeid, req_exec_quorum_reconfigure->nodeid); switch(req_exec_quorum_reconfigure->param) { case VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES: update_node_expected_votes(req_exec_quorum_reconfigure->value); - votequorum_exec_send_expectedvotes_notification(); update_ev_barrier(req_exec_quorum_reconfigure->value); if (ev_tracking) { us->expected_votes = max(us->expected_votes, ev_tracking_barrier); } recalculate_quorum(1, 0); /* Allow decrease */ break; case VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES: node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid); if (!node) { LEAVE(); return; } node->votes = req_exec_quorum_reconfigure->value; recalculate_quorum(1, 0); /* Allow decrease */ break; case VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA: update_wait_for_all_status(0); log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node %d.", req_exec_quorum_reconfigure->nodeid); recalculate_quorum(0, 0); break; } LEAVE(); } static int votequorum_exec_exit_fn (void) { int ret = 0; ENTER(); /* * tell the other nodes we are leaving */ if (allow_downscale) { us->flags |= NODE_FLAGS_LEAVING; ret = votequorum_exec_send_nodeinfo(us->node_id); } if ((ev_tracking) && (ev_tracking_fd != -1)) { close(ev_tracking_fd); } LEAVE(); return ret; } static void votequorum_set_icmap_ro_keys(void) { icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE); icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE); } static char *votequorum_exec_init_fn (struct corosync_api_v1 *api) { char *error = NULL; ENTER(); /* * make sure we start clean */ - list_init(&cluster_members_list); - list_init(&trackers_list); + qb_list_init(&cluster_members_list); + qb_list_init(&trackers_list); qdevice = NULL; us = NULL; memset(cluster_nodes, 0, sizeof(cluster_nodes)); /* * Allocate a cluster_node for qdevice */ qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID); if (!qdevice) { LEAVE(); return ((char *)"Could not allocate node."); } qdevice->votes = 0; memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); /* * Allocate a cluster_node for us 
*/ us = allocate_node(corosync_api->totem_nodeid_get()); if (!us) { LEAVE(); return ((char *)"Could not allocate node."); } icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id); us->state = NODESTATE_MEMBER; us->votes = 1; us->flags |= NODE_FLAGS_FIRST; error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP); if (error) { return error; } recalculate_quorum(0, 0); /* * Set RO keys in icmap */ votequorum_set_icmap_ro_keys(); /* * Listen for changes */ votequorum_exec_add_config_notification(); /* * Start us off with one node */ votequorum_exec_send_nodeinfo(us->node_id); LEAVE(); return (NULL); } /* * votequorum service core */ static void votequorum_last_man_standing_timer_fn(void *arg) { ENTER(); last_man_standing_timer_set = 0; if (cluster_is_quorate) { recalculate_quorum(1,1); } LEAVE(); } static void votequorum_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { int i, j; int found; int left_nodes; struct cluster_node *node; ENTER(); sync_in_progress = 1; sync_nodeinfo_sent = 0; sync_wait_for_poll_or_timeout = 0; if (member_list_entries > 1) { us->flags &= ~NODE_FLAGS_FIRST; } /* * we don't need to track which nodes have left directly, * since that info is in the node db, but we need to know * if somebody has left for last_man_standing */ left_nodes = 0; for (i = 0; i < quorum_members_entries; i++) { found = 0; for (j = 0; j < member_list_entries; j++) { if (quorum_members[i] == member_list[j]) { found = 1; break; } } if (found == 0) { left_nodes = 1; node = find_node_by_nodeid(quorum_members[i]); if (node) { node->state = NODESTATE_DEAD; } } } if (last_man_standing) { if (((member_list_entries >= quorum) && (left_nodes)) || ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) { if (last_man_standing_timer_set) { corosync_api->timer_delete(last_man_standing_timer); last_man_standing_timer_set = 0; } corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000, NULL, votequorum_last_man_standing_timer_fn, &last_man_standing_timer); last_man_standing_timer_set = 1; } } memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries); previous_quorum_members_entries = quorum_members_entries; memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries); quorum_members_entries = member_list_entries; memcpy(&quorum_ringid, ring_id, sizeof(*ring_id)); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && us->flags & NODE_FLAGS_QDEVICE_ALIVE) { /* * Reset poll timer. 
Sync waiting is interrupted on valid qdevice poll or after timeout */ if (qdevice_timer_set) { corosync_api->timer_delete(qdevice_timer); } corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice, qdevice_timer_fn, &qdevice_timer); qdevice_timer_set = 1; sync_wait_for_poll_or_timeout = 1; log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)", qdevice_name, qdevice_sync_timeout); } LEAVE(); } static int votequorum_sync_process (void) { if (!sync_nodeinfo_sent) { votequorum_exec_send_nodeinfo(us->node_id); votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); if (strlen(qdevice_name)) { votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER, qdevice_name); } votequorum_exec_send_nodelist_notification(NULL, 0LL); sync_nodeinfo_sent = 1; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) { /* * Waiting for qdevice to poll with new ringid or timeout */ return (-1); } return 0; } static void votequorum_sync_activate (void) { recalculate_quorum(0, 0); quorum_callback(quorum_members, quorum_members_entries, cluster_is_quorate, &quorum_ringid); votequorum_exec_send_quorum_notification(NULL, 0L); sync_in_progress = 0; } static void votequorum_sync_abort (void) { } char *votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn) { char *error; ENTER(); if (q_set_quorate_fn == NULL) { return ((char *)"Quorate function not set"); } corosync_api = api; quorum_callback = q_set_quorate_fn; error = corosync_service_link_and_init(corosync_api, &votequorum_service[0]); if (error) { return (error); } LEAVE(); return (NULL); } /* * Library Handler init/fini */ static int quorum_lib_init_fn (void *conn) { struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); ENTER(); - list_init (&pd->list); + qb_list_init (&pd->list); pd->conn = conn; LEAVE(); return (0); } static int quorum_lib_exit_fn (void *conn) { struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); ENTER(); if (quorum_pd->tracking_enabled) { - list_del (&quorum_pd->list); - list_init (&quorum_pd->list); + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); } LEAVE(); return (0); } /* * library internal functions */ static void qdevice_timer_fn(void *arg) { ENTER(); if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) || (!qdevice_timer_set)) { LEAVE(); return; } us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE; us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name); votequorum_exec_send_nodeinfo(us->node_id); qdevice_timer_set = 0; sync_wait_for_poll_or_timeout = 0; LEAVE(); } /* * Library Handler Functions */ static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message) { const struct req_lib_votequorum_getinfo *req_lib_votequorum_getinfo = message; struct res_lib_votequorum_getinfo res_lib_votequorum_getinfo; struct cluster_node *node; unsigned int highest_expected = 0; unsigned int total_votes = 0; cs_error_t error = CS_OK; uint32_t nodeid = req_lib_votequorum_getinfo->nodeid; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node %u", conn, req_lib_votequorum_getinfo->nodeid); if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { nodeid = us->node_id; } node = find_node_by_nodeid(nodeid); if (node) { struct cluster_node *iternode; - struct list_head *nodelist; + struct qb_list_head *nodelist; - list_iterate(nodelist, 
&cluster_members_list) { - iternode = list_entry(nodelist, struct cluster_node, list); + qb_list_for_each(nodelist, &cluster_members_list) { + iternode = qb_list_entry(nodelist, struct cluster_node, list); if (iternode->state == NODESTATE_MEMBER) { highest_expected = max(highest_expected, iternode->expected_votes); total_votes += iternode->votes; } } if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { total_votes += qdevice->votes; } switch(node->state) { case NODESTATE_MEMBER: res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_MEMBER; break; case NODESTATE_DEAD: res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_DEAD; break; case NODESTATE_LEAVING: res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_LEAVING; break; default: res_lib_votequorum_getinfo.state = node->state; break; } res_lib_votequorum_getinfo.state = node->state; res_lib_votequorum_getinfo.votes = node->votes; res_lib_votequorum_getinfo.expected_votes = node->expected_votes; res_lib_votequorum_getinfo.highest_expected = highest_expected; res_lib_votequorum_getinfo.quorum = quorum; res_lib_votequorum_getinfo.total_votes = total_votes; res_lib_votequorum_getinfo.flags = 0; res_lib_votequorum_getinfo.nodeid = node->node_id; if (two_node) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_TWONODE; } if (cluster_is_quorate) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QUORATE; } if (wait_for_all) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_WAIT_FOR_ALL; } if (last_man_standing) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LAST_MAN_STANDING; } if (auto_tie_breaker != ATB_NONE) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER; } if (allow_downscale) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_ALLOW_DOWNSCALE; } memset(res_lib_votequorum_getinfo.qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name); res_lib_votequorum_getinfo.qdevice_votes = qdevice->votes; if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_REGISTERED; } if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_ALIVE; } if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_CAST_VOTE; } if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_MASTER_WINS; } } else { error = CS_ERR_NOT_EXIST; } res_lib_votequorum_getinfo.header.size = sizeof(res_lib_votequorum_getinfo); res_lib_votequorum_getinfo.header.id = MESSAGE_RES_VOTEQUORUM_GETINFO; res_lib_votequorum_getinfo.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_getinfo, sizeof(res_lib_votequorum_getinfo)); log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error); LEAVE(); } static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message) { const struct req_lib_votequorum_setexpected *req_lib_votequorum_setexpected = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; unsigned int newquorum; unsigned int total_votes; uint8_t allow_downscale_status = 0; ENTER(); allow_downscale_status = allow_downscale; allow_downscale = 0; /* * Validate new expected votes */ newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes); allow_downscale = allow_downscale_status; if (newquorum < total_votes / 2 || newquorum > total_votes) { error 
= CS_ERR_INVALID_PARAM; goto error_exit; } update_node_expected_votes(req_lib_votequorum_setexpected->expected_votes); votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id, req_lib_votequorum_setexpected->expected_votes); error_exit: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message) { const struct req_lib_votequorum_setvotes *req_lib_votequorum_setvotes = message; struct res_lib_votequorum_status res_lib_votequorum_status; struct cluster_node *node; unsigned int newquorum; unsigned int total_votes; unsigned int saved_votes; cs_error_t error = CS_OK; unsigned int nodeid; ENTER(); nodeid = req_lib_votequorum_setvotes->nodeid; node = find_node_by_nodeid(nodeid); if (!node) { error = CS_ERR_NAME_NOT_FOUND; goto error_exit; } /* * Check votes is valid */ saved_votes = node->votes; node->votes = req_lib_votequorum_setvotes->votes; newquorum = calculate_quorum(1, 0, &total_votes); if (newquorum < total_votes / 2 || newquorum > total_votes) { node->votes = saved_votes; error = CS_ERR_INVALID_PARAM; goto error_exit; } votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid, req_lib_votequorum_setvotes->votes); error_exit: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_trackstart (void *conn, const void *message) { const struct req_lib_votequorum_trackstart *req_lib_votequorum_trackstart = message; struct res_lib_votequorum_status res_lib_votequorum_status; struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); cs_error_t error = CS_OK; ENTER(); /* * If an immediate listing of the current cluster membership * is requested, generate membership list */ if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT || req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) { log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn); votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context); votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context); } if (quorum_pd->tracking_enabled) { error = CS_ERR_EXIST; goto response_send; } /* * Record requests for tracking */ if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES || req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) { quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags; quorum_pd->tracking_enabled = 1; quorum_pd->tracking_context = req_lib_votequorum_trackstart->context; - list_add (&quorum_pd->list, &trackers_list); + qb_list_add (&quorum_pd->list, &trackers_list); } response_send: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void 
message_handler_req_lib_votequorum_trackstop (void *conn, const void *message) { struct res_lib_votequorum_status res_lib_votequorum_status; struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); int error = CS_OK; ENTER(); if (quorum_pd->tracking_enabled) { error = CS_OK; quorum_pd->tracking_enabled = 0; - list_del (&quorum_pd->list); - list_init (&quorum_pd->list); + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); } else { error = CS_ERR_NOT_EXIST; } res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_register (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_register *req_lib_votequorum_qdevice_register = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; ENTER(); if (!qdevice_can_operate) { log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information"); error = CS_ERR_ACCESS; goto out; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if ((!strncmp(req_lib_votequorum_qdevice_register->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) { goto out; } else { log_printf(LOGSYS_LEVEL_WARNING, "A new qdevice with different name (new: %s old: %s) is trying to re-register!", req_lib_votequorum_qdevice_register->name, qdevice_name); error = CS_ERR_EXIST; goto out; } } else { if (qdevice_reg_conn != NULL) { log_printf(LOGSYS_LEVEL_WARNING, "Registration request already in progress"); error = CS_ERR_TRY_AGAIN; goto out; } qdevice_reg_conn = conn; if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER, req_lib_votequorum_qdevice_register->name) != 0) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to send qdevice registration request to cluster"); error = CS_ERR_TRY_AGAIN; qdevice_reg_conn = NULL; } else { LEAVE(); return; } } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_unregister *req_lib_votequorum_qdevice_unregister = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; ENTER(); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } if (qdevice_timer_set) { corosync_api->timer_delete(qdevice_timer); qdevice_timer_set = 0; sync_wait_for_poll_or_timeout = 0; } us->flags &= ~NODE_FLAGS_QDEVICE_REGISTERED; us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE; us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS; votequorum_exec_send_nodeinfo(us->node_id); votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER, req_lib_votequorum_qdevice_unregister->name); } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = 
sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_update (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_update *req_lib_votequorum_qdevice_update = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; ENTER(); if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname, req_lib_votequorum_qdevice_update->newname); } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_poll (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_poll *req_lib_votequorum_qdevice_poll = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; uint32_t oldflags; ENTER(); if (!qdevice_can_operate) { error = CS_ERR_ACCESS; goto out; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.rep.nodeid && req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) { log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (%u.%"PRIu64") != last sync " "ring id (%u.%"PRIu64"). 
Ignoring poll call.", req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq, quorum_ringid.rep.nodeid, quorum_ringid.seq); error = CS_ERR_MESSAGE_ERROR; goto out; } if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } if (qdevice_timer_set) { corosync_api->timer_delete(qdevice_timer); qdevice_timer_set = 0; } oldflags = us->flags; us->flags |= NODE_FLAGS_QDEVICE_ALIVE; if (req_lib_votequorum_qdevice_poll->cast_vote) { us->flags |= NODE_FLAGS_QDEVICE_CAST_VOTE; } else { us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; } if (us->flags != oldflags) { votequorum_exec_send_nodeinfo(us->node_id); } corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice, qdevice_timer_fn, &qdevice_timer); qdevice_timer_set = 1; sync_wait_for_poll_or_timeout = 0; } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn, const void *message) { const struct req_lib_votequorum_qdevice_master_wins *req_lib_votequorum_qdevice_master_wins = message; struct res_lib_votequorum_status res_lib_votequorum_status; cs_error_t error = CS_OK; uint32_t oldflags = us->flags; ENTER(); if (!qdevice_can_operate) { error = CS_ERR_ACCESS; goto out; } if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { error = CS_ERR_INVALID_PARAM; goto out; } if (req_lib_votequorum_qdevice_master_wins->allow) { us->flags |= NODE_FLAGS_QDEVICE_MASTER_WINS; } else { us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS; } if (us->flags != oldflags) { votequorum_exec_send_nodeinfo(us->node_id); } update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow); } else { error = CS_ERR_NOT_EXIST; } out: res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; res_lib_votequorum_status.header.error = error; corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); LEAVE(); } diff --git a/exec/votequorum.h b/exec/votequorum.h index 90114227..697b6943 100644 --- a/exec/votequorum.h +++ b/exec/votequorum.h @@ -1,45 +1,44 @@ /* * Copyright (c) 2012 Red Hat, Inc. * * All rights reserved. * * Author: Fabio M. Di Nitto (fdinitto@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
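The votequorum.c hunks above follow the same one-for-one conversion as the rest of the patch: list_init/list_add/list_add_tail/list_del become qb_list_init/qb_list_add/qb_list_add_tail/qb_list_del, and the open-coded list_iterate/list_entry walk becomes qb_list_for_each/qb_list_entry. A minimal, self-contained sketch of the resulting idiom follows; the struct and field names are illustrative only and are not taken from corosync. As in the kernel-style API that qblist.h mirrors, qb_list_add() inserts at the head of the list while qb_list_add_tail() appends.

#include <qb/qblist.h>

/* Illustrative element type: a struct joins a list by embedding a
 * struct qb_list_head member. */
struct item {
        int value;
        struct qb_list_head list;
};

/* QB_LIST_DECLARE() yields a statically initialised head (the patch uses
 * it in exec/wd.c in place of DECLARE_LIST_INIT()); heads built at
 * runtime are set up with qb_list_init() instead. */
static QB_LIST_DECLARE(items);

static void item_append(struct item *it)
{
        qb_list_init(&it->list);
        qb_list_add_tail(&it->list, &items); /* qb_list_add() would prepend */
}

static int items_sum(void)
{
        struct qb_list_head *iter;
        struct item *it;
        int total = 0;

        qb_list_for_each(iter, &items) {
                it = qb_list_entry(iter, struct item, list);
                total += it->value;
        }
        return total;
}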
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef VOTEQUORUM_H_DEFINED #define VOTEQUORUM_H_DEFINED #include "quorum.h" #include -#include #include char *votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn); #endif /* VOTEQUORUM_H_DEFINED */ diff --git a/exec/vsf_quorum.c b/exec/vsf_quorum.c index e268deac..4a2ab48e 100644 --- a/exec/vsf_quorum.c +++ b/exec/vsf_quorum.c @@ -1,488 +1,484 @@ /* * Copyright (c) 2008-2015 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfie@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Red Hat Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "quorum.h" #include #include #include #include -#include +#include #include #include #include #include #include #include #include "service.h" #include "votequorum.h" #include "vsf_ykd.h" LOGSYS_DECLARE_SUBSYS ("QUORUM"); struct quorum_pd { unsigned char track_flags; int tracking_enabled; - struct list_head list; + struct qb_list_head list; void *conn; }; struct internal_callback_pd { - struct list_head list; + struct qb_list_head list; quorum_callback_fn_t callback; void *context; }; static void message_handler_req_lib_quorum_getquorate (void *conn, const void *msg); static void message_handler_req_lib_quorum_trackstart (void *conn, const void *msg); static void message_handler_req_lib_quorum_trackstop (void *conn, const void *msg); static void message_handler_req_lib_quorum_gettype (void *conn, const void *msg); static void send_library_notification(void *conn); static void send_internal_notification(void); static char *quorum_exec_init_fn (struct corosync_api_v1 *api); static int quorum_lib_init_fn (void *conn); static int quorum_lib_exit_fn (void *conn); static int primary_designated = 0; static int quorum_type = 0; static struct corosync_api_v1 *corosync_api; -static struct list_head lib_trackers_list; -static struct list_head internal_trackers_list; +static struct qb_list_head lib_trackers_list; +static struct qb_list_head internal_trackers_list; static struct memb_ring_id quorum_ring_id; static size_t quorum_view_list_entries = 0; static int quorum_view_list[PROCESSOR_COUNT_MAX]; struct quorum_services_api_ver1 *quorum_iface = NULL; static char view_buf[64]; static void log_view_list(const unsigned int *view_list, size_t view_list_entries) { int total = (int)view_list_entries; int len, pos, ret; int i = 0; while (1) { len = sizeof(view_buf); pos = 0; memset(view_buf, 0, len); for (; i < total; i++) { ret = snprintf(view_buf + pos, len - pos, " %u", view_list[i]); if (ret >= len - pos) break; pos += ret; } log_printf (LOGSYS_LEVEL_NOTICE, "Members[%d]:%s%s", total, view_buf, i < total ? 
"\\" : ""); if (i == total) break; } } /* Internal quorum API function */ static void quorum_api_set_quorum(const unsigned int *view_list, size_t view_list_entries, int quorum, struct memb_ring_id *ring_id) { int old_quorum = primary_designated; primary_designated = quorum; if (primary_designated && !old_quorum) { log_printf (LOGSYS_LEVEL_NOTICE, "This node is within the primary component and will provide service."); } else if (!primary_designated && old_quorum) { log_printf (LOGSYS_LEVEL_NOTICE, "This node is within the non-primary component and will NOT provide any services."); } quorum_view_list_entries = view_list_entries; memcpy(&quorum_ring_id, ring_id, sizeof (quorum_ring_id)); memcpy(quorum_view_list, view_list, sizeof(unsigned int)*view_list_entries); log_view_list(view_list, view_list_entries); /* Tell internal listeners */ send_internal_notification(); /* Tell IPC listeners */ send_library_notification(NULL); } static struct corosync_lib_handler quorum_lib_service[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_quorum_getquorate, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_quorum_trackstart, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_quorum_trackstop, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_quorum_gettype, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED } }; static struct corosync_service_engine quorum_service_handler = { .name = "corosync cluster quorum service v0.1", .id = QUORUM_SERVICE, .priority = 1, .private_data_size = sizeof (struct quorum_pd), .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = quorum_lib_init_fn, .lib_exit_fn = quorum_lib_exit_fn, .lib_engine = quorum_lib_service, .exec_init_fn = quorum_exec_init_fn, .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler) }; struct corosync_service_engine *vsf_quorum_get_service_engine_ver0 (void) { return (&quorum_service_handler); } /* -------------------------------------------------- */ /* * Internal API functions for corosync */ static int quorum_quorate(void) { return primary_designated; } static int quorum_register_callback(quorum_callback_fn_t function, void *context) { struct internal_callback_pd *pd = malloc(sizeof(struct internal_callback_pd)); if (!pd) return -1; pd->context = context; pd->callback = function; - list_add (&pd->list, &internal_trackers_list); + qb_list_add (&pd->list, &internal_trackers_list); return 0; } static int quorum_unregister_callback(quorum_callback_fn_t function, void *context) { struct internal_callback_pd *pd; - struct list_head *tmp; + struct qb_list_head *tmp; - for (tmp = internal_trackers_list.next; tmp != &internal_trackers_list; tmp = tmp->next) { - - pd = list_entry(tmp, struct internal_callback_pd, list); + qb_list_for_each(tmp, &internal_trackers_list) { + pd = qb_list_entry(tmp, struct internal_callback_pd, list); if (pd->callback == function && pd->context == context) { - list_del(&pd->list); + qb_list_del(&pd->list); free(pd); return 0; } } return -1; } static struct quorum_callin_functions callins = { .quorate = quorum_quorate, .register_callback = quorum_register_callback, .unregister_callback = quorum_unregister_callback }; /* --------------------------------------------------------------------- */ static char *quorum_exec_init_fn (struct corosync_api_v1 *api) { char *quorum_module = 
NULL; char *error; corosync_api = api; - list_init (&lib_trackers_list); - list_init (&internal_trackers_list); + qb_list_init (&lib_trackers_list); + qb_list_init (&internal_trackers_list); /* * Tell corosync we have a quorum engine. */ api->quorum_initialize(&callins); /* * Look for a quorum provider */ if (icmap_get_string("quorum.provider", &quorum_module) == CS_OK) { log_printf (LOGSYS_LEVEL_NOTICE, "Using quorum provider %s", quorum_module); error = (char *)"Invalid quorum provider"; if (strcmp (quorum_module, "corosync_votequorum") == 0) { error = votequorum_init (api, quorum_api_set_quorum); quorum_type = 1; } if (strcmp (quorum_module, "corosync_ykd") == 0) { error = ykd_init (api, quorum_api_set_quorum); quorum_type = 1; } if (error) { log_printf (LOGSYS_LEVEL_CRIT, "Quorum provider: %s failed to initialize.", quorum_module); free(quorum_module); return (error); } } if (quorum_module) { free(quorum_module); quorum_module = NULL; } /* * setting quorum_type and primary_designated in the right order is important * always try to lookup/init a quorum module, then revert back to be quorate */ if (quorum_type == 0) { primary_designated = 1; } return (NULL); } static int quorum_lib_init_fn (void *conn) { struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p", conn); - list_init (&pd->list); + qb_list_init (&pd->list); pd->conn = conn; return (0); } static int quorum_lib_exit_fn (void *conn) { struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_exit_fn: conn=%p", conn); if (quorum_pd->tracking_enabled) { - list_del (&quorum_pd->list); - list_init (&quorum_pd->list); + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); } return (0); } static void send_internal_notification(void) { - struct list_head *tmp; + struct qb_list_head *tmp; struct internal_callback_pd *pd; - - for (tmp = internal_trackers_list.next; tmp != &internal_trackers_list; tmp = tmp->next) { - - pd = list_entry(tmp, struct internal_callback_pd, list); + qb_list_for_each(tmp, &internal_trackers_list) { + pd = qb_list_entry(tmp, struct internal_callback_pd, list); pd->callback(primary_designated, pd->context); } } static void send_library_notification(void *conn) { int size = sizeof(struct res_lib_quorum_notification) + sizeof(unsigned int)*quorum_view_list_entries; char buf[size]; struct res_lib_quorum_notification *res_lib_quorum_notification = (struct res_lib_quorum_notification *)buf; - struct list_head *tmp; + struct qb_list_head *tmp; int i; log_printf(LOGSYS_LEVEL_DEBUG, "sending quorum notification to %p, length = %d", conn, size); res_lib_quorum_notification->quorate = primary_designated; res_lib_quorum_notification->ring_seq = quorum_ring_id.seq; res_lib_quorum_notification->view_list_entries = quorum_view_list_entries; for (i=0; iview_list[i] = quorum_view_list[i]; } res_lib_quorum_notification->header.id = MESSAGE_RES_QUORUM_NOTIFICATION; res_lib_quorum_notification->header.size = size; res_lib_quorum_notification->header.error = CS_OK; /* Send it to all interested parties */ if (conn) { corosync_api->ipc_dispatch_send(conn, res_lib_quorum_notification, size); } else { struct quorum_pd *qpd; - for (tmp = lib_trackers_list.next; tmp != &lib_trackers_list; tmp = tmp->next) { - - qpd = list_entry(tmp, struct quorum_pd, list); + qb_list_for_each(tmp, &lib_trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); 
corosync_api->ipc_dispatch_send(qpd->conn, res_lib_quorum_notification, size); } } return; } static void message_handler_req_lib_quorum_getquorate (void *conn, const void *msg) { struct res_lib_quorum_getquorate res_lib_quorum_getquorate; log_printf(LOGSYS_LEVEL_DEBUG, "got quorate request on %p", conn); /* send status */ res_lib_quorum_getquorate.quorate = primary_designated; res_lib_quorum_getquorate.header.size = sizeof(res_lib_quorum_getquorate); res_lib_quorum_getquorate.header.id = MESSAGE_RES_QUORUM_GETQUORATE; res_lib_quorum_getquorate.header.error = CS_OK; corosync_api->ipc_response_send(conn, &res_lib_quorum_getquorate, sizeof(res_lib_quorum_getquorate)); } static void message_handler_req_lib_quorum_trackstart (void *conn, const void *msg) { const struct req_lib_quorum_trackstart *req_lib_quorum_trackstart = msg; struct qb_ipc_response_header res; struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); cs_error_t error = CS_OK; log_printf(LOGSYS_LEVEL_DEBUG, "got trackstart request on %p", conn); /* * If an immediate listing of the current cluster membership * is requested, generate membership list */ if (req_lib_quorum_trackstart->track_flags & CS_TRACK_CURRENT || req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES) { log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn); send_library_notification(conn); } if (quorum_pd->tracking_enabled) { error = CS_ERR_EXIST; goto response_send; } /* * Record requests for tracking */ if (req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES || req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) { quorum_pd->track_flags = req_lib_quorum_trackstart->track_flags; quorum_pd->tracking_enabled = 1; - list_add (&quorum_pd->list, &lib_trackers_list); + qb_list_add (&quorum_pd->list, &lib_trackers_list); } response_send: /* send status */ res.size = sizeof(res); res.id = MESSAGE_RES_QUORUM_TRACKSTART; res.error = error; corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header)); } static void message_handler_req_lib_quorum_trackstop (void *conn, const void *msg) { struct qb_ipc_response_header res; struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "got trackstop request on %p", conn); if (quorum_pd->tracking_enabled) { res.error = CS_OK; quorum_pd->tracking_enabled = 0; - list_del (&quorum_pd->list); - list_init (&quorum_pd->list); + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); } else { res.error = CS_ERR_NOT_EXIST; } /* send status */ res.size = sizeof(res); res.id = MESSAGE_RES_QUORUM_TRACKSTOP; res.error = CS_OK; corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header)); } static void message_handler_req_lib_quorum_gettype (void *conn, const void *msg) { struct res_lib_quorum_gettype res_lib_quorum_gettype; log_printf(LOGSYS_LEVEL_DEBUG, "got quorum_type request on %p", conn); /* send status */ res_lib_quorum_gettype.quorum_type = quorum_type; res_lib_quorum_gettype.header.size = sizeof(res_lib_quorum_gettype); res_lib_quorum_gettype.header.id = MESSAGE_RES_QUORUM_GETTYPE; res_lib_quorum_gettype.header.error = CS_OK; corosync_api->ipc_response_send(conn, &res_lib_quorum_gettype, sizeof(res_lib_quorum_gettype)); } diff --git a/exec/vsf_ykd.h b/exec/vsf_ykd.h index d1b1ffd9..d009e1c6 100644 --- a/exec/vsf_ykd.h +++ b/exec/vsf_ykd.h @@ -1,45 +1,44 @@ /* * Copyright (c) 2012 Red Hat, Inc. * * All rights reserved. * * Author: Fabio M. 
Di Nitto (fdinitto@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef VFS_YKD_H_DEFINED #define VFS_YKD_H_DEFINED #include "quorum.h" #include -#include #include char *ykd_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t set_primary); #endif /* VFS_YKD_H_DEFINED */ diff --git a/exec/wd.c b/exec/wd.c index 92132646..2dc4b847 100644 --- a/exec/wd.c +++ b/exec/wd.c @@ -1,761 +1,761 @@ /* * Copyright (c) 2010-2012 Red Hat, Inc. * * All rights reserved. * * Author: Angus Salkeld * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
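One detail worth calling out from the vsf_quorum.c hunk above: quorum_unregister_callback() calls qb_list_del() on the matching entry inside qb_list_for_each() and then returns immediately, so the iterator is never advanced past a freed node. Any conversion that needs to keep walking after unlinking entries should use the _safe variant instead. A minimal sketch under that assumption, with an illustrative tracker type:

#include <stdlib.h>
#include <qb/qblist.h>

struct tracker {
        void *conn;
        struct qb_list_head list;
};

/* Remove every tracker bound to 'conn' while continuing the walk:
 * qb_list_for_each_safe() caches the next pointer before the body runs,
 * so qb_list_del()/free() on the current node is harmless. */
static void drop_conn_trackers(struct qb_list_head *trackers, void *conn)
{
        struct qb_list_head *iter;
        struct qb_list_head *next;
        struct tracker *t;

        qb_list_for_each_safe(iter, next, trackers) {
                t = qb_list_entry(iter, struct tracker, list);
                if (t->conn == conn) {
                        qb_list_del(&t->list);
                        free(t);
                }
        }
}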
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include "fsm.h" #include "service.h" typedef enum { WD_RESOURCE_GOOD, WD_RESOURCE_FAILED, WD_RESOURCE_STATE_UNKNOWN, WD_RESOURCE_NOT_MONITORED } wd_resource_state_t; struct resource { char res_path[ICMAP_KEYNAME_MAXLEN]; char *recovery; char name[CS_MAX_NAME_LENGTH]; time_t last_updated; struct cs_fsm fsm; corosync_timer_handle_t check_timer; uint64_t check_timeout; icmap_track_t icmap_track; }; LOGSYS_DECLARE_SUBSYS("WD"); /* * Service Interfaces required by service_message_handler struct */ static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api); static int wd_exec_exit_fn (void); static void wd_resource_check_fn (void* resource_ref); static struct corosync_api_v1 *api; #define WD_DEFAULT_TIMEOUT_SEC 6 #define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC) #define WD_MIN_TIMEOUT_MS 500 #define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC) static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC; static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2); static int dog = -1; static corosync_timer_handle_t wd_timer; static int watchdog_ok = 1; static char *watchdog_device = "/dev/watchdog"; struct corosync_service_engine wd_service_engine = { .name = "corosync watchdog service", .id = WD_SERVICE, .priority = 1, .private_data_size = 0, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, .lib_init_fn = NULL, .lib_exit_fn = NULL, .lib_engine = NULL, .lib_engine_count = 0, .exec_engine = NULL, .exec_engine_count = 0, .confchg_fn = NULL, .exec_init_fn = wd_exec_init_fn, .exec_exit_fn = wd_exec_exit_fn, .exec_dump_fn = NULL }; -static DECLARE_LIST_INIT (confchg_notify); +static QB_LIST_DECLARE (confchg_notify); /* * F S M */ static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data); static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data); enum wd_resource_state { WD_S_RUNNING, WD_S_FAILED, WD_S_STOPPED }; enum wd_resource_event { WD_E_FAILURE, WD_E_CONFIG_CHANGED }; const char * wd_running_str = "running"; const char * wd_failed_str = "failed"; const char * wd_failure_str = "failure"; const char * wd_stopped_str = "stopped"; const char * wd_config_changed_str = "config_changed"; struct cs_fsm_entry wd_fsm_table[] = { { WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} }, { WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} }, { WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} }, { WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} }, { WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} }, { WD_S_FAILED, WD_E_FAILURE, NULL, {-1} }, }; struct corosync_service_engine *wd_get_service_engine_ver0 (void) { return (&wd_service_engine); } static const char * wd_res_state_to_str(struct cs_fsm* fsm, int32_t state) { switch (state) { case WD_S_STOPPED: return 
wd_stopped_str; break; case WD_S_RUNNING: return wd_running_str; break; case WD_S_FAILED: return wd_failed_str; break; } return NULL; } static const char * wd_res_event_to_str(struct cs_fsm* fsm, int32_t event) { switch (event) { case WD_E_CONFIG_CHANGED: return wd_config_changed_str; break; case WD_E_FAILURE: return wd_failure_str; break; } return NULL; } static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state, int32_t next_state, int32_t fsm_event, void *data) { switch (cb_event) { case CS_FSM_CB_EVENT_PROCESS_NF: log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"", fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state)); corosync_exit_error(COROSYNC_DONE_FATAL_ERR); break; case CS_FSM_CB_EVENT_STATE_SET: log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"", fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), fsm->state_to_str(fsm, next_state)); break; case CS_FSM_CB_EVENT_STATE_SET_NF: log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")", fsm->name, fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), fsm->state_to_str(fsm, next_state), fsm->event_to_str(fsm, fsm_event)); corosync_exit_error(COROSYNC_DONE_FATAL_ERR); break; default: log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!"); corosync_exit_error(COROSYNC_DONE_FATAL_ERR); break; } } /* * returns (CS_TRUE == OK, CS_FALSE == failed) */ static int32_t wd_resource_state_is_ok (struct resource *ref) { char* state = NULL; uint64_t last_updated; uint64_t my_time; uint64_t allowed_period; char key_name[ICMAP_KEYNAME_MAXLEN]; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated"); if (icmap_get_uint64(key_name, &last_updated) != CS_OK) { /* key does not exist. */ return CS_FALSE; } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state"); if (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0) { /* key does not exist. */ if (state != NULL) free(state); return CS_FALSE; } if (last_updated == 0) { /* initial value */ free(state); return CS_TRUE; } my_time = cs_timestamp_get(); /* * Here we check that the monitor has written a timestamp within the poll_period * plus a grace factor of (0.5 * poll_period). 
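 * For example, with the default check_timeout of WD_DEFAULT_TIMEOUT_MS (6000 ms) the
 * allowed_period computed below is 6000 * MILLI_2_NANO_SECONDS * 3 / 2 = 9,000,000,000 ns,
 * i.e. the monitor has roughly 1.5 poll periods (9 s) to refresh last_updated before the
 * resource is treated as failed.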
*/ allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2; if ((last_updated + allowed_period) < my_time) { log_printf (LOGSYS_LEVEL_ERROR, "last_updated %"PRIu64" ms too late, period:%"PRIu64".", (uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)), ref->check_timeout); free(state); return CS_FALSE; } if (strcmp (state, wd_failed_str) == 0) { free(state); return CS_FALSE; } free(state); return CS_TRUE; } static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data) { char *state; uint64_t tmp_value; uint64_t next_timeout; struct resource *ref = (struct resource*)data; char key_name[ICMAP_KEYNAME_MAXLEN]; next_timeout = ref->check_timeout; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period"); if (icmap_get_uint64(ref->res_path, &tmp_value) == CS_OK) { if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) { log_printf (LOGSYS_LEVEL_DEBUG, "poll_period changing from:%"PRIu64" to %"PRIu64".", ref->check_timeout, tmp_value); /* * To easy in the transition between poll_period's we are going * to make the first timeout the bigger of the new and old value. * This is to give the monitoring system time to adjust. */ next_timeout = CS_MAX(tmp_value, ref->check_timeout); ref->check_timeout = tmp_value; } else { log_printf (LOGSYS_LEVEL_WARNING, "Could NOT use poll_period:%"PRIu64" ms for resource %s", tmp_value, ref->name); } } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery"); if (icmap_get_string(key_name, &ref->recovery) != CS_OK) { /* key does not exist. */ log_printf (LOGSYS_LEVEL_WARNING, "resource %s missing a recovery key.", ref->name); cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb); return; } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state"); if (icmap_get_string(key_name, &state) != CS_OK) { /* key does not exist. 
*/ log_printf (LOGSYS_LEVEL_WARNING, "resource %s missing a state key.", ref->name); cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb); return; } if (ref->check_timer) { api->timer_delete(ref->check_timer); ref->check_timer = 0; } if (strcmp(wd_stopped_str, state) == 0) { cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb); } else { api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS, ref, wd_resource_check_fn, &ref->check_timer); cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb); } free(state); } static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data) { struct resource* ref = (struct resource*)data; if (ref->check_timer) { api->timer_delete(ref->check_timer); ref->check_timer = 0; } log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!", ref->recovery, (char*)ref->name); if (strcmp (ref->recovery, "watchdog") == 0 || strcmp (ref->recovery, "quit") == 0) { watchdog_ok = 0; } else if (strcmp (ref->recovery, "reboot") == 0) { reboot(RB_AUTOBOOT); } else if (strcmp (ref->recovery, "shutdown") == 0) { reboot(RB_POWER_OFF); } cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb); } static void wd_key_changed( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { struct resource* ref = (struct resource*)user_data; char *last_key_part; if (ref == NULL) { return ; } last_key_part = strrchr(key_name, '.'); if (last_key_part == NULL) { return ; } last_key_part++; if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) { if (strcmp(last_key_part, "last_updated") == 0 || strcmp(last_key_part, "current") == 0) { return; } cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb); } if (event == ICMAP_TRACK_DELETE && ref != NULL) { if (strcmp(last_key_part, "state") != 0) { return ; } log_printf (LOGSYS_LEVEL_WARNING, "resource \"%s\" deleted from cmap!", ref->name); api->timer_delete(ref->check_timer); ref->check_timer = 0; icmap_track_delete(ref->icmap_track); free(ref); } } static void wd_resource_check_fn (void* resource_ref) { struct resource* ref = (struct resource*)resource_ref; if (wd_resource_state_is_ok (ref) == CS_FALSE) { cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb); return; } api->timer_add_duration(ref->check_timeout*MILLI_2_NANO_SECONDS, ref, wd_resource_check_fn, &ref->check_timer); } /* * return 0 - fully configured * return -1 - partially configured */ static int32_t wd_resource_create (char *res_path, char *res_name) { char *state; uint64_t tmp_value; struct resource *ref = calloc (1, sizeof (struct resource)); char key_name[ICMAP_KEYNAME_MAXLEN]; strcpy(ref->res_path, res_path); ref->check_timeout = WD_DEFAULT_TIMEOUT_MS; ref->check_timer = 0; strcpy(ref->name, res_name); ref->fsm.name = ref->name; ref->fsm.table = wd_fsm_table; ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry); ref->fsm.curr_entry = 0; ref->fsm.curr_state = WD_S_STOPPED; ref->fsm.state_to_str = wd_res_state_to_str; ref->fsm.event_to_str = wd_res_event_to_str; snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period"); if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) { icmap_set_uint64(key_name, ref->check_timeout); } else { if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) { ref->check_timeout = tmp_value; } else { log_printf (LOGSYS_LEVEL_WARNING, "Could NOT use poll_period:%"PRIu64" ms for resource %s", tmp_value, ref->name); } } icmap_track_add(res_path, ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | 
ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX, wd_key_changed, ref, &ref->icmap_track); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery"); if (icmap_get_string(key_name, &ref->recovery) != CS_OK) { /* key does not exist. */ log_printf (LOGSYS_LEVEL_WARNING, "resource %s missing a recovery key.", ref->name); return -1; } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state"); if (icmap_get_string(key_name, &state) != CS_OK) { /* key does not exist. */ log_printf (LOGSYS_LEVEL_WARNING, "resource %s missing a state key.", ref->name); return -1; } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated"); if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) { /* key does not exist. */ ref->last_updated = 0; } else { ref->last_updated = tmp_value; } /* * delay the first check to give the monitor time to start working. */ tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS); api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS, ref, wd_resource_check_fn, &ref->check_timer); cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb); return 0; } static void wd_tickle_fn (void* arg) { ENTER(); if (watchdog_ok) { if (dog > 0) { ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok); } api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL, wd_tickle_fn, &wd_timer); } else { log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!"); } } static void wd_resource_created_cb( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { char res_name[ICMAP_KEYNAME_MAXLEN]; char res_type[ICMAP_KEYNAME_MAXLEN]; char tmp_key[ICMAP_KEYNAME_MAXLEN]; int res; if (event != ICMAP_TRACK_ADD) { return ; } res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key); if (res != 3) { return ; } if (strcmp(tmp_key, "state") != 0) { return ; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name); wd_resource_create (tmp_key, res_name); } static void wd_scan_resources (void) { int res_count = 0; icmap_track_t icmap_track = NULL; icmap_iter_t iter; const char *key_name; int res; char res_name[ICMAP_KEYNAME_MAXLEN]; char res_type[ICMAP_KEYNAME_MAXLEN]; char tmp_key[ICMAP_KEYNAME_MAXLEN]; ENTER(); iter = icmap_iter_init("resources."); while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key); if (res != 3) { continue ; } if (strcmp(tmp_key, "state") != 0) { continue ; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name); if (wd_resource_create (tmp_key, res_name) == 0) { res_count++; } } icmap_iter_finalize(iter); icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX, wd_resource_created_cb, NULL, &icmap_track); icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX, wd_resource_created_cb, NULL, &icmap_track); if (res_count == 0) { log_printf (LOGSYS_LEVEL_INFO, "no resources configured."); } } static void watchdog_timeout_apply (uint32_t new) { struct watchdog_info ident; uint32_t original_timeout = 0; if (dog > 0) { ioctl(dog, WDIOC_GETTIMEOUT, &original_timeout); } if (new == original_timeout) { return; } watchdog_timeout = new; if (dog > 0) { ioctl(dog, WDIOC_GETSUPPORT, &ident); if (ident.options & WDIOF_SETTIMEOUT) { /* yay! the dog is trained. 
*/ ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout); } ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout); } if (watchdog_timeout == new) { tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2; /* reset the tickle timer in case it was reduced. */ api->timer_delete (wd_timer); api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL, wd_tickle_fn, &wd_timer); log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout); log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout); } else { log_printf (LOGSYS_LEVEL_WARNING, "Could not change the Watchdog timeout from %d to %d seconds", original_timeout, new); } } static int setup_watchdog(void) { struct watchdog_info ident; char *str; ENTER(); if (icmap_get_string("resources.watchdog_device", &str) == CS_OK) { if (strcmp (str, "off") == 0) { log_printf (LOGSYS_LEVEL_WARNING, "Watchdog disabled by configuration"); free(str); dog = -1; return -1; } else { watchdog_device = str; } } if (access (watchdog_device, W_OK) != 0) { log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog %s, try modprobe ", watchdog_device); dog = -1; return -1; } /* here goes, lets hope they have "Magic Close" */ dog = open(watchdog_device, O_WRONLY); if (dog == -1) { log_printf (LOGSYS_LEVEL_WARNING, "Watchdog %s exists but couldn't be opened.", watchdog_device); dog = -1; return -1; } /* Right we have the dog. * Lets see what breed it is. */ ioctl(dog, WDIOC_GETSUPPORT, &ident); log_printf (LOGSYS_LEVEL_INFO, "Watchdog %s is now been tickled by corosync.", watchdog_device); log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity); watchdog_timeout_apply (watchdog_timeout); ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD); return 0; } static void wd_top_level_key_changed( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { uint32_t tmp_value_32; ENTER(); if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) == CS_OK) { if (tmp_value_32 >= 2 && tmp_value_32 <= 120) { watchdog_timeout_apply (tmp_value_32); return; } } log_printf (LOGSYS_LEVEL_WARNING, "Set watchdog_timeout is out of range (2..120)."); icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout); } static void watchdog_timeout_get_initial (void) { uint32_t tmp_value_32; icmap_track_t icmap_track = NULL; ENTER(); if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) { watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC); icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout); } else { if (tmp_value_32 >= 2 && tmp_value_32 <= 120) { watchdog_timeout_apply (tmp_value_32); } else { log_printf (LOGSYS_LEVEL_WARNING, "Set watchdog_timeout is out of range (2..120)."); log_printf (LOGSYS_LEVEL_INFO, "use default value %d seconds.", WD_DEFAULT_TIMEOUT_SEC); watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC); icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout); } } icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY, wd_top_level_key_changed, NULL, &icmap_track); } static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api) { ENTER(); api = corosync_api; watchdog_timeout_get_initial(); setup_watchdog(); wd_scan_resources(); return NULL; } static int wd_exec_exit_fn (void) { char magic = 'V'; ENTER(); if (dog > 0) { log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog."); write (dog, &magic, 1); } return 0; } diff --git a/include/Makefile.am b/include/Makefile.am index c0566744..dba08dfc 100644 --- 
a/include/Makefile.am +++ b/include/Makefile.am @@ -1,47 +1,47 @@ # Copyright (c) 2009 Red Hat, Inc. # # Authors: Andrew Beekhof # Steven Dake (sdake@redhat.com) # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. MAINTAINERCLEANFILES = Makefile.in corosync/config.h.in CS_H = hdb.h cpg.h cfg.h corodefs.h \ corotypes.h quorum.h votequorum.h sam.h cmap.h CS_INTERNAL_H = ipc_cfg.h ipc_cpg.h ipc_quorum.h \ quorum.h sq.h ipc_votequorum.h ipc_cmap.h \ - logsys.h coroapi.h icmap.h mar_gen.h list.h swab.h + logsys.h coroapi.h icmap.h mar_gen.h swab.h TOTEM_H = totem.h totemip.h totempg.h EXTRA_DIST = $(noinst_HEADERS) noinst_HEADERS = $(CS_INTERNAL_H:%=corosync/%) nobase_include_HEADERS = $(CS_H:%=corosync/%) $(TOTEM_H:%=corosync/totem/%) diff --git a/include/corosync/totem/totemip.h b/include/corosync/totem/totemip.h index 0168e66c..428b9513 100644 --- a/include/corosync/totem/totemip.h +++ b/include/corosync/totem/totemip.h @@ -1,123 +1,123 @@ /* * Copyright (c) 2005-2010 Red Hat, Inc. * * All rights reserved. * * Author: Patrick Caulfield (pcaulfie@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* IPv4/6 abstraction */ #ifndef TOTEMIP_H_DEFINED #define TOTEMIP_H_DEFINED #include #include #include #include -#include +#include #ifdef __cplusplus extern "C" { #endif #ifdef SO_NOSIGPIPE #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif void totemip_nosigpipe(int s); #else #define totemip_nosigpipe(s) #endif #define TOTEMIP_ADDRLEN (sizeof(struct in6_addr)) /* These are the things that get passed around */ #define TOTEM_IP_ADDRESS struct totem_ip_address { unsigned int nodeid; unsigned short family; unsigned char addr[TOTEMIP_ADDRLEN]; } __attribute__((packed)); struct totem_ip_if_address { struct totem_ip_address ip_addr; struct totem_ip_address mask_addr; int interface_up; int interface_num; char *name; - struct list_head list; + struct qb_list_head list; }; extern int totemip_equal(const struct totem_ip_address *addr1, const struct totem_ip_address *addr2); extern int totemip_compare(const void *a, const void *b); extern int totemip_is_mcast(struct totem_ip_address *addr); extern void totemip_copy(struct totem_ip_address *addr1, const struct totem_ip_address *addr2); extern void totemip_copy_endian_convert(struct totem_ip_address *addr1, const struct totem_ip_address *addr2); int totemip_localhost(int family, struct totem_ip_address *localhost); extern int totemip_localhost_check(const struct totem_ip_address *addr); extern const char *totemip_print(const struct totem_ip_address *addr); extern int totemip_sockaddr_to_totemip_convert(const struct sockaddr_storage *saddr, struct totem_ip_address *ip_addr); extern int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, uint16_t port, struct sockaddr_storage *saddr, int *addrlen); extern int totemip_parse(struct totem_ip_address *totemip, const char *addr, int family); extern int totemip_iface_check(struct totem_ip_address *bindnet, struct totem_ip_address *boundto, int *interface_up, int *interface_num, int mask_high_bit); -extern int totemip_getifaddrs(struct list_head *addrs); +extern int totemip_getifaddrs(struct qb_list_head *addrs); -extern void totemip_freeifaddrs(struct list_head *addrs); +extern void totemip_freeifaddrs(struct qb_list_head *addrs); /* These two simulate a zero in_addr by clearing the family field */ static inline void totemip_zero_set(struct totem_ip_address *addr) { addr->family = 0; } static inline int totemip_zero_check(const struct totem_ip_address *addr) { return (addr->family == 0); } extern size_t totemip_udpip_header_size(int family); #ifdef __cplusplus } #endif #endif diff --git a/lib/cmap.c b/lib/cmap.c index 8a5bed06..0f7da8b1 100644 --- a/lib/cmap.c +++ b/lib/cmap.c @@ -1,1075 +1,1074 @@ /* * Copyright (c) 2011-2012 Red Hat, Inc. * * All rights reserved. 
* * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include "util.h" #include struct cmap_inst { int finalize; qb_ipcc_connection_t *c; const void *context; }; struct cmap_track_inst { void *user_data; cmap_notify_fn_t notify_fn; qb_ipcc_connection_t *c; cmap_track_handle_t track_handle; }; static void cmap_inst_free (void *inst); DECLARE_HDB_DATABASE(cmap_handle_t_db, cmap_inst_free); DECLARE_HDB_DATABASE(cmap_track_handle_t_db,NULL); /* * Function prototypes */ static cs_error_t cmap_get_int( cmap_handle_t handle, const char *key_name, void *value, size_t value_size, cmap_value_types_t type); static cs_error_t cmap_adjust_int(cmap_handle_t handle, const char *key_name, int32_t step); /* * Function implementations */ cs_error_t cmap_initialize (cmap_handle_t *handle) { cs_error_t error; struct cmap_inst *cmap_inst; error = hdb_error_to_cs(hdb_handle_create(&cmap_handle_t_db, sizeof(*cmap_inst), handle)); if (error != CS_OK) { goto error_no_destroy; } error = hdb_error_to_cs(hdb_handle_get(&cmap_handle_t_db, *handle, (void *)&cmap_inst)); if (error != CS_OK) { goto error_destroy; } error = CS_OK; cmap_inst->finalize = 0; cmap_inst->c = qb_ipcc_connect("cmap", IPC_REQUEST_SIZE); if (cmap_inst->c == NULL) { error = qb_to_cs_error(-errno); goto error_put_destroy; } (void)hdb_handle_put(&cmap_handle_t_db, *handle); return (CS_OK); error_put_destroy: (void)hdb_handle_put(&cmap_handle_t_db, *handle); error_destroy: (void)hdb_handle_destroy(&cmap_handle_t_db, *handle); error_no_destroy: return (error); } static void cmap_inst_free (void *inst) { struct cmap_inst *cmap_inst = (struct cmap_inst *)inst; qb_ipcc_disconnect(cmap_inst->c); } cs_error_t cmap_finalize(cmap_handle_t handle) { struct cmap_inst *cmap_inst; cs_error_t error; hdb_handle_t track_inst_handle = 0; struct cmap_track_inst *cmap_track_inst; error = 
hdb_error_to_cs(hdb_handle_get(&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } if (cmap_inst->finalize) { (void)hdb_handle_put (&cmap_handle_t_db, handle); return (CS_ERR_BAD_HANDLE); } cmap_inst->finalize = 1; /* * Destroy all track instances for given connection */ hdb_iterator_reset(&cmap_track_handle_t_db); while (hdb_iterator_next(&cmap_track_handle_t_db, (void*)&cmap_track_inst, &track_inst_handle) == 0) { if (cmap_track_inst->c == cmap_inst->c) { (void)hdb_handle_destroy(&cmap_track_handle_t_db, track_inst_handle); } (void)hdb_handle_put (&cmap_track_handle_t_db, track_inst_handle); } (void)hdb_handle_destroy(&cmap_handle_t_db, handle); (void)hdb_handle_put(&cmap_handle_t_db, handle); return (CS_OK); } cs_error_t cmap_fd_get(cmap_handle_t handle, int *fd) { cs_error_t error; struct cmap_inst *cmap_inst; error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } error = qb_to_cs_error (qb_ipcc_fd_get (cmap_inst->c, fd)); (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_dispatch ( cmap_handle_t handle, cs_dispatch_flags_t dispatch_types) { int timeout = -1; cs_error_t error; int cont = 1; /* always continue do loop except when set to 0 */ struct cmap_inst *cmap_inst; struct qb_ipc_response_header *dispatch_data; char dispatch_buf[IPC_DISPATCH_SIZE]; struct res_lib_cmap_notify_callback *res_lib_cmap_notify_callback; struct cmap_track_inst *cmap_track_inst; struct cmap_notify_value old_val; struct cmap_notify_value new_val; error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } /* * Timeout instantly for CS_DISPATCH_ONE_NONBLOCKING or CS_DISPATCH_ALL and * wait indefinately for CS_DISPATCH_ONE or CS_DISPATCH_BLOCKING */ if (dispatch_types == CS_DISPATCH_ALL || dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { timeout = 0; } dispatch_data = (struct qb_ipc_response_header *)dispatch_buf; do { error = qb_to_cs_error(qb_ipcc_event_recv ( cmap_inst->c, dispatch_buf, IPC_DISPATCH_SIZE, timeout)); if (error == CS_ERR_BAD_HANDLE) { error = CS_OK; goto error_put; } if (error == CS_ERR_TRY_AGAIN) { if (dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { /* * Don't mask error */ goto error_put; } error = CS_OK; if (dispatch_types == CS_DISPATCH_ALL) { break; /* exit do while cont is 1 loop */ } else { continue; /* next poll */ } } if (error != CS_OK) { goto error_put; } /* * Dispatch incoming message */ switch (dispatch_data->id) { case MESSAGE_RES_CMAP_NOTIFY_CALLBACK: res_lib_cmap_notify_callback = (struct res_lib_cmap_notify_callback *)dispatch_data; error = hdb_error_to_cs(hdb_handle_get(&cmap_track_handle_t_db, res_lib_cmap_notify_callback->track_inst_handle, (void *)&cmap_track_inst)); if (error == CS_ERR_BAD_HANDLE) { /* * User deleted tracker -> ignore error */ break; } if (error != CS_OK) { goto error_put; } new_val.type = res_lib_cmap_notify_callback->new_value_type; old_val.type = res_lib_cmap_notify_callback->old_value_type; new_val.len = res_lib_cmap_notify_callback->new_value_len; old_val.len = res_lib_cmap_notify_callback->old_value_len; new_val.data = res_lib_cmap_notify_callback->new_value; old_val.data = (((const char *)res_lib_cmap_notify_callback->new_value) + new_val.len); cmap_track_inst->notify_fn(handle, cmap_track_inst->track_handle, res_lib_cmap_notify_callback->event, (char *)res_lib_cmap_notify_callback->key_name.value, new_val, old_val, 
cmap_track_inst->user_data); (void)hdb_handle_put(&cmap_track_handle_t_db, res_lib_cmap_notify_callback->track_inst_handle); break; default: error = CS_ERR_LIBRARY; goto error_put; break; } if (cmap_inst->finalize) { /* * If the finalize has been called then get out of the dispatch. */ error = CS_ERR_BAD_HANDLE; goto error_put; } /* * Determine if more messages should be processed */ if (dispatch_types == CS_DISPATCH_ONE || dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { cont = 0; } } while (cont); error_put: (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_context_get ( cmap_handle_t handle, const void **context) { cs_error_t error; struct cmap_inst *cmap_inst; error = hdb_error_to_cs(hdb_handle_get(&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } *context = cmap_inst->context; (void)hdb_handle_put (&cmap_handle_t_db, handle); return (CS_OK); } cs_error_t cmap_context_set ( cmap_handle_t handle, const void *context) { cs_error_t error; struct cmap_inst *cmap_inst; error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } cmap_inst->context = context; (void)hdb_handle_put (&cmap_handle_t_db, handle); return (CS_OK); } cs_error_t cmap_set ( cmap_handle_t handle, const char *key_name, const void *value, size_t value_len, cmap_value_types_t type) { cs_error_t error; struct iovec iov[2]; struct cmap_inst *cmap_inst; struct req_lib_cmap_set req_lib_cmap_set; struct res_lib_cmap_set res_lib_cmap_set; if (key_name == NULL || value == NULL) { return (CS_ERR_INVALID_PARAM); } if (strlen(key_name) >= CS_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_set, 0, sizeof(req_lib_cmap_set)); req_lib_cmap_set.header.size = sizeof(req_lib_cmap_set) + value_len; req_lib_cmap_set.header.id = MESSAGE_REQ_CMAP_SET; memcpy(req_lib_cmap_set.key_name.value, key_name, strlen(key_name)); req_lib_cmap_set.key_name.length = strlen(key_name); req_lib_cmap_set.value_len = value_len; req_lib_cmap_set.type = type; iov[0].iov_base = (char *)&req_lib_cmap_set; iov[0].iov_len = sizeof(req_lib_cmap_set); iov[1].iov_base = (void *)value; iov[1].iov_len = value_len; error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, iov, 2, &res_lib_cmap_set, sizeof (struct res_lib_cmap_set), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_set.header.error; } (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_set_int8(cmap_handle_t handle, const char *key_name, int8_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_INT8)); } cs_error_t cmap_set_uint8(cmap_handle_t handle, const char *key_name, uint8_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_UINT8)); } cs_error_t cmap_set_int16(cmap_handle_t handle, const char *key_name, int16_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_INT16)); } cs_error_t cmap_set_uint16(cmap_handle_t handle, const char *key_name, uint16_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_UINT16)); } cs_error_t cmap_set_int32(cmap_handle_t handle, const char *key_name, int32_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_INT32)); } cs_error_t cmap_set_uint32(cmap_handle_t handle, const char 
*key_name, uint32_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_UINT32)); } cs_error_t cmap_set_int64(cmap_handle_t handle, const char *key_name, int64_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_INT64)); } cs_error_t cmap_set_uint64(cmap_handle_t handle, const char *key_name, uint64_t value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_UINT64)); } cs_error_t cmap_set_float(cmap_handle_t handle, const char *key_name, float value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_FLOAT)); } cs_error_t cmap_set_double(cmap_handle_t handle, const char *key_name, double value) { return (cmap_set(handle, key_name, &value, sizeof(value), CMAP_VALUETYPE_DOUBLE)); } cs_error_t cmap_set_string(cmap_handle_t handle, const char *key_name, const char *value) { if (value == NULL) { return (CS_ERR_INVALID_PARAM); } return (cmap_set(handle, key_name, value, strlen(value), CMAP_VALUETYPE_STRING)); } cs_error_t cmap_delete(cmap_handle_t handle, const char *key_name) { cs_error_t error; struct iovec iov; struct cmap_inst *cmap_inst; struct req_lib_cmap_delete req_lib_cmap_delete; struct res_lib_cmap_delete res_lib_cmap_delete; if (key_name == NULL) { return (CS_ERR_INVALID_PARAM); } if (strlen(key_name) >= CS_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_delete, 0, sizeof(req_lib_cmap_delete)); req_lib_cmap_delete.header.size = sizeof(req_lib_cmap_delete); req_lib_cmap_delete.header.id = MESSAGE_REQ_CMAP_DELETE; memcpy(req_lib_cmap_delete.key_name.value, key_name, strlen(key_name)); req_lib_cmap_delete.key_name.length = strlen(key_name); iov.iov_base = (char *)&req_lib_cmap_delete; iov.iov_len = sizeof(req_lib_cmap_delete); error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, &iov, 1, &res_lib_cmap_delete, sizeof (struct res_lib_cmap_delete), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_delete.header.error; } (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_get( cmap_handle_t handle, const char *key_name, void *value, size_t *value_len, cmap_value_types_t *type) { cs_error_t error; struct cmap_inst *cmap_inst; struct iovec iov; struct req_lib_cmap_get req_lib_cmap_get; struct res_lib_cmap_get *res_lib_cmap_get; size_t res_size; if (key_name == NULL) { return (CS_ERR_INVALID_PARAM); } if (strlen(key_name) >= CS_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } if (value != NULL && value_len == NULL) { return (CS_ERR_INVALID_PARAM); } error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_get, 0, sizeof(req_lib_cmap_get)); req_lib_cmap_get.header.size = sizeof(req_lib_cmap_get); req_lib_cmap_get.header.id = MESSAGE_REQ_CMAP_GET; memcpy(req_lib_cmap_get.key_name.value, key_name, strlen(key_name)); req_lib_cmap_get.key_name.length = strlen(key_name); if (value != NULL && value_len != NULL) { req_lib_cmap_get.value_len = *value_len; } else { req_lib_cmap_get.value_len = 0; } iov.iov_base = (char *)&req_lib_cmap_get; iov.iov_len = sizeof(req_lib_cmap_get); res_size = sizeof(struct res_lib_cmap_get) + req_lib_cmap_get.value_len; res_lib_cmap_get = malloc(res_size); if (res_lib_cmap_get == NULL) { return (CS_ERR_NO_MEMORY); } error = qb_to_cs_error(qb_ipcc_sendv_recv( 
cmap_inst->c, &iov, 1, res_lib_cmap_get, res_size, CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_get->header.error; } if (error == CS_OK) { if (type != NULL) { *type = res_lib_cmap_get->type; } if (value_len != NULL) { *value_len = res_lib_cmap_get->value_len; } if (value != NULL && value_len != NULL) { memcpy(value, res_lib_cmap_get->value, res_lib_cmap_get->value_len); } } free(res_lib_cmap_get); (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } static cs_error_t cmap_get_int( cmap_handle_t handle, const char *key_name, void *value, size_t value_size, cmap_value_types_t type) { char key_value[16]; size_t key_size; cs_error_t err; cmap_value_types_t key_type; key_size = sizeof(key_value); memset(key_value, 0, key_size); err = cmap_get(handle, key_name, key_value, &key_size, &key_type); if (err != CS_OK) return (err); if (key_type != type) { return (CS_ERR_INVALID_PARAM); } memcpy(value, key_value, value_size); return (CS_OK); } cs_error_t cmap_get_int8(cmap_handle_t handle, const char *key_name, int8_t *i8) { return (cmap_get_int(handle, key_name, i8, sizeof(*i8), CMAP_VALUETYPE_INT8)); } cs_error_t cmap_get_uint8(cmap_handle_t handle, const char *key_name, uint8_t *u8) { return (cmap_get_int(handle, key_name, u8, sizeof(*u8), CMAP_VALUETYPE_UINT8)); } cs_error_t cmap_get_int16(cmap_handle_t handle, const char *key_name, int16_t *i16) { return (cmap_get_int(handle, key_name, i16, sizeof(*i16), CMAP_VALUETYPE_INT16)); } cs_error_t cmap_get_uint16(cmap_handle_t handle, const char *key_name, uint16_t *u16) { return (cmap_get_int(handle, key_name, u16, sizeof(*u16), CMAP_VALUETYPE_UINT16)); } cs_error_t cmap_get_int32(cmap_handle_t handle, const char *key_name, int32_t *i32) { return (cmap_get_int(handle, key_name, i32, sizeof(*i32), CMAP_VALUETYPE_INT32)); } cs_error_t cmap_get_uint32(cmap_handle_t handle, const char *key_name, uint32_t *u32) { return (cmap_get_int(handle, key_name, u32, sizeof(*u32), CMAP_VALUETYPE_UINT32)); } cs_error_t cmap_get_int64(cmap_handle_t handle, const char *key_name, int64_t *i64) { return (cmap_get_int(handle, key_name, i64, sizeof(*i64), CMAP_VALUETYPE_INT64)); } cs_error_t cmap_get_uint64(cmap_handle_t handle, const char *key_name, uint64_t *u64) { return (cmap_get_int(handle, key_name, u64, sizeof(*u64), CMAP_VALUETYPE_UINT64)); } cs_error_t cmap_get_float(cmap_handle_t handle, const char *key_name, float *flt) { return (cmap_get_int(handle, key_name, flt, sizeof(*flt), CMAP_VALUETYPE_FLOAT)); } cs_error_t cmap_get_double(cmap_handle_t handle, const char *key_name, double *dbl) { return (cmap_get_int(handle, key_name, dbl, sizeof(*dbl), CMAP_VALUETYPE_DOUBLE)); } cs_error_t cmap_get_string(cmap_handle_t handle, const char *key_name, char **str) { cs_error_t res; size_t str_len; cmap_value_types_t type; res = cmap_get(handle, key_name, NULL, &str_len, &type); if (res != CS_OK || type != CMAP_VALUETYPE_STRING) { if (res == CS_OK) { res = CS_ERR_INVALID_PARAM; } goto return_error; } *str = malloc(str_len); if (*str == NULL) { res = CS_ERR_NO_MEMORY; goto return_error; } res = cmap_get(handle, key_name, *str, &str_len, &type); if (res != CS_OK) { free(*str); goto return_error; } return (CS_OK); return_error: return (res); } static cs_error_t cmap_adjust_int(cmap_handle_t handle, const char *key_name, int32_t step) { cs_error_t error; struct iovec iov; struct cmap_inst *cmap_inst; struct req_lib_cmap_adjust_int req_lib_cmap_adjust_int; struct res_lib_cmap_adjust_int res_lib_cmap_adjust_int; if (key_name == NULL) { return 
(CS_ERR_INVALID_PARAM); } if (strlen(key_name) >= CS_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_adjust_int, 0, sizeof(req_lib_cmap_adjust_int)); req_lib_cmap_adjust_int.header.size = sizeof(req_lib_cmap_adjust_int); req_lib_cmap_adjust_int.header.id = MESSAGE_REQ_CMAP_ADJUST_INT; memcpy(req_lib_cmap_adjust_int.key_name.value, key_name, strlen(key_name)); req_lib_cmap_adjust_int.key_name.length = strlen(key_name); req_lib_cmap_adjust_int.step = step; iov.iov_base = (char *)&req_lib_cmap_adjust_int; iov.iov_len = sizeof(req_lib_cmap_adjust_int); error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, &iov, 1, &res_lib_cmap_adjust_int, sizeof (struct res_lib_cmap_adjust_int), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_adjust_int.header.error; } (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_inc(cmap_handle_t handle, const char *key_name) { return (cmap_adjust_int(handle, key_name, 1)); } cs_error_t cmap_dec(cmap_handle_t handle, const char *key_name) { return (cmap_adjust_int(handle, key_name, -1)); } cs_error_t cmap_iter_init( cmap_handle_t handle, const char *prefix, cmap_iter_handle_t *cmap_iter_handle) { cs_error_t error; struct iovec iov; struct cmap_inst *cmap_inst; struct req_lib_cmap_iter_init req_lib_cmap_iter_init; struct res_lib_cmap_iter_init res_lib_cmap_iter_init; if (cmap_iter_handle == NULL) { return (CS_ERR_INVALID_PARAM); } error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_iter_init, 0, sizeof(req_lib_cmap_iter_init)); req_lib_cmap_iter_init.header.size = sizeof(req_lib_cmap_iter_init); req_lib_cmap_iter_init.header.id = MESSAGE_REQ_CMAP_ITER_INIT; if (prefix) { if (strlen(prefix) >= CS_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } memcpy(req_lib_cmap_iter_init.prefix.value, prefix, strlen(prefix)); req_lib_cmap_iter_init.prefix.length = strlen(prefix); } iov.iov_base = (char *)&req_lib_cmap_iter_init; iov.iov_len = sizeof(req_lib_cmap_iter_init); error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, &iov, 1, &res_lib_cmap_iter_init, sizeof (struct res_lib_cmap_iter_init), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_iter_init.header.error; } if (error == CS_OK) { *cmap_iter_handle = res_lib_cmap_iter_init.iter_handle; } (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_iter_next( cmap_handle_t handle, cmap_iter_handle_t iter_handle, char key_name[], size_t *value_len, cmap_value_types_t *type) { cs_error_t error; struct iovec iov; struct cmap_inst *cmap_inst; struct req_lib_cmap_iter_next req_lib_cmap_iter_next; struct res_lib_cmap_iter_next res_lib_cmap_iter_next; if (key_name == NULL) { return (CS_ERR_INVALID_PARAM); } error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_iter_next, 0, sizeof(req_lib_cmap_iter_next)); req_lib_cmap_iter_next.header.size = sizeof(req_lib_cmap_iter_next); req_lib_cmap_iter_next.header.id = MESSAGE_REQ_CMAP_ITER_NEXT; req_lib_cmap_iter_next.iter_handle = iter_handle; iov.iov_base = (char *)&req_lib_cmap_iter_next; iov.iov_len = sizeof(req_lib_cmap_iter_next); error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, &iov, 1, &res_lib_cmap_iter_next, sizeof (struct 
res_lib_cmap_iter_next), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_iter_next.header.error; } if (error == CS_OK) { strncpy(key_name, (const char *)res_lib_cmap_iter_next.key_name.value, CMAP_KEYNAME_MAXLEN); if (value_len != NULL) { *value_len = res_lib_cmap_iter_next.value_len; } if (type != NULL) { *type = res_lib_cmap_iter_next.type; } } (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_iter_finalize( cmap_handle_t handle, cmap_iter_handle_t iter_handle) { cs_error_t error; struct iovec iov; struct cmap_inst *cmap_inst; struct req_lib_cmap_iter_finalize req_lib_cmap_iter_finalize; struct res_lib_cmap_iter_finalize res_lib_cmap_iter_finalize; error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_iter_finalize, 0, sizeof(req_lib_cmap_iter_finalize)); req_lib_cmap_iter_finalize.header.size = sizeof(req_lib_cmap_iter_finalize); req_lib_cmap_iter_finalize.header.id = MESSAGE_REQ_CMAP_ITER_FINALIZE; req_lib_cmap_iter_finalize.iter_handle = iter_handle; iov.iov_base = (char *)&req_lib_cmap_iter_finalize; iov.iov_len = sizeof(req_lib_cmap_iter_finalize); error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, &iov, 1, &res_lib_cmap_iter_finalize, sizeof (struct res_lib_cmap_iter_finalize), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_iter_finalize.header.error; } (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_track_add( cmap_handle_t handle, const char *key_name, int32_t track_type, cmap_notify_fn_t notify_fn, void *user_data, cmap_track_handle_t *cmap_track_handle) { cs_error_t error; struct iovec iov; struct cmap_inst *cmap_inst; struct req_lib_cmap_track_add req_lib_cmap_track_add; struct res_lib_cmap_track_add res_lib_cmap_track_add; struct cmap_track_inst *cmap_track_inst; cmap_track_handle_t cmap_track_inst_handle; if (cmap_track_handle == NULL || notify_fn == NULL) { return (CS_ERR_INVALID_PARAM); } error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } error = hdb_error_to_cs(hdb_handle_create(&cmap_track_handle_t_db, sizeof(*cmap_track_inst), &cmap_track_inst_handle)); if (error != CS_OK) { goto error_put; } error = hdb_error_to_cs(hdb_handle_get(&cmap_track_handle_t_db, cmap_track_inst_handle, (void *)&cmap_track_inst)); if (error != CS_OK) { goto error_put_destroy; } cmap_track_inst->user_data = user_data; cmap_track_inst->notify_fn = notify_fn; cmap_track_inst->c = cmap_inst->c; memset(&req_lib_cmap_track_add, 0, sizeof(req_lib_cmap_track_add)); req_lib_cmap_track_add.header.size = sizeof(req_lib_cmap_track_add); req_lib_cmap_track_add.header.id = MESSAGE_REQ_CMAP_TRACK_ADD; if (key_name) { if (strlen(key_name) >= CS_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } memcpy(req_lib_cmap_track_add.key_name.value, key_name, strlen(key_name)); req_lib_cmap_track_add.key_name.length = strlen(key_name); } req_lib_cmap_track_add.track_type = track_type; req_lib_cmap_track_add.track_inst_handle = cmap_track_inst_handle; iov.iov_base = (char *)&req_lib_cmap_track_add; iov.iov_len = sizeof(req_lib_cmap_track_add); error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, &iov, 1, &res_lib_cmap_track_add, sizeof (struct res_lib_cmap_track_add), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_track_add.header.error; } if (error == CS_OK) { *cmap_track_handle = res_lib_cmap_track_add.track_handle; 
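	/* cache the server-assigned track handle in the local tracker instance */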
cmap_track_inst->track_handle = *cmap_track_handle; } (void)hdb_handle_put (&cmap_track_handle_t_db, cmap_track_inst_handle); (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); error_put_destroy: (void)hdb_handle_put (&cmap_track_handle_t_db, cmap_track_inst_handle); (void)hdb_handle_destroy (&cmap_track_handle_t_db, cmap_track_inst_handle); error_put: (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } cs_error_t cmap_track_delete( cmap_handle_t handle, cmap_track_handle_t track_handle) { cs_error_t error; struct iovec iov; struct cmap_inst *cmap_inst; struct cmap_track_inst *cmap_track_inst; struct req_lib_cmap_track_delete req_lib_cmap_track_delete; struct res_lib_cmap_track_delete res_lib_cmap_track_delete; error = hdb_error_to_cs(hdb_handle_get (&cmap_handle_t_db, handle, (void *)&cmap_inst)); if (error != CS_OK) { return (error); } memset(&req_lib_cmap_track_delete, 0, sizeof(req_lib_cmap_track_delete)); req_lib_cmap_track_delete.header.size = sizeof(req_lib_cmap_track_delete); req_lib_cmap_track_delete.header.id = MESSAGE_REQ_CMAP_TRACK_DELETE; req_lib_cmap_track_delete.track_handle = track_handle; iov.iov_base = (char *)&req_lib_cmap_track_delete; iov.iov_len = sizeof(req_lib_cmap_track_delete); error = qb_to_cs_error(qb_ipcc_sendv_recv( cmap_inst->c, &iov, 1, &res_lib_cmap_track_delete, sizeof (struct res_lib_cmap_track_delete), CS_IPC_TIMEOUT_MS)); if (error == CS_OK) { error = res_lib_cmap_track_delete.header.error; } if (error == CS_OK) { error = hdb_error_to_cs(hdb_handle_get(&cmap_track_handle_t_db, res_lib_cmap_track_delete.track_inst_handle, (void *)&cmap_track_inst)); if (error != CS_OK) { goto error_put; } (void)hdb_handle_put(&cmap_track_handle_t_db, res_lib_cmap_track_delete.track_inst_handle); (void)hdb_handle_destroy(&cmap_track_handle_t_db, res_lib_cmap_track_delete.track_inst_handle); } error_put: (void)hdb_handle_put (&cmap_handle_t_db, handle); return (error); } diff --git a/lib/cpg.c b/lib/cpg.c index c831390b..bdff4756 100644 --- a/lib/cpg.c +++ b/lib/cpg.c @@ -1,1371 +1,1369 @@ /* * vi: set autoindent tabstop=4 shiftwidth=4 : * * Copyright (c) 2006-2015 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfi@redhat.com) * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * Provides a closed process group API using the coroipcc executive */ #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include -#include #include #include #include #include #include "util.h" #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif /* * Maximum number of times to retry a send when transmitting * a large message fragment */ #define MAX_RETRIES 100 /* * ZCB files have following umask (umask is same as used in libqb) */ #define CPG_MEMORY_MAP_UMASK 077 struct cpg_inst { qb_ipcc_connection_t *c; int finalize; void *context; union { cpg_model_data_t model_data; cpg_model_v1_data_t model_v1_data; }; - struct list_head iteration_list_head; + struct qb_list_head iteration_list_head; uint32_t max_msg_size; char *assembly_buf; uint32_t assembly_buf_ptr; int assembling; /* Flag that says we have started assembling a message. * It's here to catch the situation where a node joins * the cluster/group in the middle of a CPG message send * so we don't pass on a partial message to the client. */ }; static void cpg_inst_free (void *inst); DECLARE_HDB_DATABASE(cpg_handle_t_db, cpg_inst_free); struct cpg_iteration_instance_t { cpg_iteration_handle_t cpg_iteration_handle; qb_ipcc_connection_t *conn; hdb_handle_t executive_iteration_handle; - struct list_head list; + struct qb_list_head list; }; DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL); /* * Internal (not visible by API) functions */ static cs_error_t coroipcc_msg_send_reply_receive ( qb_ipcc_connection_t *c, const struct iovec *iov, unsigned int iov_len, void *res_msg, size_t res_len) { return qb_to_cs_error(qb_ipcc_sendv_recv(c, iov, iov_len, res_msg, res_len, CS_IPC_TIMEOUT_MS)); } static void cpg_iteration_instance_finalize (struct cpg_iteration_instance_t *cpg_iteration_instance) { - list_del (&cpg_iteration_instance->list); + qb_list_del (&cpg_iteration_instance->list); hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->cpg_iteration_handle); } static void cpg_inst_free (void *inst) { struct cpg_inst *cpg_inst = (struct cpg_inst *)inst; qb_ipcc_disconnect(cpg_inst->c); } static void cpg_inst_finalize (struct cpg_inst *cpg_inst, hdb_handle_t handle) { - struct list_head *iter, *iter_next; + struct qb_list_head *iter; struct cpg_iteration_instance_t *cpg_iteration_instance; /* * Traverse thru iteration instances and delete them */ - for (iter = cpg_inst->iteration_list_head.next; iter != &cpg_inst->iteration_list_head;iter = iter_next) { - iter_next = iter->next; - - cpg_iteration_instance = list_entry (iter, struct cpg_iteration_instance_t, list); + qb_list_for_each(iter, &(cpg_inst->iteration_list_head)) { + cpg_iteration_instance = qb_list_entry (iter, struct cpg_iteration_instance_t, list); cpg_iteration_instance_finalize (cpg_iteration_instance); } hdb_handle_destroy (&cpg_handle_t_db, handle); } /** * @defgroup cpg_coroipcc The closed process group API * @ingroup coroipcc * * @{ */ 
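/*
 * [Illustrative sketch -- not part of the patch.]  The change above swaps
 * corosync's private list.h for libqb's qblist API: qb_list_init() and
 * qb_list_add_tail() build a list, qb_list_for_each() plus qb_list_entry()
 * walk it, and qb_list_del() unlinks a node.  The self-contained example
 * below (hypothetical struct and function names) shows the idiom; note that
 * when the loop body unlinks or frees the current node, as
 * cpg_inst_finalize() does via cpg_iteration_instance_finalize(), libqb also
 * offers the qb_list_for_each_safe() variant, which keeps a lookahead
 * pointer so traversal survives the deletion.
 */
#include <stdio.h>
#include <stdlib.h>
#include <qb/qblist.h>

struct demo_item {
	int value;
	struct qb_list_head list;	/* linkage into the demo list */
};

static void demo_qblist(void)
{
	struct qb_list_head head;
	struct qb_list_head *iter, *next;
	struct demo_item *item;
	int i;

	qb_list_init(&head);

	for (i = 0; i < 3; i++) {
		item = malloc(sizeof(*item));
		if (item == NULL) {
			return;
		}
		item->value = i;
		qb_list_init(&item->list);
		qb_list_add_tail(&item->list, &head);
	}

	/* read-only traversal */
	qb_list_for_each(iter, &head) {
		item = qb_list_entry(iter, struct demo_item, list);
		printf("item %d\n", item->value);
	}

	/* traversal that deletes entries: use the _safe variant */
	qb_list_for_each_safe(iter, next, &head) {
		item = qb_list_entry(iter, struct demo_item, list);
		qb_list_del(&item->list);
		free(item);
	}
}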
cs_error_t cpg_initialize ( cpg_handle_t *handle, cpg_callbacks_t *callbacks) { cpg_model_v1_data_t model_v1_data; memset (&model_v1_data, 0, sizeof (cpg_model_v1_data_t)); if (callbacks) { model_v1_data.cpg_deliver_fn = callbacks->cpg_deliver_fn; model_v1_data.cpg_confchg_fn = callbacks->cpg_confchg_fn; } return (cpg_model_initialize (handle, CPG_MODEL_V1, (cpg_model_data_t *)&model_v1_data, NULL)); } cs_error_t cpg_model_initialize ( cpg_handle_t *handle, cpg_model_t model, cpg_model_data_t *model_data, void *context) { cs_error_t error; struct cpg_inst *cpg_inst; if (model != CPG_MODEL_V1) { error = CS_ERR_INVALID_PARAM; goto error_no_destroy; } error = hdb_error_to_cs (hdb_handle_create (&cpg_handle_t_db, sizeof (struct cpg_inst), handle)); if (error != CS_OK) { goto error_no_destroy; } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, *handle, (void *)&cpg_inst)); if (error != CS_OK) { goto error_destroy; } cpg_inst->c = qb_ipcc_connect ("cpg", IPC_REQUEST_SIZE); if (cpg_inst->c == NULL) { error = qb_to_cs_error(-errno); goto error_put_destroy; } if (model_data != NULL) { switch (model) { case CPG_MODEL_V1: memcpy (&cpg_inst->model_v1_data, model_data, sizeof (cpg_model_v1_data_t)); if ((cpg_inst->model_v1_data.flags & ~(CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF)) != 0) { error = CS_ERR_INVALID_PARAM; goto error_destroy; } break; } } /* Allow space for corosync internal headers */ cpg_inst->max_msg_size = IPC_REQUEST_SIZE - 1024; cpg_inst->model_data.model = model; cpg_inst->context = context; - list_init(&cpg_inst->iteration_list_head); + qb_list_init(&cpg_inst->iteration_list_head); hdb_handle_put (&cpg_handle_t_db, *handle); return (CS_OK); error_put_destroy: hdb_handle_put (&cpg_handle_t_db, *handle); error_destroy: hdb_handle_destroy (&cpg_handle_t_db, *handle); error_no_destroy: return (error); } cs_error_t cpg_finalize ( cpg_handle_t handle) { struct cpg_inst *cpg_inst; struct iovec iov; struct req_lib_cpg_finalize req_lib_cpg_finalize; struct res_lib_cpg_finalize res_lib_cpg_finalize; cs_error_t error; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } /* * Another thread has already started finalizing */ if (cpg_inst->finalize) { hdb_handle_put (&cpg_handle_t_db, handle); return (CS_ERR_BAD_HANDLE); } cpg_inst->finalize = 1; /* * Send service request */ req_lib_cpg_finalize.header.size = sizeof (struct req_lib_cpg_finalize); req_lib_cpg_finalize.header.id = MESSAGE_REQ_CPG_FINALIZE; iov.iov_base = (void *)&req_lib_cpg_finalize; iov.iov_len = sizeof (struct req_lib_cpg_finalize); error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1, &res_lib_cpg_finalize, sizeof (struct res_lib_cpg_finalize)); cpg_inst_finalize (cpg_inst, handle); hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_fd_get ( cpg_handle_t handle, int *fd) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } error = qb_to_cs_error (qb_ipcc_fd_get (cpg_inst->c, fd)); hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_max_atomic_msgsize_get ( cpg_handle_t handle, uint32_t *size) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } *size = cpg_inst->max_msg_size; hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t 
cpg_context_get ( cpg_handle_t handle, void **context) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } *context = cpg_inst->context; hdb_handle_put (&cpg_handle_t_db, handle); return (CS_OK); } cs_error_t cpg_context_set ( cpg_handle_t handle, void *context) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } cpg_inst->context = context; hdb_handle_put (&cpg_handle_t_db, handle); return (CS_OK); } cs_error_t cpg_dispatch ( cpg_handle_t handle, cs_dispatch_flags_t dispatch_types) { int timeout = -1; cs_error_t error; int cont = 1; /* always continue do loop except when set to 0 */ struct cpg_inst *cpg_inst; struct res_lib_cpg_confchg_callback *res_cpg_confchg_callback; struct res_lib_cpg_deliver_callback *res_cpg_deliver_callback; struct res_lib_cpg_partial_deliver_callback *res_cpg_partial_deliver_callback; struct res_lib_cpg_totem_confchg_callback *res_cpg_totem_confchg_callback; struct cpg_inst cpg_inst_copy; struct qb_ipc_response_header *dispatch_data; struct cpg_address member_list[CPG_MEMBERS_MAX]; struct cpg_address left_list[CPG_MEMBERS_MAX]; struct cpg_address joined_list[CPG_MEMBERS_MAX]; struct cpg_name group_name; mar_cpg_address_t *left_list_start; mar_cpg_address_t *joined_list_start; unsigned int i; struct cpg_ring_id ring_id; uint32_t totem_member_list[CPG_MEMBERS_MAX]; int32_t errno_res; char dispatch_buf[IPC_DISPATCH_SIZE]; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } /* * Timeout instantly for CS_DISPATCH_ONE_NONBLOCKING or CS_DISPATCH_ALL and * wait indefinitely for CS_DISPATCH_ONE or CS_DISPATCH_BLOCKING */ if (dispatch_types == CS_DISPATCH_ALL || dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { timeout = 0; } dispatch_data = (struct qb_ipc_response_header *)dispatch_buf; do { errno_res = qb_ipcc_event_recv ( cpg_inst->c, dispatch_buf, IPC_DISPATCH_SIZE, timeout); error = qb_to_cs_error (errno_res); if (error == CS_ERR_BAD_HANDLE) { error = CS_OK; goto error_put; } if (error == CS_ERR_TRY_AGAIN) { if (dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { /* * Don't mask error */ goto error_put; } error = CS_OK; if (dispatch_types == CS_DISPATCH_ALL) { break; /* exit do while cont is 1 loop */ } else { continue; /* next poll */ } } if (error != CS_OK) { goto error_put; } /* * Make copy of callbacks, message data, unlock instance, and call callback * A risk of this dispatch method is that the callback routines may * operate at the same time that cpgFinalize has been called. 
*/ memcpy (&cpg_inst_copy, cpg_inst, sizeof (struct cpg_inst)); switch (cpg_inst_copy.model_data.model) { case CPG_MODEL_V1: /* * Dispatch incoming message */ switch (dispatch_data->id) { case MESSAGE_RES_CPG_DELIVER_CALLBACK: if (cpg_inst_copy.model_v1_data.cpg_deliver_fn == NULL) { break; } res_cpg_deliver_callback = (struct res_lib_cpg_deliver_callback *)dispatch_data; marshall_from_mar_cpg_name_t ( &group_name, &res_cpg_deliver_callback->group_name); cpg_inst_copy.model_v1_data.cpg_deliver_fn (handle, &group_name, res_cpg_deliver_callback->nodeid, res_cpg_deliver_callback->pid, &res_cpg_deliver_callback->message, res_cpg_deliver_callback->msglen); break; case MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK: res_cpg_partial_deliver_callback = (struct res_lib_cpg_partial_deliver_callback *)dispatch_data; marshall_from_mar_cpg_name_t ( &group_name, &res_cpg_partial_deliver_callback->group_name); if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_FIRST) { /* * Allocate a buffer to contain a full message. */ cpg_inst->assembly_buf = malloc(res_cpg_partial_deliver_callback->msglen); if (!cpg_inst->assembly_buf) { error = CS_ERR_NO_MEMORY; goto error_put; } cpg_inst->assembling = 1; cpg_inst->assembly_buf_ptr = 0; } if (cpg_inst->assembling) { memcpy(cpg_inst->assembly_buf + cpg_inst->assembly_buf_ptr, res_cpg_partial_deliver_callback->message, res_cpg_partial_deliver_callback->fraglen); cpg_inst->assembly_buf_ptr += res_cpg_partial_deliver_callback->fraglen; if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_LAST) { cpg_inst_copy.model_v1_data.cpg_deliver_fn (handle, &group_name, res_cpg_partial_deliver_callback->nodeid, res_cpg_partial_deliver_callback->pid, cpg_inst->assembly_buf, res_cpg_partial_deliver_callback->msglen); free(cpg_inst->assembly_buf); cpg_inst->assembling = 0; } } break; case MESSAGE_RES_CPG_CONFCHG_CALLBACK: if (cpg_inst_copy.model_v1_data.cpg_confchg_fn == NULL) { break; } res_cpg_confchg_callback = (struct res_lib_cpg_confchg_callback *)dispatch_data; for (i = 0; i < res_cpg_confchg_callback->member_list_entries; i++) { marshall_from_mar_cpg_address_t (&member_list[i], &res_cpg_confchg_callback->member_list[i]); } left_list_start = res_cpg_confchg_callback->member_list + res_cpg_confchg_callback->member_list_entries; for (i = 0; i < res_cpg_confchg_callback->left_list_entries; i++) { marshall_from_mar_cpg_address_t (&left_list[i], &left_list_start[i]); } joined_list_start = res_cpg_confchg_callback->member_list + res_cpg_confchg_callback->member_list_entries + res_cpg_confchg_callback->left_list_entries; for (i = 0; i < res_cpg_confchg_callback->joined_list_entries; i++) { marshall_from_mar_cpg_address_t (&joined_list[i], &joined_list_start[i]); } marshall_from_mar_cpg_name_t ( &group_name, &res_cpg_confchg_callback->group_name); cpg_inst_copy.model_v1_data.cpg_confchg_fn (handle, &group_name, member_list, res_cpg_confchg_callback->member_list_entries, left_list, res_cpg_confchg_callback->left_list_entries, joined_list, res_cpg_confchg_callback->joined_list_entries); break; case MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK: if (cpg_inst_copy.model_v1_data.cpg_totem_confchg_fn == NULL) { break; } res_cpg_totem_confchg_callback = (struct res_lib_cpg_totem_confchg_callback *)dispatch_data; marshall_from_mar_cpg_ring_id_t (&ring_id, &res_cpg_totem_confchg_callback->ring_id); for (i = 0; i < res_cpg_totem_confchg_callback->member_list_entries; i++) { totem_member_list[i] = res_cpg_totem_confchg_callback->member_list[i]; } 
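				/*
				 * Hand the marshalled ring id and the copied totem member
				 * list to the user's totem_confchg callback (CPG model v1).
				 */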
cpg_inst_copy.model_v1_data.cpg_totem_confchg_fn (handle, ring_id, res_cpg_totem_confchg_callback->member_list_entries, totem_member_list); break; default: error = CS_ERR_LIBRARY; goto error_put; break; } /* - switch (dispatch_data->id) */ break; /* case CPG_MODEL_V1 */ } /* - switch (cpg_inst_copy.model_data.model) */ if (cpg_inst_copy.finalize || cpg_inst->finalize) { /* * If the finalize has been called then get out of the dispatch. */ cpg_inst->finalize = 1; error = CS_ERR_BAD_HANDLE; goto error_put; } /* * Determine if more messages should be processed */ if (dispatch_types == CS_DISPATCH_ONE || dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { cont = 0; } } while (cont); error_put: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_join ( cpg_handle_t handle, const struct cpg_name *group) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov[2]; struct req_lib_cpg_join req_lib_cpg_join; struct res_lib_cpg_join response; if (group->length > CPG_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } /* Now join */ req_lib_cpg_join.header.size = sizeof (struct req_lib_cpg_join); req_lib_cpg_join.header.id = MESSAGE_REQ_CPG_JOIN; req_lib_cpg_join.pid = getpid(); req_lib_cpg_join.flags = 0; switch (cpg_inst->model_data.model) { case CPG_MODEL_V1: req_lib_cpg_join.flags = cpg_inst->model_v1_data.flags; break; } marshall_to_mar_cpg_name_t (&req_lib_cpg_join.group_name, group); iov[0].iov_base = (void *)&req_lib_cpg_join; iov[0].iov_len = sizeof (struct req_lib_cpg_join); do { error = coroipcc_msg_send_reply_receive (cpg_inst->c, iov, 1, &response, sizeof (struct res_lib_cpg_join)); if (error != CS_OK) { goto error_exit; } } while (response.header.error == CS_ERR_BUSY); error = response.header.error; error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_leave ( cpg_handle_t handle, const struct cpg_name *group) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov[2]; struct req_lib_cpg_leave req_lib_cpg_leave; struct res_lib_cpg_leave res_lib_cpg_leave; if (group->length > CPG_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_lib_cpg_leave.header.size = sizeof (struct req_lib_cpg_leave); req_lib_cpg_leave.header.id = MESSAGE_REQ_CPG_LEAVE; req_lib_cpg_leave.pid = getpid(); marshall_to_mar_cpg_name_t (&req_lib_cpg_leave.group_name, group); iov[0].iov_base = (void *)&req_lib_cpg_leave; iov[0].iov_len = sizeof (struct req_lib_cpg_leave); do { error = coroipcc_msg_send_reply_receive (cpg_inst->c, iov, 1, &res_lib_cpg_leave, sizeof (struct res_lib_cpg_leave)); if (error != CS_OK) { goto error_exit; } } while (res_lib_cpg_leave.header.error == CS_ERR_BUSY); error = res_lib_cpg_leave.header.error; error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_membership_get ( cpg_handle_t handle, struct cpg_name *group_name, struct cpg_address *member_list, int *member_list_entries) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov; struct req_lib_cpg_membership_get req_lib_cpg_membership_get; struct res_lib_cpg_membership_get res_lib_cpg_membership_get; unsigned int i; if (group_name->length > CPG_MAX_NAME_LENGTH) { return (CS_ERR_NAME_TOO_LONG); } if (member_list == NULL) { return (CS_ERR_INVALID_PARAM); } if 
(member_list_entries == NULL) { return (CS_ERR_INVALID_PARAM); } error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_lib_cpg_membership_get.header.size = sizeof (struct req_lib_cpg_membership_get); req_lib_cpg_membership_get.header.id = MESSAGE_REQ_CPG_MEMBERSHIP; marshall_to_mar_cpg_name_t (&req_lib_cpg_membership_get.group_name, group_name); iov.iov_base = (void *)&req_lib_cpg_membership_get; iov.iov_len = sizeof (struct req_lib_cpg_membership_get); error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1, &res_lib_cpg_membership_get, sizeof (res_lib_cpg_membership_get)); if (error != CS_OK) { goto error_exit; } error = res_lib_cpg_membership_get.header.error; /* * Copy results to caller */ *member_list_entries = res_lib_cpg_membership_get.member_count; if (member_list) { for (i = 0; i < res_lib_cpg_membership_get.member_count; i++) { marshall_from_mar_cpg_address_t (&member_list[i], &res_lib_cpg_membership_get.member_list[i]); } } error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_local_get ( cpg_handle_t handle, unsigned int *local_nodeid) { cs_error_t error; struct cpg_inst *cpg_inst; struct iovec iov; struct req_lib_cpg_local_get req_lib_cpg_local_get; struct res_lib_cpg_local_get res_lib_cpg_local_get; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_lib_cpg_local_get.header.size = sizeof (struct qb_ipc_request_header); req_lib_cpg_local_get.header.id = MESSAGE_REQ_CPG_LOCAL_GET; iov.iov_base = (void *)&req_lib_cpg_local_get; iov.iov_len = sizeof (struct req_lib_cpg_local_get); error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1, &res_lib_cpg_local_get, sizeof (res_lib_cpg_local_get)); if (error != CS_OK) { goto error_exit; } error = res_lib_cpg_local_get.header.error; *local_nodeid = res_lib_cpg_local_get.local_nodeid; error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_flow_control_state_get ( cpg_handle_t handle, cpg_flow_control_state_t *flow_control_state) { cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } *flow_control_state = CPG_FLOW_CONTROL_DISABLED; error = CS_OK; hdb_handle_put (&cpg_handle_t_db, handle); return (error); } static int memory_map (char *path, const char *file, void **buf, size_t bytes) { int32_t fd; void *addr; int32_t res; char *buffer; int32_t i; size_t written; size_t page_size; long int sysconf_page_size; mode_t old_umask; snprintf (path, PATH_MAX, "/dev/shm/%s", file); old_umask = umask(CPG_MEMORY_MAP_UMASK); fd = mkstemp (path); (void)umask(old_umask); if (fd == -1) { snprintf (path, PATH_MAX, LOCALSTATEDIR "/run/%s", file); old_umask = umask(CPG_MEMORY_MAP_UMASK); fd = mkstemp (path); (void)umask(old_umask); if (fd == -1) { return (-1); } } res = ftruncate (fd, bytes); if (res == -1) { goto error_close_unlink; } sysconf_page_size = sysconf(_SC_PAGESIZE); if (sysconf_page_size <= 0) { goto error_close_unlink; } page_size = sysconf_page_size; buffer = malloc (page_size); if (buffer == NULL) { goto error_close_unlink; } memset (buffer, 0, page_size); for (i = 0; i < (bytes / page_size); i++) { retry_write: written = write (fd, buffer, page_size); if (written == -1 && errno == EINTR) { goto retry_write; } if (written != page_size) { free (buffer); goto error_close_unlink; } } free 
(buffer); addr = mmap (NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { goto error_close_unlink; } #ifdef MADV_NOSYNC madvise(addr, bytes, MADV_NOSYNC); #endif res = close (fd); if (res) { munmap(addr, bytes); return (-1); } *buf = addr; return 0; error_close_unlink: close (fd); unlink(path); return -1; } cs_error_t cpg_zcb_alloc ( cpg_handle_t handle, size_t size, void **buffer) { void *buf = NULL; char path[PATH_MAX]; mar_req_coroipcc_zc_alloc_t req_coroipcc_zc_alloc; struct qb_ipc_response_header res_coroipcs_zc_alloc; size_t map_size; struct iovec iovec; struct coroipcs_zc_header *hdr; cs_error_t error; struct cpg_inst *cpg_inst; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } map_size = size + sizeof (struct req_lib_cpg_mcast) + sizeof (struct coroipcs_zc_header); assert(memory_map (path, "corosync_zerocopy-XXXXXX", &buf, map_size) != -1); if (strlen(path) >= CPG_ZC_PATH_LEN) { unlink(path); munmap (buf, map_size); return (CS_ERR_NAME_TOO_LONG); } req_coroipcc_zc_alloc.header.size = sizeof (mar_req_coroipcc_zc_alloc_t); req_coroipcc_zc_alloc.header.id = MESSAGE_REQ_CPG_ZC_ALLOC; req_coroipcc_zc_alloc.map_size = map_size; strcpy (req_coroipcc_zc_alloc.path_to_file, path); iovec.iov_base = (void *)&req_coroipcc_zc_alloc; iovec.iov_len = sizeof (mar_req_coroipcc_zc_alloc_t); error = coroipcc_msg_send_reply_receive ( cpg_inst->c, &iovec, 1, &res_coroipcs_zc_alloc, sizeof (struct qb_ipc_response_header)); if (error != CS_OK) { goto error_exit; } hdr = (struct coroipcs_zc_header *)buf; hdr->map_size = map_size; *buffer = ((char *)buf) + sizeof (struct coroipcs_zc_header) + sizeof (struct req_lib_cpg_mcast); error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_zcb_free ( cpg_handle_t handle, void *buffer) { cs_error_t error; unsigned int res; struct cpg_inst *cpg_inst; mar_req_coroipcc_zc_free_t req_coroipcc_zc_free; struct qb_ipc_response_header res_coroipcs_zc_free; struct iovec iovec; struct coroipcs_zc_header *header = (struct coroipcs_zc_header *)((char *)buffer - sizeof (struct coroipcs_zc_header) - sizeof (struct req_lib_cpg_mcast)); error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } req_coroipcc_zc_free.header.size = sizeof (mar_req_coroipcc_zc_free_t); req_coroipcc_zc_free.header.id = MESSAGE_REQ_CPG_ZC_FREE; req_coroipcc_zc_free.map_size = header->map_size; req_coroipcc_zc_free.server_address = header->server_address; iovec.iov_base = (void *)&req_coroipcc_zc_free; iovec.iov_len = sizeof (mar_req_coroipcc_zc_free_t); error = coroipcc_msg_send_reply_receive ( cpg_inst->c, &iovec, 1, &res_coroipcs_zc_free, sizeof (struct qb_ipc_response_header)); if (error != CS_OK) { goto error_exit; } res = munmap ((void *)header, header->map_size); if (res == -1) { error = qb_to_cs_error(-errno); goto error_exit; } error_exit: hdb_handle_put (&cpg_handle_t_db, handle); return (error); } cs_error_t cpg_zcb_mcast_joined ( cpg_handle_t handle, cpg_guarantee_t guarantee, void *msg, size_t msg_len) { cs_error_t error; struct cpg_inst *cpg_inst; struct req_lib_cpg_mcast *req_lib_cpg_mcast; struct res_lib_cpg_mcast res_lib_cpg_mcast; mar_req_coroipcc_zc_execute_t req_coroipcc_zc_execute; struct coroipcs_zc_header *hdr; struct iovec iovec; error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); if (error != CS_OK) { return (error); } if (msg_len > 
cs_error_t cpg_zcb_mcast_joined (
	cpg_handle_t handle,
	cpg_guarantee_t guarantee,
	void *msg,
	size_t msg_len)
{
	cs_error_t error;
	struct cpg_inst *cpg_inst;
	struct req_lib_cpg_mcast *req_lib_cpg_mcast;
	struct res_lib_cpg_mcast res_lib_cpg_mcast;
	mar_req_coroipcc_zc_execute_t req_coroipcc_zc_execute;
	struct coroipcs_zc_header *hdr;
	struct iovec iovec;

	error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst));
	if (error != CS_OK) {
		return (error);
	}

	if (msg_len > IPC_REQUEST_SIZE) {
		error = CS_ERR_TOO_BIG;
		goto error_exit;
	}

	req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)(((char *)msg) -
		sizeof (struct req_lib_cpg_mcast));
	req_lib_cpg_mcast->header.size = sizeof (struct req_lib_cpg_mcast) + msg_len;
	req_lib_cpg_mcast->header.id = MESSAGE_REQ_CPG_MCAST;
	req_lib_cpg_mcast->guarantee = guarantee;
	req_lib_cpg_mcast->msglen = msg_len;

	hdr = (struct coroipcs_zc_header *)(((char *)req_lib_cpg_mcast) -
		sizeof (struct coroipcs_zc_header));

	req_coroipcc_zc_execute.header.size = sizeof (mar_req_coroipcc_zc_execute_t);
	req_coroipcc_zc_execute.header.id = MESSAGE_REQ_CPG_ZC_EXECUTE;
	req_coroipcc_zc_execute.server_address = hdr->server_address;

	iovec.iov_base = (void *)&req_coroipcc_zc_execute;
	iovec.iov_len = sizeof (mar_req_coroipcc_zc_execute_t);

	error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iovec, 1,
		&res_lib_cpg_mcast, sizeof(res_lib_cpg_mcast));

	if (error != CS_OK) {
		goto error_exit;
	}

	error = res_lib_cpg_mcast.header.error;

error_exit:
	hdb_handle_put (&cpg_handle_t_db, handle);
	return (error);
}
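/*
 * Fragmented send used when the whole message does not fit into a single
 * IPC request: each fragment carries the total msglen plus its own fraglen
 * and is tagged LIBCPG_PARTIAL_FIRST, _CONTINUED or _LAST so the server can
 * reassemble it.  CS_ERR_TRY_AGAIN replies are retried up to MAX_RETRIES
 * times with a short sleep in between.
 */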
static cs_error_t send_fragments (
	struct cpg_inst *cpg_inst,
	cpg_guarantee_t guarantee,
	size_t msg_len,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	int i;
	cs_error_t error = CS_OK;
	struct iovec iov[2];
	struct req_lib_cpg_partial_mcast req_lib_cpg_mcast;
	struct res_lib_cpg_partial_send res_lib_cpg_partial_send;
	size_t sent = 0;
	size_t iov_sent = 0;
	int retry_count;

	req_lib_cpg_mcast.header.id = MESSAGE_REQ_CPG_PARTIAL_MCAST;
	req_lib_cpg_mcast.guarantee = guarantee;
	req_lib_cpg_mcast.msglen = msg_len;

	iov[0].iov_base = (void *)&req_lib_cpg_mcast;
	iov[0].iov_len = sizeof (struct req_lib_cpg_partial_mcast);

	i = 0;
	iov_sent = 0;
	qb_ipcc_fc_enable_max_set(cpg_inst->c, 2);

	while (error == CS_OK && sent < msg_len) {
		retry_count = 0;
		if ((iovec[i].iov_len - iov_sent) > cpg_inst->max_msg_size) {
			iov[1].iov_len = cpg_inst->max_msg_size;
		} else {
			iov[1].iov_len = iovec[i].iov_len - iov_sent;
		}

		if (sent == 0) {
			req_lib_cpg_mcast.type = LIBCPG_PARTIAL_FIRST;
		} else if ((sent + iov[1].iov_len) == msg_len) {
			req_lib_cpg_mcast.type = LIBCPG_PARTIAL_LAST;
		} else {
			req_lib_cpg_mcast.type = LIBCPG_PARTIAL_CONTINUED;
		}

		req_lib_cpg_mcast.fraglen = iov[1].iov_len;
		req_lib_cpg_mcast.header.size = sizeof (struct req_lib_cpg_partial_mcast) + iov[1].iov_len;
		iov[1].iov_base = (char *)iovec[i].iov_base + iov_sent;

resend:
		error = coroipcc_msg_send_reply_receive (cpg_inst->c, iov, 2,
			&res_lib_cpg_partial_send, sizeof (res_lib_cpg_partial_send));

		if (error == CS_ERR_TRY_AGAIN) {
			fprintf(stderr, "sleep. counter=%d\n", retry_count);
			if (++retry_count > MAX_RETRIES) {
				goto error_exit;
			}
			usleep(10000);
			goto resend;
		}

		iov_sent += iov[1].iov_len;
		sent += iov[1].iov_len;

		/* Next iovec */
		if (iov_sent >= iovec[i].iov_len) {
			i++;
			iov_sent = 0;
		}

		error = res_lib_cpg_partial_send.header.error;
	}
error_exit:
	qb_ipcc_fc_enable_max_set(cpg_inst->c, 1);

	return error;
}
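/*
 * Multicast to the joined group(s): payloads larger than the connection's
 * max_msg_size are routed through send_fragments() above, everything else is
 * queued with a single qb_ipcc_sendv() and no per-message reply.  An
 * illustrative caller sketch:
 *
 *	struct iovec iov;
 *
 *	iov.iov_base = (void *)"hello";
 *	iov.iov_len = strlen ("hello") + 1;
 *	(void)cpg_mcast_joined (handle, CPG_TYPE_AGREED, &iov, 1);
 */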
cs_error_t cpg_mcast_joined (
	cpg_handle_t handle,
	cpg_guarantee_t guarantee,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	int i;
	cs_error_t error;
	struct cpg_inst *cpg_inst;
	struct iovec iov[64];
	struct req_lib_cpg_mcast req_lib_cpg_mcast;
	size_t msg_len = 0;

	error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst));
	if (error != CS_OK) {
		return (error);
	}

	for (i = 0; i < iov_len; i++) {
		msg_len += iovec[i].iov_len;
	}

	if (msg_len > cpg_inst->max_msg_size) {
		error = send_fragments(cpg_inst, guarantee, msg_len, iovec, iov_len);
		goto error_exit;
	}

	req_lib_cpg_mcast.header.size = sizeof (struct req_lib_cpg_mcast) + msg_len;
	req_lib_cpg_mcast.header.id = MESSAGE_REQ_CPG_MCAST;
	req_lib_cpg_mcast.guarantee = guarantee;
	req_lib_cpg_mcast.msglen = msg_len;

	iov[0].iov_base = (void *)&req_lib_cpg_mcast;
	iov[0].iov_len = sizeof (struct req_lib_cpg_mcast);
	memcpy (&iov[1], iovec, iov_len * sizeof (struct iovec));

	qb_ipcc_fc_enable_max_set(cpg_inst->c, 2);

	error = qb_to_cs_error(qb_ipcc_sendv(cpg_inst->c, iov, iov_len + 1));

	qb_ipcc_fc_enable_max_set(cpg_inst->c, 1);

error_exit:
	hdb_handle_put (&cpg_handle_t_db, handle);

	return (error);
}

cs_error_t cpg_iteration_initialize(
	cpg_handle_t handle,
	cpg_iteration_type_t iteration_type,
	const struct cpg_name *group,
	cpg_iteration_handle_t *cpg_iteration_handle)
{
	cs_error_t error;
	struct iovec iov;
	struct cpg_inst *cpg_inst;
	struct cpg_iteration_instance_t *cpg_iteration_instance;
	struct req_lib_cpg_iterationinitialize req_lib_cpg_iterationinitialize;
	struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize;

	if (group && group->length > CPG_MAX_NAME_LENGTH) {
		return (CS_ERR_NAME_TOO_LONG);
	}
	if (cpg_iteration_handle == NULL) {
		return (CS_ERR_INVALID_PARAM);
	}

	if ((iteration_type == CPG_ITERATION_ONE_GROUP && group == NULL) ||
	    (iteration_type != CPG_ITERATION_ONE_GROUP && group != NULL)) {
		return (CS_ERR_INVALID_PARAM);
	}

	if (iteration_type != CPG_ITERATION_NAME_ONLY &&
	    iteration_type != CPG_ITERATION_ONE_GROUP &&
	    iteration_type != CPG_ITERATION_ALL) {
		return (CS_ERR_INVALID_PARAM);
	}

	error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst));
	if (error != CS_OK) {
		return (error);
	}

	error = hdb_error_to_cs (hdb_handle_create (&cpg_iteration_handle_t_db,
		sizeof (struct cpg_iteration_instance_t), cpg_iteration_handle));
	if (error != CS_OK) {
		goto error_put_cpg_db;
	}

	error = hdb_error_to_cs (hdb_handle_get (&cpg_iteration_handle_t_db,
		*cpg_iteration_handle, (void *)&cpg_iteration_instance));
	if (error != CS_OK) {
		goto error_destroy;
	}

	cpg_iteration_instance->conn = cpg_inst->c;

-	list_init (&cpg_iteration_instance->list);
+	qb_list_init (&cpg_iteration_instance->list);

	req_lib_cpg_iterationinitialize.header.size = sizeof (struct req_lib_cpg_iterationinitialize);
	req_lib_cpg_iterationinitialize.header.id = MESSAGE_REQ_CPG_ITERATIONINITIALIZE;
	req_lib_cpg_iterationinitialize.iteration_type = iteration_type;
	if (group) {
		marshall_to_mar_cpg_name_t (&req_lib_cpg_iterationinitialize.group_name, group);
	}

	iov.iov_base = (void *)&req_lib_cpg_iterationinitialize;
	iov.iov_len = sizeof (struct req_lib_cpg_iterationinitialize);

	error = coroipcc_msg_send_reply_receive (cpg_inst->c, &iov, 1,
		&res_lib_cpg_iterationinitialize, sizeof (struct res_lib_cpg_iterationinitialize));

	if (error != CS_OK) {
		goto error_put_destroy;
	}

	cpg_iteration_instance->executive_iteration_handle =
		res_lib_cpg_iterationinitialize.iteration_handle;
	cpg_iteration_instance->cpg_iteration_handle = *cpg_iteration_handle;

-	list_add (&cpg_iteration_instance->list, &cpg_inst->iteration_list_head);
+	qb_list_add (&cpg_iteration_instance->list, &cpg_inst->iteration_list_head);

	hdb_handle_put (&cpg_iteration_handle_t_db, *cpg_iteration_handle);
	hdb_handle_put (&cpg_handle_t_db, handle);

	return (res_lib_cpg_iterationinitialize.header.error);

error_put_destroy:
	hdb_handle_put (&cpg_iteration_handle_t_db, *cpg_iteration_handle);
error_destroy:
	hdb_handle_destroy (&cpg_iteration_handle_t_db, *cpg_iteration_handle);
error_put_cpg_db:
	hdb_handle_put (&cpg_handle_t_db, handle);

	return (error);
}

cs_error_t cpg_iteration_next(
	cpg_iteration_handle_t handle,
	struct cpg_iteration_description_t *description)
{
	cs_error_t error;
	struct cpg_iteration_instance_t *cpg_iteration_instance;
	struct req_lib_cpg_iterationnext req_lib_cpg_iterationnext;
	struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext;

	if (description == NULL) {
		return CS_ERR_INVALID_PARAM;
	}

	error = hdb_error_to_cs (hdb_handle_get (&cpg_iteration_handle_t_db, handle,
		(void *)&cpg_iteration_instance));
	if (error != CS_OK) {
		goto error_exit;
	}

	req_lib_cpg_iterationnext.header.size = sizeof (struct req_lib_cpg_iterationnext);
	req_lib_cpg_iterationnext.header.id = MESSAGE_REQ_CPG_ITERATIONNEXT;
	req_lib_cpg_iterationnext.iteration_handle = cpg_iteration_instance->executive_iteration_handle;

	error = qb_to_cs_error (qb_ipcc_send (cpg_iteration_instance->conn,
		&req_lib_cpg_iterationnext, req_lib_cpg_iterationnext.header.size));
	if (error != CS_OK) {
		goto error_put;
	}

	error = qb_to_cs_error (qb_ipcc_recv (cpg_iteration_instance->conn,
		&res_lib_cpg_iterationnext, sizeof(struct res_lib_cpg_iterationnext), -1));
	if (error != CS_OK) {
		goto error_put;
	}

	marshall_from_mar_cpg_iteration_description_t(
		description, &res_lib_cpg_iterationnext.description);

	error = res_lib_cpg_iterationnext.header.error;

error_put:
	hdb_handle_put (&cpg_iteration_handle_t_db, handle);
error_exit:
	return (error);
}
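/*
 * Typical iteration sequence (illustrative sketch; the end of the iteration
 * is reported by cpg_iteration_next() returning a status other than CS_OK,
 * CS_ERR_NO_SECTIONS in the documented API):
 *
 *	cpg_iteration_handle_t iter;
 *	struct cpg_iteration_description_t desc;
 *
 *	if (cpg_iteration_initialize (handle, CPG_ITERATION_ALL, NULL, &iter) == CS_OK) {
 *		while (cpg_iteration_next (iter, &desc) == CS_OK) {
 *			... use desc.group, desc.nodeid and desc.pid ...
 *		}
 *		(void)cpg_iteration_finalize (iter);
 *	}
 */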
cs_error_t cpg_iteration_finalize (
	cpg_iteration_handle_t handle)
{
	cs_error_t error;
	struct iovec iov;
	struct cpg_iteration_instance_t *cpg_iteration_instance;
	struct req_lib_cpg_iterationfinalize req_lib_cpg_iterationfinalize;
	struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize;

	error = hdb_error_to_cs (hdb_handle_get (&cpg_iteration_handle_t_db, handle,
		(void *)&cpg_iteration_instance));
	if (error != CS_OK) {
		goto error_exit;
	}

	req_lib_cpg_iterationfinalize.header.size = sizeof (struct req_lib_cpg_iterationfinalize);
	req_lib_cpg_iterationfinalize.header.id = MESSAGE_REQ_CPG_ITERATIONFINALIZE;
	req_lib_cpg_iterationfinalize.iteration_handle = cpg_iteration_instance->executive_iteration_handle;

	iov.iov_base = (void *)&req_lib_cpg_iterationfinalize;
	iov.iov_len = sizeof (struct req_lib_cpg_iterationfinalize);

	error = coroipcc_msg_send_reply_receive (cpg_iteration_instance->conn, &iov, 1,
		&res_lib_cpg_iterationfinalize, sizeof (struct req_lib_cpg_iterationfinalize));

	if (error != CS_OK) {
		goto error_put;
	}

	cpg_iteration_instance_finalize (cpg_iteration_instance);
	hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->cpg_iteration_handle);

	return (res_lib_cpg_iterationfinalize.header.error);

error_put:
	hdb_handle_put (&cpg_iteration_handle_t_db, handle);
error_exit:
	return (error);
}

/** @} */