diff --git a/libknet/threads_tx.c b/libknet/threads_tx.c index fcf5915e..7ef7243b 100644 --- a/libknet/threads_tx.c +++ b/libknet/threads_tx.c @@ -1,999 +1,997 @@ /* * Copyright (C) 2012-2023 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "compat.h" #include "compress.h" #include "crypto.h" #include "host.h" #include "link.h" #include "logging.h" #include "transports.h" #include "transport_common.h" #include "threads_common.h" #include "threads_heartbeat.h" #include "threads_tx.h" #include "netutils.h" #include "onwire_v1.h" /* * SEND */ static int _dispatch_to_links(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_mmsghdr *msg, int msgs_to_send) { int link_idx, msg_idx, sent_msgs, prev_sent, progress; int err = 0, savederrno = 0, locked = 0; unsigned int i; struct knet_mmsghdr *cur; struct knet_link *cur_link; for (link_idx = 0; link_idx < dst_host->active_link_entries; link_idx++) { prev_sent = 0; progress = 1; locked = 0; cur_link = &dst_host->link[dst_host->active_links[link_idx]]; if (cur_link->transport == KNET_TRANSPORT_LOOPBACK) { continue; } savederrno = pthread_mutex_lock(&cur_link->link_stats_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get stats mutex lock for host %u link %u: %s", dst_host->host_id, cur_link->link_id, strerror(savederrno)); continue; } locked = 1; msg_idx = 0; while (msg_idx < msgs_to_send) { msg[msg_idx].msg_hdr.msg_name = &cur_link->dst_addr; msg[msg_idx].msg_hdr.msg_namelen = knet_h->knet_transport_fd_tracker[cur_link->outsock].sockaddr_len; /* Cast for Linux/BSD compatibility */ for (i=0; i<(unsigned int)msg[msg_idx].msg_hdr.msg_iovlen; i++) { cur_link->status.stats.tx_data_bytes += msg[msg_idx].msg_hdr.msg_iov[i].iov_len; } cur_link->status.stats.tx_data_packets++; msg_idx++; } retry: cur = &msg[prev_sent]; sent_msgs = _sendmmsg(dst_host->link[dst_host->active_links[link_idx]].outsock, transport_get_connection_oriented(knet_h, dst_host->link[dst_host->active_links[link_idx]].transport), &cur[0], msgs_to_send - prev_sent, MSG_DONTWAIT | MSG_NOSIGNAL); savederrno = errno; err = transport_tx_sock_error(knet_h, dst_host->link[dst_host->active_links[link_idx]].transport, dst_host->link[dst_host->active_links[link_idx]].outsock, KNET_SUB_TX, sent_msgs, savederrno); switch(err) { case KNET_TRANSPORT_SOCK_ERROR_INTERNAL: cur_link->status.stats.tx_data_errors++; goto out_unlock; break; case KNET_TRANSPORT_SOCK_ERROR_IGNORE: break; case KNET_TRANSPORT_SOCK_ERROR_RETRY: cur_link->status.stats.tx_data_retries++; goto retry; break; } prev_sent = prev_sent + sent_msgs; if ((sent_msgs >= 0) && (prev_sent < msgs_to_send)) { if ((sent_msgs) || (progress)) { if (sent_msgs) { progress = 1; } else { progress = 0; } -#ifdef DEBUG - log_debug(knet_h, KNET_SUB_TX, "Unable to send all (%d/%d) data packets to host %s (%u) link %s:%s (%u)", + log_trace(knet_h, KNET_SUB_TX, "Unable to send all (%d/%d) data packets to host %s (%u) link %s:%s (%u)", sent_msgs, msg_idx, dst_host->name, dst_host->host_id, dst_host->link[dst_host->active_links[link_idx]].status.dst_ipaddr, dst_host->link[dst_host->active_links[link_idx]].status.dst_port, dst_host->link[dst_host->active_links[link_idx]].link_id); -#endif goto retry; } if (!progress) { savederrno = EAGAIN; err = -1; goto out_unlock; } } if ((dst_host->link_handler_policy == KNET_LINK_POLICY_RR) && (dst_host->active_link_entries > 1)) { uint8_t cur_link_id = dst_host->active_links[0]; memmove(&dst_host->active_links[0], &dst_host->active_links[1], KNET_MAX_LINK - 1); dst_host->active_links[dst_host->active_link_entries - 1] = cur_link_id; break; } pthread_mutex_unlock(&cur_link->link_stats_mutex); locked = 0; } out_unlock: if (locked) { pthread_mutex_unlock(&cur_link->link_stats_mutex); } errno = savederrno; return err; } static int _dispatch_to_local(knet_handle_t knet_h, unsigned char *data, size_t inlen, int8_t channel) { int err = 0, savederrno = 0; const unsigned char *buf = data; ssize_t buflen = inlen; struct knet_link *local_link = knet_h->host_index[knet_h->host_id]->link; local_retry: err = write(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], buf, buflen); savederrno = errno; if (err < 0) { log_err(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local failed. error=%s\n", strerror(errno)); local_link->status.stats.tx_data_errors++; goto out; } if (err > 0 && err < buflen) { log_debug(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local incomplete=%d bytes of %zu\n", err, inlen); local_link->status.stats.tx_data_retries++; buf += err; buflen -= err; goto local_retry; } if (err == buflen) { local_link->status.stats.tx_data_packets++; local_link->status.stats.tx_data_bytes += inlen; } out: errno = savederrno; return err; } static int _prep_tx_bufs(knet_handle_t knet_h, struct knet_header *inbuf, uint8_t onwire_ver, unsigned char *data, size_t inlen, uint32_t data_checksum, seq_num_t tx_seq_num, int8_t channel, int bcast, int data_compressed, int *msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int *iovcnt_out) { int err = 0, savederrno = 0; unsigned int temp_data_mtu; if (!knet_h->data_mtu) { /* * using MIN_MTU_V4 for data mtu is not completely accurate but safe enough */ log_debug(knet_h, KNET_SUB_TX, "Received data packet but data MTU is still unknown." " Packet might not be delivered." " Assuming minimum IPv4 MTU (%d)", KNET_PMTUD_MIN_MTU_V4); temp_data_mtu = KNET_PMTUD_MIN_MTU_V4; } else { /* * take a copy of the mtu to avoid value changing under * our feet while we are sending a fragmented pckt */ temp_data_mtu = knet_h->data_mtu; } if (knet_h->onwire_ver_remap) { prep_tx_bufs_v1(knet_h, inbuf, data, inlen, data_checksum, temp_data_mtu, tx_seq_num, channel, bcast, data_compressed, msgs_to_send, iov_out, iovcnt_out); } else { switch (onwire_ver) { case 1: prep_tx_bufs_v1(knet_h, inbuf, data, inlen, data_checksum, temp_data_mtu, tx_seq_num, channel, bcast, data_compressed, msgs_to_send, iov_out, iovcnt_out); break; default: /* this should never hit as filters are in place in the calling functions */ log_warn(knet_h, KNET_SUB_TX, "preparing data onwire version %u not supported", onwire_ver); savederrno = EINVAL; err = -1; goto out; break; } } out: errno = savederrno; return err; } static int _compress_data(knet_handle_t knet_h, unsigned char* data, size_t *inlen, int *data_compressed) { int err = 0, savederrno = 0; int stats_locked = 0, stats_err = 0; size_t cmp_outlen = KNET_DATABUFSIZE_COMPRESS; struct timespec start_time; struct timespec end_time; uint64_t compress_time; /* * compress data */ if (knet_h->compress_model > 0) { if (*inlen > knet_h->compress_threshold) { clock_gettime(CLOCK_MONOTONIC, &start_time); err = compress(knet_h, data, *inlen, knet_h->send_to_links_buf_compress, (ssize_t *)&cmp_outlen); savederrno = errno; clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, &compress_time); stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); if (stats_err < 0) { log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); err = -1; savederrno = stats_err; goto out; } stats_locked = 1; /* Collect stats */ if (compress_time < knet_h->stats.tx_compress_time_min) { knet_h->stats.tx_compress_time_min = compress_time; } if (compress_time > knet_h->stats.tx_compress_time_max) { knet_h->stats.tx_compress_time_max = compress_time; } knet_h->stats.tx_compress_time_ave = (unsigned long long)(knet_h->stats.tx_compress_time_ave * knet_h->stats.tx_compressed_packets + compress_time) / (knet_h->stats.tx_compressed_packets+1); if (err < 0) { knet_h->stats.tx_failed_to_compress++; log_warn(knet_h, KNET_SUB_COMPRESS, "Compression failed (%d): %s", err, strerror(savederrno)); } else { knet_h->stats.tx_compressed_packets++; knet_h->stats.tx_compressed_original_bytes += *inlen; knet_h->stats.tx_compressed_size_bytes += cmp_outlen; if (cmp_outlen < *inlen) { memmove(data, knet_h->send_to_links_buf_compress, cmp_outlen); *inlen = cmp_outlen; *data_compressed = 1; } else { knet_h->stats.tx_unable_to_compress++; } } } if (!*data_compressed) { if (!stats_locked) { stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); if (stats_err < 0) { log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); err = -1; savederrno = stats_err; goto out; } stats_locked = 1; } knet_h->stats.tx_uncompressed_packets++; } if (stats_locked) { pthread_mutex_unlock(&knet_h->handle_stats_mutex); } } out: errno = savederrno; return err; } static int _encrypt_bufs(knet_handle_t knet_h, int msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int *iovcnt_out) { int err = 0, savederrno = 0, stats_err = 0; struct timespec start_time; struct timespec end_time; uint64_t crypt_time; uint8_t frag_idx = 0; size_t outlen, uncrypted_frag_size; int j; if (knet_h->crypto_in_use_config) { while (frag_idx < msgs_to_send) { clock_gettime(CLOCK_MONOTONIC, &start_time); if (crypto_encrypt_and_signv( knet_h, iov_out[frag_idx], *iovcnt_out, knet_h->send_to_links_buf_crypt[frag_idx], (ssize_t *)&outlen) < 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to encrypt packet"); savederrno = ECHILD; err = -1; goto out; } clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, &crypt_time); stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); if (stats_err < 0) { log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); err = -1; savederrno = stats_err; goto out; } if (crypt_time < knet_h->stats.tx_crypt_time_min) { knet_h->stats.tx_crypt_time_min = crypt_time; } if (crypt_time > knet_h->stats.tx_crypt_time_max) { knet_h->stats.tx_crypt_time_max = crypt_time; } knet_h->stats.tx_crypt_time_ave = (knet_h->stats.tx_crypt_time_ave * knet_h->stats.tx_crypt_packets + crypt_time) / (knet_h->stats.tx_crypt_packets+1); uncrypted_frag_size = 0; for (j=0; j < *iovcnt_out; j++) { uncrypted_frag_size += iov_out[frag_idx][j].iov_len; } knet_h->stats.tx_crypt_byte_overhead += (outlen - uncrypted_frag_size); knet_h->stats.tx_crypt_packets++; pthread_mutex_unlock(&knet_h->handle_stats_mutex); iov_out[frag_idx][0].iov_base = knet_h->send_to_links_buf_crypt[frag_idx]; iov_out[frag_idx][0].iov_len = outlen; frag_idx++; } *iovcnt_out = 1; } out: errno = savederrno; return err; } static int _get_tx_seq_num(knet_handle_t knet_h, seq_num_t *tx_seq_num) { int savederrno = 0; savederrno = pthread_mutex_lock(&knet_h->tx_seq_num_mutex); if (savederrno) { log_debug(knet_h, KNET_SUB_TX, "Unable to get seq mutex lock"); errno = savederrno; return -1; } knet_h->tx_seq_num++; /* * force seq_num 0 to detect a node that has crashed and rejoining * the knet instance. seq_num 0 will clear the buffers in the RX * thread */ if (knet_h->tx_seq_num == 0) { knet_h->tx_seq_num++; } /* * cache the value in locked context */ *tx_seq_num = knet_h->tx_seq_num; pthread_mutex_unlock(&knet_h->tx_seq_num_mutex); /* * forcefully broadcast a ping to all nodes every SEQ_MAX / 8 * pckts. * this solves 2 problems: * 1) on TX socket overloads we generate extra pings to keep links alive * 2) in 3+ nodes setup, where all the traffic is flowing between node 1 and 2, * node 3+ will be able to keep in sync on the TX seq_num even without * receiving traffic or pings in betweens. This avoids issues with * rollover of the circular buffer */ if (*tx_seq_num % (SEQ_MAX / 8) == 0) { _send_pings(knet_h, 0); } return 0; } static int _get_data_dests(knet_handle_t knet_h, unsigned char* data, size_t inlen, int8_t *channel, int *bcast, int *send_local, knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries, int is_sync) { int err = 0, savederrno = 0; knet_node_id_t dst_host_ids_temp[KNET_MAX_HOST]; /* store destinations from filter */ size_t dst_host_ids_entries_temp = 0; size_t dst_host_ids_entries_temp2 = 0; /* workaround gcc here */ struct knet_host *dst_host; size_t host_idx; memset(dst_host_ids_temp, 0, sizeof(dst_host_ids_temp)); if (knet_h->dst_host_filter_fn) { *bcast = knet_h->dst_host_filter_fn( knet_h->dst_host_filter_fn_private_data, data, inlen, KNET_NOTIFY_TX, knet_h->host_id, knet_h->host_id, channel, dst_host_ids_temp, &dst_host_ids_entries_temp); if (*bcast < 0) { log_debug(knet_h, KNET_SUB_TX, "Error from dst_host_filter_fn: %d", *bcast); savederrno = EFAULT; err = -1; goto out; } if ((!*bcast) && (!dst_host_ids_entries_temp)) { log_debug(knet_h, KNET_SUB_TX, "Message is unicast but no dst_host_ids_entries"); savederrno = EINVAL; err = -1; goto out; } if ((!*bcast) && (dst_host_ids_entries_temp > KNET_MAX_HOST)) { log_debug(knet_h, KNET_SUB_TX, "dst_host_filter_fn returned too many destinations"); savederrno = EINVAL; err = -1; goto out; } if (is_sync) { if ((*bcast) || ((!*bcast) && (dst_host_ids_entries_temp > 1))) { log_debug(knet_h, KNET_SUB_TX, "knet_send_sync is only supported with unicast packets for one destination"); savederrno = E2BIG; err = -1; goto out; } } } /* * check destinations hosts before spending time * in fragmenting/encrypting packets to save * time processing data for unreachable hosts. * for unicast, also remap the destination data * to skip unreachable hosts. */ if (!*bcast) { *dst_host_ids_entries = dst_host_ids_entries_temp2; for (host_idx = 0; host_idx < dst_host_ids_entries_temp; host_idx++) { dst_host = knet_h->host_index[dst_host_ids_temp[host_idx]]; if (!dst_host) { continue; } if ((dst_host->host_id == knet_h->host_id) && (knet_h->has_loop_link)) { *send_local = 1; } if (!((dst_host->host_id == knet_h->host_id) && (knet_h->has_loop_link)) && dst_host->status.reachable) { dst_host_ids[dst_host_ids_entries_temp2] = dst_host_ids_temp[host_idx]; dst_host_ids_entries_temp2++; } } if ((!dst_host_ids_entries_temp2) && (!*send_local)) { savederrno = EHOSTDOWN; err = -1; goto out; } *dst_host_ids_entries = dst_host_ids_entries_temp2; } else { *bcast = 0; *send_local = 0; for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { if ((dst_host->host_id == knet_h->host_id) && (knet_h->has_loop_link)) { *send_local = 1; } if (!(dst_host->host_id == knet_h->host_id && knet_h->has_loop_link) && dst_host->status.reachable) { *bcast = 1; } } if ((!*bcast) && (!*send_local)) { savederrno = EHOSTDOWN; err = -1; goto out; } } out: errno = savederrno; return err; } static int _prep_and_send_msgs(knet_handle_t knet_h, int bcast, knet_node_id_t *dst_host_ids, size_t dst_host_ids_entries, int msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int iovcnt_out) { int err = 0, savederrno = 0; struct knet_host *dst_host; struct knet_mmsghdr msg[PCKT_FRAG_MAX]; int msg_idx; size_t host_idx; memset(&msg, 0, sizeof(msg)); msg_idx = 0; while (msg_idx < msgs_to_send) { msg[msg_idx].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); /* this will set properly in _dispatch_to_links() */ msg[msg_idx].msg_hdr.msg_iov = &iov_out[msg_idx][0]; msg[msg_idx].msg_hdr.msg_iovlen = iovcnt_out; msg_idx++; } if (!bcast) { for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) { dst_host = knet_h->host_index[dst_host_ids[host_idx]]; err = _dispatch_to_links(knet_h, dst_host, &msg[0], msgs_to_send); savederrno = errno; if (err) { goto out; } } } else { for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { if (dst_host->status.reachable) { err = _dispatch_to_links(knet_h, dst_host, &msg[0], msgs_to_send); savederrno = errno; if (err) { goto out; } } } } out: errno = savederrno; return err; } static int _parse_recv_from_sock(knet_handle_t knet_h, size_t inlen, int8_t channel, uint8_t onwire_ver, int is_sync) { int err = 0, savederrno = 0; struct knet_header *inbuf = knet_h->recv_from_sock_buf; /* all TX packets are stored here regardless of the onwire */ unsigned char *data; /* onwire neutrual pointer to data to send */ int data_compressed = 0; /* track data compression to fill the header */ seq_num_t tx_seq_num; uint32_t data_checksum = 0; /* used only for debugging at the moment */ int bcast = 1; /* assume all packets are to be broadcasted unless filter tells us differently */ knet_node_id_t dst_host_ids[KNET_MAX_HOST]; /* store destinations from filter */ size_t dst_host_ids_entries = 0; int send_local = 0; /* send packets to loopback */ struct iovec iov_out[PCKT_FRAG_MAX][2]; int iovcnt_out = 2; int msgs_to_send = 0; if (knet_h->enabled != 1) { log_debug(knet_h, KNET_SUB_TX, "Received data packet but forwarding is disabled"); savederrno = ECANCELED; err = -1; goto out; } if (knet_h->onwire_ver_remap) { data = get_data_v1(knet_h, inbuf); } else { switch (onwire_ver) { case 1: data = get_data_v1(knet_h, inbuf); break; default: /* this should never hit as filters are in place in the calling functions */ log_warn(knet_h, KNET_SUB_TX, "preparing data onwire version %u not supported", onwire_ver); savederrno = EINVAL; err = -1; goto out; break; } } #ifdef ONWIRE_V1_EXTRA_DEBUG data_checksum = compute_chksum(data, inlen); #endif memset(dst_host_ids, 0, sizeof(dst_host_ids)); err = _get_data_dests(knet_h, data, inlen, &channel, &bcast, &send_local, dst_host_ids, &dst_host_ids_entries, is_sync); if (err < 0) { savederrno = errno; goto out; } /* Send to localhost if appropriate and enabled */ if (send_local) { err = _dispatch_to_local(knet_h, data, inlen, channel); if (err < 0) { savederrno = errno; goto out; } } err = _compress_data(knet_h, data, &inlen, &data_compressed); if (err < 0) { savederrno = errno; goto out; } err = _get_tx_seq_num(knet_h, &tx_seq_num); if (err < 0) { savederrno = errno; goto out; } err = _prep_tx_bufs(knet_h, inbuf, onwire_ver, data, inlen, data_checksum, tx_seq_num, channel, bcast, data_compressed, &msgs_to_send, iov_out, &iovcnt_out); if (err < 0) { savederrno = errno; goto out; } err = _encrypt_bufs(knet_h, msgs_to_send, iov_out, &iovcnt_out); if (err < 0) { savederrno = errno; goto out; } err = _prep_and_send_msgs(knet_h, bcast, dst_host_ids, dst_host_ids_entries, msgs_to_send, iov_out, iovcnt_out); if (err < 0) { savederrno = errno; goto out; } out: errno = savederrno; return err; } static void _handle_send_to_links(knet_handle_t knet_h, int sockfd, uint8_t onwire_ver, int8_t channel) { ssize_t inlen = 0; int savederrno = 0, docallback = 0; struct iovec iov_in; struct msghdr msg; struct sockaddr_storage address; memset(&iov_in, 0, sizeof(iov_in)); if (knet_h->onwire_ver_remap) { iov_in.iov_base = (void *)get_data_v1(knet_h, knet_h->recv_from_sock_buf); iov_in.iov_len = KNET_MAX_PACKET_SIZE; } else { switch (onwire_ver) { case 1: iov_in.iov_base = (void *)get_data_v1(knet_h, knet_h->recv_from_sock_buf); iov_in.iov_len = KNET_MAX_PACKET_SIZE; break; default: log_warn(knet_h, KNET_SUB_TX, "preparing data onwire version %u not supported", onwire_ver); break; } } memset(&msg, 0, sizeof(struct msghdr)); msg.msg_name = &address; msg.msg_namelen = knet_h->knet_transport_fd_tracker[sockfd].sockaddr_len; msg.msg_iov = &iov_in; msg.msg_iovlen = 1; if ((channel >= 0) && (channel < KNET_DATAFD_MAX) && (!knet_h->sockfd[channel].is_socket)) { inlen = readv(sockfd, msg.msg_iov, 1); } else { inlen = recvmsg(sockfd, &msg, MSG_DONTWAIT | MSG_NOSIGNAL); if (msg.msg_flags & MSG_TRUNC) { log_warn(knet_h, KNET_SUB_TX, "Received truncated message from sock %d. Discarding", sockfd); return; } } if (inlen == 0) { savederrno = 0; docallback = 1; } else if (inlen < 0) { struct epoll_event ev; savederrno = errno; docallback = 1; memset(&ev, 0, sizeof(struct epoll_event)); if (epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_DEL, knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], &ev)) { log_err(knet_h, KNET_SUB_TX, "Unable to del datafd %d from linkfd epoll pool: %s", knet_h->sockfd[channel].sockfd[0], strerror(savederrno)); } else { knet_h->sockfd[channel].has_error = 1; } } else { _parse_recv_from_sock(knet_h, inlen, channel, onwire_ver, 0); } if (docallback) { knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data, knet_h->sockfd[channel].sockfd[0], channel, KNET_NOTIFY_TX, inlen, savederrno); } } void *_handle_send_to_links_thread(void *data) { knet_handle_t knet_h = (knet_handle_t) data; struct epoll_event events[KNET_EPOLL_MAX_EVENTS + 1]; /* see _init_epolls for + 1 */ int i, nev; int flush, flush_queue_limit; int8_t channel; uint8_t onwire_ver; set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_STARTED); memset(&events, 0, sizeof(events)); flush_queue_limit = 0; while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(knet_h->send_to_links_epollfd, events, KNET_EPOLL_MAX_EVENTS + 1, knet_h->threads_timer_res / 1000); flush = get_thread_flush_queue(knet_h, KNET_THREAD_TX); /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { /* * ideally we want to communicate that we are done flushing * the queue when we have an epoll timeout event */ if (flush == KNET_THREAD_QUEUE_FLUSH) { set_thread_flush_queue(knet_h, KNET_THREAD_TX, KNET_THREAD_QUEUE_FLUSHED); flush_queue_limit = 0; } continue; } /* * fall back in case the TX sockets will continue receive traffic * and we do not hit an epoll timeout. * * allow up to a 100 loops to flush queues, then we give up. * there might be more clean ways to do it by checking the buffer queue * on each socket, but we have tons of sockets and calculations can go wrong. * Also, why would you disable data forwarding and still send packets? */ if (flush == KNET_THREAD_QUEUE_FLUSH) { if (flush_queue_limit >= 100) { log_debug(knet_h, KNET_SUB_TX, "Timeout flushing the TX queue, expect packet loss"); set_thread_flush_queue(knet_h, KNET_THREAD_TX, KNET_THREAD_QUEUE_FLUSHED); flush_queue_limit = 0; } else { flush_queue_limit++; } } else { flush_queue_limit = 0; } if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to get read lock"); continue; } if (pthread_mutex_lock(&knet_h->onwire_mutex)) { log_debug(knet_h, KNET_SUB_TX, "Unable to get onwire mutex lock"); goto out_unlock; } onwire_ver = knet_h->onwire_ver; pthread_mutex_unlock(&knet_h->onwire_mutex); for (i = 0; i < nev; i++) { for (channel = 0; channel < KNET_DATAFD_MAX; channel++) { if ((knet_h->sockfd[channel].in_use) && (knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created] == events[i].data.fd)) { break; } } if (channel >= KNET_DATAFD_MAX) { log_debug(knet_h, KNET_SUB_TX, "No available channels"); continue; /* channel not found */ } if (pthread_mutex_lock(&knet_h->tx_mutex) != 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to get mutex lock"); continue; } _handle_send_to_links(knet_h, events[i].data.fd, onwire_ver, channel); pthread_mutex_unlock(&knet_h->tx_mutex); } out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); } set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_STOPPED); return NULL; } int knet_send_sync(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0, err = 0; uint8_t onwire_ver; if (!_is_valid_handle(knet_h)) { return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->dst_host_filter_fn) { savederrno = ENETDOWN; err = -1; goto out; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out; } if (pthread_mutex_lock(&knet_h->onwire_mutex)) { log_debug(knet_h, KNET_SUB_TX, "Unable to get onwire mutex lock"); goto out; } onwire_ver = knet_h->onwire_ver; pthread_mutex_unlock(&knet_h->onwire_mutex); savederrno = pthread_mutex_lock(&knet_h->tx_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get TX mutex lock: %s", strerror(savederrno)); err = -1; goto out; } if (knet_h->onwire_ver_remap) { memmove(get_data_v1(knet_h, knet_h->recv_from_sock_buf), buff, buff_len); } else { switch (onwire_ver) { case 1: memmove(get_data_v1(knet_h, knet_h->recv_from_sock_buf), buff, buff_len); break; default: log_warn(knet_h, KNET_SUB_TX, "preparing sync data onwire version %u not supported", onwire_ver); goto out_tx; break; } } err = _parse_recv_from_sock(knet_h, buff_len, channel, onwire_ver, 1); savederrno = errno; out_tx: pthread_mutex_unlock(&knet_h->tx_mutex); out: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } ssize_t knet_send(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0; ssize_t err = 0; struct iovec iov_out[1]; if (!_is_valid_handle(knet_h)) { return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out_unlock; } memset(iov_out, 0, sizeof(iov_out)); iov_out[0].iov_base = (void *)buff; iov_out[0].iov_len = buff_len; err = writev(knet_h->sockfd[channel].sockfd[0], iov_out, 1); savederrno = errno; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } diff --git a/libknet/transport_sctp.c b/libknet/transport_sctp.c index 0bd584ad..c27794de 100644 --- a/libknet/transport_sctp.c +++ b/libknet/transport_sctp.c @@ -1,1639 +1,1637 @@ /* * Copyright (C) 2016-2023 Red Hat, Inc. All rights reserved. * * Author: Christine Caulfield * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include "compat.h" #include "host.h" #include "links.h" #include "links_acl.h" #include "links_acl_ip.h" #include "logging.h" #include "netutils.h" #include "common.h" #include "transport_common.h" #include "transports.h" #include "threads_common.h" #ifdef HAVE_NETINET_SCTP_H #include #include "transport_sctp.h" typedef struct sctp_handle_info { struct qb_list_head listen_links_list; struct qb_list_head connect_links_list; int connect_epollfd; int connectsockfd[2]; int listen_epollfd; int listensockfd[2]; pthread_t connect_thread; pthread_t listen_thread; socklen_t event_subscribe_kernel_size; char *event_subscribe_buffer; } sctp_handle_info_t; /* * use by fd_tracker data type */ #define SCTP_NO_LINK_INFO 0 #define SCTP_LISTENER_LINK_INFO 1 #define SCTP_ACCEPTED_LINK_INFO 2 #define SCTP_CONNECT_LINK_INFO 3 /* * this value is per listener */ #define MAX_ACCEPTED_SOCKS 256 typedef struct sctp_listen_link_info { struct qb_list_head list; int listen_sock; int accepted_socks[MAX_ACCEPTED_SOCKS]; struct sockaddr_storage src_address; int on_listener_epoll; int on_rx_epoll; int sock_shutdown; } sctp_listen_link_info_t; typedef struct sctp_accepted_link_info { char mread_buf[KNET_DATABUFSIZE]; ssize_t mread_len; sctp_listen_link_info_t *link_info; } sctp_accepted_link_info_t ; typedef struct sctp_connect_link_info { struct qb_list_head list; sctp_listen_link_info_t *listener; struct knet_link *link; struct sockaddr_storage dst_address; int connect_sock; int on_rx_epoll; int close_sock; int sock_shutdown; } sctp_connect_link_info_t; /* * socket handling functions * * those functions do NOT perform locking. locking * should be handled in the right context from callers */ /* * sockets are removed from rx_epoll from callers * see also error handling functions */ static int _close_connect_socket(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; struct epoll_event ev; sctp_connect_link_info_t *info = kn_link->transport_link; if (info->connect_sock != -1) { if (info->on_rx_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = info->connect_sock; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->connect_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove connected socket from epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_rx_epoll = 0; } if (_set_fd_tracker(knet_h, info->connect_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); } else { close(info->connect_sock); info->connect_sock = -1; } } exit_error: errno = savederrno; return err; } static int _enable_sctp_notifications(knet_handle_t knet_h, int sock, const char *type) { int err = 0, savederrno = 0; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; if (setsockopt(sock, IPPROTO_SCTP, SCTP_EVENTS, handle_info->event_subscribe_buffer, handle_info->event_subscribe_kernel_size) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to enable %s events: %s", type, strerror(savederrno)); } errno = savederrno; return err; } static int _configure_sctp_socket(knet_handle_t knet_h, int sock, struct sockaddr_storage *address, uint64_t flags, const char *type) { int err = 0, savederrno = 0; int value; int level; #ifdef SOL_SCTP level = SOL_SCTP; #else level = IPPROTO_SCTP; #endif if (_configure_transport_socket(knet_h, sock, address, flags, type) < 0) { savederrno = errno; err = -1; goto exit_error; } value = 1; if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &value, sizeof(value)) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set reuseaddr on socket %d: %s", sock, strerror(savederrno)); goto exit_error; } value = 1; if (setsockopt(sock, level, SCTP_NODELAY, &value, sizeof(value)) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set sctp nodelay: %s", strerror(savederrno)); goto exit_error; } if (_enable_sctp_notifications(knet_h, sock, type) < 0) { savederrno = errno; err = -1; } exit_error: errno = savederrno; return err; } static int _reconnect_socket(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; sctp_connect_link_info_t *info = kn_link->transport_link; if (connect(info->connect_sock, (struct sockaddr *)&kn_link->dst_addr, sockaddr_len(&kn_link->dst_addr)) < 0) { savederrno = errno; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP socket %d received error: %s", info->connect_sock, strerror(savederrno)); if ((savederrno != EALREADY) && (savederrno != EINPROGRESS) && (savederrno != EISCONN)) { err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to connect SCTP socket %d: %s", info->connect_sock, strerror(savederrno)); } } errno = savederrno; return err; } static int _create_connect_socket(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; struct epoll_event ev; sctp_connect_link_info_t *info = kn_link->transport_link; int connect_sock; struct sockaddr_storage connect_addr; connect_sock = socket(kn_link->dst_addr.ss_family, SOCK_STREAM, IPPROTO_SCTP); if (connect_sock < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create send/recv socket: %s", strerror(savederrno)); goto exit_error; } if (_configure_sctp_socket(knet_h, connect_sock, &kn_link->dst_addr, kn_link->flags, "SCTP connect") < 0) { savederrno = errno; err = -1; goto exit_error; } memset(&connect_addr, 0, sizeof(struct sockaddr_storage)); if (knet_strtoaddr(kn_link->status.src_ipaddr, "0", &connect_addr, sockaddr_len(&connect_addr)) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to resolve connecting socket: %s", strerror(savederrno)); goto exit_error; } if (bind(connect_sock, (struct sockaddr *)&connect_addr, sockaddr_len(&connect_addr)) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to bind connecting socket: %s", strerror(savederrno)); goto exit_error; } if (_set_fd_tracker(knet_h, connect_sock, KNET_TRANSPORT_SCTP, SCTP_CONNECT_LINK_INFO, sockaddr_len(&kn_link->src_addr), info) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = connect_sock; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, connect_sock, &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add connected socket to epoll pool: %s", strerror(errno)); } info->on_rx_epoll = 1; info->connect_sock = connect_sock; info->close_sock = 0; kn_link->outsock = info->connect_sock; if (_reconnect_socket(knet_h, kn_link) < 0) { savederrno = errno; err = -1; goto exit_error; } exit_error: if (err) { if (connect_sock >= 0) { close(connect_sock); } } errno = savederrno; return err; } static void _lock_sleep_relock(knet_handle_t knet_h) { int i = 0; /* Don't hold onto the lock while sleeping */ pthread_rwlock_unlock(&knet_h->global_rwlock); while (i < 5) { usleep(knet_h->threads_timer_res / 16); if (!pthread_rwlock_rdlock(&knet_h->global_rwlock)) { /* * lock acquired, we can go out */ return; } else { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get read lock!"); i++; } } /* * time to crash! if we cannot re-acquire the lock * there is no easy way out of this one */ assert(0); } transport_sock_error_t sctp_transport_tx_sock_error(knet_handle_t knet_h, int sockfd, int subsys, int recv_err, int recv_errno) { sctp_connect_link_info_t *connect_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_accepted_link_info_t *accepted_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_listen_link_info_t *listen_info; if (recv_err < 0) { switch (knet_h->knet_transport_fd_tracker[sockfd].data_type) { case SCTP_CONNECT_LINK_INFO: if (connect_info->link->transport_connected == 0) { return KNET_TRANSPORT_SOCK_ERROR_INTERNAL; } break; case SCTP_ACCEPTED_LINK_INFO: listen_info = accepted_info->link_info; if (listen_info->listen_sock != sockfd) { if (listen_info->on_rx_epoll == 0) { return KNET_TRANSPORT_SOCK_ERROR_INTERNAL; } } break; } if (recv_errno == EAGAIN) { -#ifdef DEBUG - log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Sock: %d is overloaded. Slowing TX down", sockfd); -#endif + log_trace(knet_h, KNET_SUB_TRANSP_SCTP, "Sock: %d is overloaded. Slowing TX down", sockfd); _lock_sleep_relock(knet_h); return KNET_TRANSPORT_SOCK_ERROR_RETRY; } return KNET_TRANSPORT_SOCK_ERROR_INTERNAL; } return KNET_TRANSPORT_SOCK_ERROR_IGNORE; } /* * socket error management functions * * both called with global read lock. * * NOTE: we need to remove the fd from the epoll as soon as possible * even before we notify the respective thread to take care of it * because scheduling can make it so that this thread will overload * and the threads supposed to take care of the error will never * be able to take action. * we CANNOT handle FDs here directly (close/reconnect/etc) due * to locking context. We need to delegate that to their respective * management threads within the global write lock. * * this function is called from: * - RX thread with recv_err <= 0 directly on recvmmsg error * - transport_rx_is_data when msg_len == 0 (recv_err = 1) * - transport_rx_is_data on notification (recv_err = 2) * * basically this small abuse of recv_err is to detect notifications * generated by sockets created by listen(). */ transport_sock_error_t sctp_transport_rx_sock_error(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno) { struct epoll_event ev; sctp_accepted_link_info_t *accepted_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_listen_link_info_t *listen_info; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; switch (knet_h->knet_transport_fd_tracker[sockfd].data_type) { case SCTP_CONNECT_LINK_INFO: /* * all connect link have notifications enabled * and we accept only data from notification and * generic recvmmsg errors. * * Errors generated by msg_len 0 can be ignored because * they follow a notification (double notification) */ if (recv_err != 1) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying connect thread that sockfd %d received an error", sockfd); if (sendto(handle_info->connectsockfd[1], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify connect thread: %s", strerror(errno)); } } break; case SCTP_ACCEPTED_LINK_INFO: listen_info = accepted_info->link_info; if (listen_info->listen_sock != sockfd) { if (recv_err != 1) { if (listen_info->on_rx_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = sockfd; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, sockfd, &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove EOFed socket from epoll pool: %s", strerror(errno)); return -1; } listen_info->on_rx_epoll = 0; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying listen thread that sockfd %d received an error", sockfd); if (sendto(handle_info->listensockfd[1], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify listen thread: %s", strerror(errno)); } } } else { /* * this means the listen() socket has generated * a notification. now what? :-) */ log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for listen() socket %d", sockfd); } break; default: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received unknown notification? %d", sockfd); break; } /* * Under RX pressure we need to give time to IPC to pick up the message */ _lock_sleep_relock(knet_h); return 0; } /* * NOTE: sctp_transport_rx_is_data is called with global rdlock * delegate any FD error management to sctp_transport_rx_sock_error * and keep this code to parsing incoming data only */ transport_rx_isdata_t sctp_transport_rx_is_data(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg) { size_t i; struct iovec *iov = msg->msg_hdr.msg_iov; size_t iovlen = msg->msg_hdr.msg_iovlen; struct sctp_assoc_change *sac; union sctp_notification *snp; sctp_accepted_link_info_t *listen_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_connect_link_info_t *connect_info = knet_h->knet_transport_fd_tracker[sockfd].data; if (!(msg->msg_hdr.msg_flags & MSG_NOTIFICATION)) { if (msg->msg_len == 0) { /* * NOTE: with event notification enabled, we receive error twice: * 1) from the event notification * 2) followed by a 0 byte msg_len * * the event handler should take care to avoid #2 by stopping * the rx thread from processing more packets than necessary. */ if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) { if (connect_info->sock_shutdown) { return KNET_TRANSPORT_RX_OOB_DATA_CONTINUE; } } else { if (listen_info->link_info->sock_shutdown) { return KNET_TRANSPORT_RX_OOB_DATA_CONTINUE; } } /* * this is pretty much dead code and we should never hit it. * keep it for safety and avoid the rx thread to process * bad info / data. */ return KNET_TRANSPORT_RX_NOT_DATA_STOP; } /* * missing MSG_EOR has to be treated as a short read * from the socket and we need to fill in the mread buf * while we wait for MSG_EOR */ if (!(msg->msg_hdr.msg_flags & MSG_EOR)) { /* * copy the incoming data into mread_buf + mread_len (incremental) * and increase mread_len */ memmove(listen_info->mread_buf + listen_info->mread_len, iov->iov_base, msg->msg_len); listen_info->mread_len = listen_info->mread_len + msg->msg_len; return KNET_TRANSPORT_RX_NOT_DATA_CONTINUE; } /* * got EOR. * if mread_len is > 0 we are completing a packet from short reads * complete reassembling the packet in mread_buf, copy it back in the iov * and set the iov/msg len numbers (size) correctly */ if (listen_info->mread_len) { /* * add last fragment to mread_buf */ memmove(listen_info->mread_buf + listen_info->mread_len, iov->iov_base, msg->msg_len); listen_info->mread_len = listen_info->mread_len + msg->msg_len; /* * move all back into the iovec */ memmove(iov->iov_base, listen_info->mread_buf, listen_info->mread_len); msg->msg_len = listen_info->mread_len; listen_info->mread_len = 0; } return KNET_TRANSPORT_RX_IS_DATA; } if (!(msg->msg_hdr.msg_flags & MSG_EOR)) { return KNET_TRANSPORT_RX_NOT_DATA_STOP; } for (i = 0; i < iovlen; i++) { snp = iov[i].iov_base; switch (snp->sn_header.sn_type) { case SCTP_ASSOC_CHANGE: sac = &snp->sn_assoc_change; switch (sac->sac_state) { case SCTP_COMM_LOST: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: comm_lost", sockfd); if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) { connect_info->close_sock = 1; connect_info->link->transport_connected = 0; } sctp_transport_rx_sock_error(knet_h, sockfd, 2, 0); return KNET_TRANSPORT_RX_OOB_DATA_STOP; break; case SCTP_COMM_UP: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: comm_up", sockfd); if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) { connect_info->link->transport_connected = 1; } break; case SCTP_RESTART: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: restart", sockfd); break; case SCTP_SHUTDOWN_COMP: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: shutdown comp", sockfd); if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) { connect_info->close_sock = 1; } sctp_transport_rx_sock_error(knet_h, sockfd, 2, 0); return KNET_TRANSPORT_RX_OOB_DATA_STOP; break; case SCTP_CANT_STR_ASSOC: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: cant str assoc", sockfd); sctp_transport_rx_sock_error(knet_h, sockfd, 2, 0); break; default: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: unknown %d", sockfd, sac->sac_state); break; } break; case SCTP_SHUTDOWN_EVENT: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp shutdown event socket %d", sockfd); if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) { connect_info->link->transport_connected = 0; connect_info->sock_shutdown = 1; } else { listen_info->link_info->sock_shutdown = 1; } break; case SCTP_SEND_FAILED: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp send failed socket: %d", sockfd); break; case SCTP_PEER_ADDR_CHANGE: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp peer addr change socket %d", sockfd); break; case SCTP_REMOTE_ERROR: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp remote error socket %d", sockfd); break; default: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] unknown sctp event socket: %d type: %hu", sockfd, snp->sn_header.sn_type); break; } } return KNET_TRANSPORT_RX_OOB_DATA_CONTINUE; } int sctp_transport_link_is_down(knet_handle_t knet_h, struct knet_link *kn_link) { sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; sctp_connect_link_info_t *info = kn_link->transport_link; kn_link->transport_connected = 0; info->close_sock = 1; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying connect thread that sockfd %d received a link down event", info->connect_sock); if (sendto(handle_info->connectsockfd[1], &info->connect_sock, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify connect thread: %s", strerror(errno)); } return 0; } /* * connect / outgoing socket management thread */ /* * _handle_connected_sctp* are called with a global write lock * from the connect_thread */ static void _handle_connected_sctp_socket(knet_handle_t knet_h, int connect_sock) { int err; unsigned int status, len = sizeof(status); sctp_connect_link_info_t *info = knet_h->knet_transport_fd_tracker[connect_sock].data; struct knet_link *kn_link = info->link; if (info->close_sock) { if (_close_connect_socket(knet_h, kn_link) < 0) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to close sock %d from _handle_connected_sctp_socket: %s", connect_sock, strerror(errno)); return; } info->close_sock = 0; if (_create_connect_socket(knet_h, kn_link) < 0) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to recreate connecting sock! %s", strerror(errno)); return; } } _reconnect_socket(knet_h, info->link); err = getsockopt(connect_sock, SOL_SOCKET, SO_ERROR, &status, &len); if (err) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP getsockopt() on connecting socket %d failed: %s", connect_sock, strerror(errno)); return; } if (status) { log_info(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP connect on %d to %s port %s failed: %s", connect_sock, kn_link->status.dst_ipaddr, kn_link->status.dst_port, strerror(status)); /* * No need to create a new socket if connect failed, * just retry connect */ return; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP handler fd %d now connected to %s port %s", connect_sock, kn_link->status.dst_ipaddr, kn_link->status.dst_port); } static void _handle_connected_sctp_notifications(knet_handle_t knet_h) { int sockfd = -1; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; if (recv(handle_info->connectsockfd[0], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Short read on connectsockfd"); return; } if (_is_valid_fd(knet_h, sockfd) < 1) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for connected socket fd error"); return; } /* * revalidate sockfd */ if ((sockfd < 0) || (sockfd >= KNET_MAX_FDS)) { return; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Processing connected error on socket: %d", sockfd); _handle_connected_sctp_socket(knet_h, sockfd); } static void *_sctp_connect_thread(void *data) { int savederrno; int i, nev; knet_handle_t knet_h = (knet_handle_t) data; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_STARTED); memset(&events, 0, sizeof(events)); while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(handle_info->connect_epollfd, events, KNET_EPOLL_MAX_EVENTS, knet_h->threads_timer_res / 1000); /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { continue; } if (nev < 0) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP connect handler EPOLL ERROR: %s", strerror(errno)); continue; } /* * Sort out which FD has a connection */ savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get write lock: %s", strerror(savederrno)); continue; } /* * minor optimization: deduplicate events * * in some cases we can receive multiple notifcations * of the same FD having issues or need handling. * It's enough to process it once even tho it's safe * to handle them multiple times. */ for (i = 0; i < nev; i++) { if (events[i].data.fd == handle_info->connectsockfd[0]) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received notification from rx_error for connected socket"); _handle_connected_sctp_notifications(knet_h); } else { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification on connected sockfd %d\n", events[i].data.fd); } } pthread_rwlock_unlock(&knet_h->global_rwlock); /* * this thread can generate events for itself. * we need to sleep in between loops to allow other threads * to be scheduled */ usleep(knet_h->reconnect_int * 1000); } set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_STOPPED); return NULL; } /* * listen/incoming connections management thread */ /* * Listener received a new connection * called with a write lock from main thread */ static void _handle_incoming_sctp(knet_handle_t knet_h, int listen_sock) { int err = 0, savederrno = 0; int new_fd; int i = -1; sctp_listen_link_info_t *info = knet_h->knet_transport_fd_tracker[listen_sock].data; struct epoll_event ev; struct sockaddr_storage ss; socklen_t sock_len = sizeof(ss); char addr_str[KNET_MAX_HOST_LEN]; char port_str[KNET_MAX_PORT_LEN]; sctp_accepted_link_info_t *accept_info = NULL; struct knet_host *host; struct knet_link *kn_link; int link_idx; sctp_connect_link_info_t *this_link_connect_info; sctp_listen_link_info_t *this_link_listen_info; int pass_acl = 0; memset(&ss, 0, sizeof(ss)); new_fd = accept(listen_sock, (struct sockaddr *)&ss, &sock_len); if (new_fd < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: accept error: %s", strerror(errno)); goto exit_error; } if (knet_addrtostr(&ss, sizeof(ss), addr_str, KNET_MAX_HOST_LEN, port_str, KNET_MAX_PORT_LEN) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: unable to gather socket info"); goto exit_error; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: received connection from: %s port: %s", addr_str, port_str); if (knet_h->use_access_lists) { for (host = knet_h->host_head; host != NULL; host = host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { kn_link = &host->link[link_idx]; if ((kn_link->configured) && (kn_link->transport == KNET_TRANSPORT_SCTP)) { this_link_connect_info = kn_link->transport_link; this_link_listen_info = this_link_connect_info->listener; if ((this_link_listen_info->listen_sock == listen_sock) && (check_validate(knet_h, kn_link, &ss))) { pass_acl = 1; break; } } } if (pass_acl) { break; } } if (!pass_acl) { savederrno = EINVAL; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Connection rejected from %s/%s", addr_str, port_str); close(new_fd); errno = savederrno; return; } } /* * Keep a track of all accepted FDs */ for (i=0; iaccepted_socks[i] == -1) { info->accepted_socks[i] = new_fd; break; } } if (i == MAX_ACCEPTED_SOCKS) { errno = EBUSY; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: too many connections!"); goto exit_error; } if (_configure_common_socket(knet_h, new_fd, 0, "SCTP incoming") < 0) { /* Inherit flags from listener? */ savederrno = errno; err = -1; goto exit_error; } if (_enable_sctp_notifications(knet_h, new_fd, "Incoming connection") < 0) { savederrno = errno; err = -1; goto exit_error; } accept_info = malloc(sizeof(sctp_accepted_link_info_t)); if (!accept_info) { savederrno = errno; err = -1; goto exit_error; } memset(accept_info, 0, sizeof(sctp_accepted_link_info_t)); accept_info->link_info = info; if (_set_fd_tracker(knet_h, new_fd, KNET_TRANSPORT_SCTP, SCTP_ACCEPTED_LINK_INFO, knet_h->knet_transport_fd_tracker[listen_sock].sockaddr_len, accept_info) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(errno)); goto exit_error; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = new_fd; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, new_fd, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: unable to add accepted socket %d to epoll pool: %s", new_fd, strerror(errno)); goto exit_error; } info->on_rx_epoll = 1; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: accepted new fd %d for %s/%s (listen fd: %d). index: %d", new_fd, addr_str, port_str, info->listen_sock, i); exit_error: if (err) { if ((i >= 0) && (i < MAX_ACCEPTED_SOCKS)) { info->accepted_socks[i] = -1; } /* * check the error to make coverity scan happy. * _set_fd_tracker cannot fail at this stage */ if (_set_fd_tracker(knet_h, new_fd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0){ log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to update fdtracker for socket %d", new_fd); } free(accept_info); if (new_fd >= 0) { close(new_fd); } } errno = savederrno; return; } /* * Listen thread received a notification of a bad socket that needs closing * called with a write lock from main thread */ static void _handle_listen_sctp_errors(knet_handle_t knet_h) { int sockfd = -1; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; sctp_accepted_link_info_t *accept_info; sctp_listen_link_info_t *info; struct knet_host *host; int link_idx; int i; if (recv(handle_info->listensockfd[0], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Short read on listensockfd"); return; } if (_is_valid_fd(knet_h, sockfd) < 1) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for listen socket fd error"); return; } /* * revalidate sockfd */ if ((sockfd < 0) || (sockfd >= KNET_MAX_FDS)) { return; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Processing listen error on socket: %d", sockfd); accept_info = knet_h->knet_transport_fd_tracker[sockfd].data; info = accept_info->link_info; /* * clear all links using this accepted socket as * outbound dynamically connected socket */ for (host = knet_h->host_head; host != NULL; host = host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if ((host->link[link_idx].dynamic == KNET_LINK_DYNIP) && (host->link[link_idx].outsock == sockfd)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Found dynamic connection on host %d link %d (%d)", host->host_id, link_idx, sockfd); host->link[link_idx].status.dynconnected = 0; host->link[link_idx].transport_connected = 0; host->link[link_idx].outsock = 0; memset(&host->link[link_idx].dst_addr, 0, sizeof(struct sockaddr_storage)); } } } for (i=0; iaccepted_socks[i]) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Closing accepted socket %d", sockfd); /* * check the error to make coverity scan happy. * _set_fd_tracker cannot fail at this stage */ if (_set_fd_tracker(knet_h, sockfd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to update fdtracker for socket %d", sockfd); } info->accepted_socks[i] = -1; free(accept_info); close(sockfd); break; /* Keeps covscan happy */ } } } static void *_sctp_listen_thread(void *data) { int savederrno; int i, nev; knet_handle_t knet_h = (knet_handle_t) data; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_STARTED); memset(&events, 0, sizeof(events)); while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(handle_info->listen_epollfd, events, KNET_EPOLL_MAX_EVENTS, knet_h->threads_timer_res / 1000); /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { continue; } if (nev < 0) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP listen handler EPOLL ERROR: %s", strerror(errno)); continue; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get write lock: %s", strerror(savederrno)); continue; } /* * Sort out which FD has an incoming connection */ for (i = 0; i < nev; i++) { if (events[i].data.fd == handle_info->listensockfd[0]) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received notification from rx_error for listener/accepted socket"); _handle_listen_sctp_errors(knet_h); } else { if (_is_valid_fd(knet_h, events[i].data.fd) == 1) { _handle_incoming_sctp(knet_h, events[i].data.fd); } else { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received listen notification from invalid socket"); } } } pthread_rwlock_unlock(&knet_h->global_rwlock); } set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_STOPPED); return NULL; } /* * sctp_link_listener_start/stop are called in global write lock * context from set_config and clear_config. */ static sctp_listen_link_info_t *sctp_link_listener_start(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; int listen_sock = -1; struct epoll_event ev; sctp_listen_link_info_t *info = NULL; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; /* * Only allocate a new listener if src address is different */ qb_list_for_each_entry(info, &handle_info->listen_links_list, list) { if (memcmp(&info->src_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)) == 0) { if ((check_add(knet_h, kn_link, -1, &kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) { return NULL; } return info; } } info = malloc(sizeof(sctp_listen_link_info_t)); if (!info) { err = -1; goto exit_error; } memset(info, 0, sizeof(sctp_listen_link_info_t)); memset(info->accepted_socks, -1, sizeof(info->accepted_socks)); memmove(&info->src_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)); listen_sock = socket(kn_link->src_addr.ss_family, SOCK_STREAM, IPPROTO_SCTP); if (listen_sock < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create listener socket: %s", strerror(savederrno)); goto exit_error; } if (_configure_sctp_socket(knet_h, listen_sock, &kn_link->src_addr, kn_link->flags, "SCTP listener") < 0) { savederrno = errno; err = -1; goto exit_error; } if (bind(listen_sock, (struct sockaddr *)&kn_link->src_addr, sockaddr_len(&kn_link->src_addr)) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to bind listener socket: %s", strerror(savederrno)); goto exit_error; } if (listen(listen_sock, 5) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to listen on listener socket: %s", strerror(savederrno)); goto exit_error; } if (_set_fd_tracker(knet_h, listen_sock, KNET_TRANSPORT_SCTP, SCTP_LISTENER_LINK_INFO, sockaddr_len(&kn_link->src_addr), info) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } if ((check_add(knet_h, kn_link, -1, &kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to configure default access lists: %s", strerror(savederrno)); goto exit_error; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = listen_sock; if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_ADD, listen_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add listener to epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_listener_epoll = 1; info->listen_sock = listen_sock; qb_list_add(&info->list, &handle_info->listen_links_list); log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Listening on fd %d for %s:%s", listen_sock, kn_link->status.src_ipaddr, kn_link->status.src_port); exit_error: if (err) { if ((info) && (info->on_listener_epoll)) { epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, listen_sock, &ev); } if (listen_sock >= 0) { check_rmall(knet_h, kn_link); close(listen_sock); } if (info) { free(info); info = NULL; } } errno = savederrno; return info; } static int sctp_link_listener_stop(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; int found = 0, i; struct knet_host *host; int link_idx; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; sctp_connect_link_info_t *this_link_info = kn_link->transport_link; sctp_listen_link_info_t *info = this_link_info->listener; sctp_connect_link_info_t *link_info; struct epoll_event ev; for (host = knet_h->host_head; host != NULL; host = host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if (&host->link[link_idx] == kn_link) continue; link_info = host->link[link_idx].transport_link; if ((link_info) && (link_info->listener == info)) { found = 1; break; } } } if ((check_rm(knet_h, kn_link, &kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != ENOENT)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove default access lists for %d", info->listen_sock); } if (found) { this_link_info->listener = NULL; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP listener socket %d still in use", info->listen_sock); savederrno = EBUSY; err = -1; goto exit_error; } if (info->on_listener_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = info->listen_sock; if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, info->listen_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove listener to epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_listener_epoll = 0; } if (_set_fd_tracker(knet_h, info->listen_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } check_rmall(knet_h, kn_link); close(info->listen_sock); for (i=0; i< MAX_ACCEPTED_SOCKS; i++) { if (info->accepted_socks[i] > -1) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = info->accepted_socks[i]; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->accepted_socks[i], &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove EOFed socket from epoll pool: %s", strerror(errno)); } info->on_rx_epoll = 0; free(knet_h->knet_transport_fd_tracker[info->accepted_socks[i]].data); close(info->accepted_socks[i]); if (_set_fd_tracker(knet_h, info->accepted_socks[i], KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } info->accepted_socks[i] = -1; } } qb_list_del(&info->list); free(info); this_link_info->listener = NULL; exit_error: errno = savederrno; return err; } /* * Links config/clear. Both called with global wrlock from link_set_config/clear_config */ int sctp_transport_link_set_config(knet_handle_t knet_h, struct knet_link *kn_link) { int savederrno = 0, err = 0; sctp_connect_link_info_t *info; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; info = malloc(sizeof(sctp_connect_link_info_t)); if (!info) { goto exit_error; } memset(info, 0, sizeof(sctp_connect_link_info_t)); kn_link->transport_link = info; info->link = kn_link; memmove(&info->dst_address, &kn_link->dst_addr, sizeof(struct sockaddr_storage)); info->connect_sock = -1; info->listener = sctp_link_listener_start(knet_h, kn_link); if (!info->listener) { savederrno = errno; err = -1; goto exit_error; } if (kn_link->dynamic == KNET_LINK_STATIC) { if (_create_connect_socket(knet_h, kn_link) < 0) { savederrno = errno; err = -1; goto exit_error; } kn_link->outsock = info->connect_sock; } qb_list_add(&info->list, &handle_info->connect_links_list); exit_error: if (err) { if (info) { if (info->connect_sock >= 0) { close(info->connect_sock); } if (info->listener) { sctp_link_listener_stop(knet_h, kn_link); } kn_link->transport_link = NULL; free(info); } } errno = savederrno; return err; } /* * called with global wrlock */ int sctp_transport_link_clear_config(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; sctp_connect_link_info_t *info; if (!kn_link) { errno = EINVAL; return -1; } info = kn_link->transport_link; if (!info) { errno = EINVAL; return -1; } if ((sctp_link_listener_stop(knet_h, kn_link) <0) && (errno != EBUSY)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove listener transport: %s", strerror(savederrno)); goto exit_error; } if (_close_connect_socket(knet_h, kn_link) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to close connected socket: %s", strerror(savederrno)); goto exit_error; } qb_list_del(&info->list); free(info); kn_link->transport_link = NULL; exit_error: errno = savederrno; return err; } /* * transport_free and transport_init are * called only from knet_handle_new and knet_handle_free. * all resources (hosts/links) should have been already freed at this point * and they are called in a write locked context, hence they * don't need their own locking. */ int sctp_transport_free(knet_handle_t knet_h) { sctp_handle_info_t *handle_info; void *thread_status; struct epoll_event ev; if (!knet_h->transports[KNET_TRANSPORT_SCTP]) { errno = EINVAL; return -1; } handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; /* * keep it here while we debug list usage and such */ if (!qb_list_empty(&handle_info->listen_links_list)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Internal error. listen links list is not empty"); } if (!qb_list_empty(&handle_info->connect_links_list)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Internal error. connect links list is not empty"); } if (handle_info->listen_thread) { pthread_cancel(handle_info->listen_thread); pthread_join(handle_info->listen_thread, &thread_status); } if (handle_info->connect_thread) { pthread_cancel(handle_info->connect_thread); pthread_join(handle_info->connect_thread, &thread_status); } if (handle_info->listensockfd[0] >= 0) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->listensockfd[0]; epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, handle_info->listensockfd[0], &ev); } if (handle_info->connectsockfd[0] >= 0) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->connectsockfd[0]; epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_DEL, handle_info->connectsockfd[0], &ev); } _close_socketpair(knet_h, handle_info->connectsockfd); _close_socketpair(knet_h, handle_info->listensockfd); if (handle_info->listen_epollfd >= 0) { close(handle_info->listen_epollfd); } if (handle_info->connect_epollfd >= 0) { close(handle_info->connect_epollfd); } free(handle_info->event_subscribe_buffer); free(handle_info); knet_h->transports[KNET_TRANSPORT_SCTP] = NULL; return 0; } static int _sctp_subscribe_init(knet_handle_t knet_h) { int test_socket, savederrno; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; char dummy_events[100]; struct sctp_event_subscribe *events; /* Below we set the first 6 fields of this expanding struct. * SCTP_EVENTS is deprecated, but SCTP_EVENT is not available * on Linux; on the other hand, FreeBSD and old Linux does not * accept small transfers, so we can't simply use this minimum * everywhere. Thus we query and store the native size. */ const unsigned int subscribe_min = 6; test_socket = socket(PF_INET, SOCK_STREAM, IPPROTO_SCTP); if (test_socket < 0) { if (errno == EPROTONOSUPPORT) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP not supported, skipping initialization"); return 0; } savederrno = errno; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create test socket: %s", strerror(savederrno)); return savederrno; } handle_info->event_subscribe_kernel_size = sizeof dummy_events; if (getsockopt(test_socket, IPPROTO_SCTP, SCTP_EVENTS, &dummy_events, &handle_info->event_subscribe_kernel_size)) { close(test_socket); savederrno = errno; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to query kernel size of struct sctp_event_subscribe: %s", strerror(savederrno)); return savederrno; } close(test_socket); if (handle_info->event_subscribe_kernel_size < subscribe_min) { savederrno = ERANGE; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "No kernel support for the necessary notifications: struct sctp_event_subscribe is %u bytes, %u needed", handle_info->event_subscribe_kernel_size, subscribe_min); return savederrno; } events = malloc(handle_info->event_subscribe_kernel_size); if (!events) { savederrno = errno; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Failed to allocate event subscribe buffer: %s", strerror(savederrno)); return savederrno; } memset(events, 0, handle_info->event_subscribe_kernel_size); events->sctp_data_io_event = 1; events->sctp_association_event = 1; events->sctp_address_event = 1; events->sctp_send_failure_event = 1; events->sctp_peer_error_event = 1; events->sctp_shutdown_event = 1; handle_info->event_subscribe_buffer = (char *)events; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Size of struct sctp_event_subscribe is %u in kernel, %zu in user space", handle_info->event_subscribe_kernel_size, sizeof(struct sctp_event_subscribe)); return 0; } int sctp_transport_init(knet_handle_t knet_h) { int err = 0, savederrno = 0; sctp_handle_info_t *handle_info; struct epoll_event ev; if (knet_h->transports[KNET_TRANSPORT_SCTP]) { errno = EEXIST; return -1; } handle_info = malloc(sizeof(sctp_handle_info_t)); if (!handle_info) { return -1; } memset(handle_info, 0,sizeof(sctp_handle_info_t)); knet_h->transports[KNET_TRANSPORT_SCTP] = handle_info; savederrno = _sctp_subscribe_init(knet_h); if (savederrno) { err = -1; goto exit_fail; } qb_list_init(&handle_info->listen_links_list); qb_list_init(&handle_info->connect_links_list); handle_info->listen_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1); if (handle_info->listen_epollfd < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create epoll listen fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(handle_info->listen_epollfd)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set CLOEXEC on listen_epollfd: %s", strerror(savederrno)); goto exit_fail; } handle_info->connect_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1); if (handle_info->connect_epollfd < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create epoll connect fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(handle_info->connect_epollfd)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set CLOEXEC on connect_epollfd: %s", strerror(savederrno)); goto exit_fail; } if (_init_socketpair(knet_h, handle_info->connectsockfd) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to init connect socketpair: %s", strerror(savederrno)); goto exit_fail; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->connectsockfd[0]; if (epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_ADD, handle_info->connectsockfd[0], &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add connectsockfd[0] to connect epoll pool: %s", strerror(savederrno)); goto exit_fail; } if (_init_socketpair(knet_h, handle_info->listensockfd) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to init listen socketpair: %s", strerror(savederrno)); goto exit_fail; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->listensockfd[0]; if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_ADD, handle_info->listensockfd[0], &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add listensockfd[0] to listen epoll pool: %s", strerror(savederrno)); goto exit_fail; } /* * Start connect & listener threads */ set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_REGISTERED); savederrno = pthread_create(&handle_info->listen_thread, 0, _sctp_listen_thread, (void *) knet_h); if (savederrno) { err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to start sctp listen thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_REGISTERED); savederrno = pthread_create(&handle_info->connect_thread, 0, _sctp_connect_thread, (void *) knet_h); if (savederrno) { err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to start sctp connect thread: %s", strerror(savederrno)); goto exit_fail; } exit_fail: if (err < 0) { sctp_transport_free(knet_h); } errno = savederrno; return err; } int sctp_transport_link_dyn_connect(knet_handle_t knet_h, int sockfd, struct knet_link *kn_link) { kn_link->outsock = sockfd; kn_link->status.dynconnected = 1; kn_link->transport_connected = 1; return 0; } #endif diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c index bb4b800a..61ce3325 100644 --- a/libknet/transport_udp.c +++ b/libknet/transport_udp.c @@ -1,486 +1,482 @@ /* * Copyright (C) 2016-2023 Red Hat, Inc. All rights reserved. * * Author: Christine Caulfield * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include #if defined (IP_RECVERR) || defined (IPV6_RECVERR) #include #endif #include "libknet.h" #include "compat.h" #include "host.h" #include "link.h" #include "logging.h" #include "common.h" #include "netutils.h" #include "transport_common.h" #include "transport_udp.h" #include "transports.h" #include "threads_common.h" typedef struct udp_handle_info { struct qb_list_head links_list; } udp_handle_info_t; typedef struct udp_link_info { struct qb_list_head list; struct sockaddr_storage local_address; int socket_fd; int on_epoll; } udp_link_info_t; int udp_transport_link_set_config(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; int sock = -1; struct epoll_event ev; udp_link_info_t *info; udp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_UDP]; #if defined (IP_RECVERR) || defined (IPV6_RECVERR) int value; #endif /* * Only allocate a new link if the local address is different */ qb_list_for_each_entry(info, &handle_info->links_list, list) { if (memcmp(&info->local_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)) == 0) { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Re-using existing UDP socket for new link"); kn_link->outsock = info->socket_fd; kn_link->transport_link = info; kn_link->transport_connected = 1; return 0; } } info = malloc(sizeof(udp_link_info_t)); if (!info) { err = -1; goto exit_error; } memset(info, 0, sizeof(udp_link_info_t)); sock = socket(kn_link->src_addr.ss_family, SOCK_DGRAM, 0); if (sock < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to create listener socket: %s", strerror(savederrno)); goto exit_error; } if (_configure_transport_socket(knet_h, sock, &kn_link->src_addr, kn_link->flags, "UDP") < 0) { savederrno = errno; err = -1; goto exit_error; } #ifdef IP_RECVERR if (kn_link->src_addr.ss_family == AF_INET) { value = 1; if (setsockopt(sock, SOL_IP, IP_RECVERR, &value, sizeof(value)) <0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVERR on socket: %s", strerror(savederrno)); goto exit_error; } log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_RECVERR enabled on socket: %i", sock); } #else log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_RECVERR not available in this build/platform"); #endif #ifdef IPV6_RECVERR if (kn_link->src_addr.ss_family == AF_INET6) { value = 1; if (setsockopt(sock, SOL_IPV6, IPV6_RECVERR, &value, sizeof(value)) <0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVERR on socket: %s", strerror(savederrno)); goto exit_error; } log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVERR enabled on socket: %i", sock); } #else log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVERR not available in this build/platform"); #endif if (bind(sock, (struct sockaddr *)&kn_link->src_addr, sockaddr_len(&kn_link->src_addr))) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to bind listener socket: %s", strerror(savederrno)); goto exit_error; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = sock; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to add listener to epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_epoll = 1; if (_set_fd_tracker(knet_h, sock, KNET_TRANSPORT_UDP, 0, sockaddr_len(&kn_link->src_addr), info) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } memmove(&info->local_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)); info->socket_fd = sock; qb_list_add(&info->list, &handle_info->links_list); kn_link->outsock = sock; kn_link->transport_link = info; kn_link->transport_connected = 1; exit_error: if (err) { if (info) { if (info->on_epoll) { epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, sock, &ev); } free(info); } if (sock >= 0) { close(sock); } } errno = savederrno; return err; } int udp_transport_link_clear_config(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; int found = 0; struct knet_host *host; int link_idx; udp_link_info_t *info = kn_link->transport_link; struct epoll_event ev; for (host = knet_h->host_head; host != NULL; host = host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if (&host->link[link_idx] == kn_link) continue; if (host->link[link_idx].transport_link == info) { found = 1; break; } } } if (found) { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "UDP socket %d still in use", info->socket_fd); savederrno = EBUSY; err = -1; goto exit_error; } if (info->on_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = info->socket_fd; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->socket_fd, &ev) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to remove UDP socket from epoll poll: %s", strerror(errno)); goto exit_error; } info->on_epoll = 0; } if (_set_fd_tracker(knet_h, info->socket_fd, KNET_MAX_TRANSPORTS, 0, sockaddr_len(&kn_link->src_addr), NULL) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } close(info->socket_fd); qb_list_del(&info->list); free(kn_link->transport_link); exit_error: errno = savederrno; return err; } int udp_transport_free(knet_handle_t knet_h) { udp_handle_info_t *handle_info; if (!knet_h->transports[KNET_TRANSPORT_UDP]) { errno = EINVAL; return -1; } handle_info = knet_h->transports[KNET_TRANSPORT_UDP]; /* * keep it here while we debug list usage and such */ if (!qb_list_empty(&handle_info->links_list)) { log_err(knet_h, KNET_SUB_TRANSP_UDP, "Internal error. handle list is not empty"); return -1; } free(handle_info); knet_h->transports[KNET_TRANSPORT_UDP] = NULL; return 0; } int udp_transport_init(knet_handle_t knet_h) { udp_handle_info_t *handle_info; if (knet_h->transports[KNET_TRANSPORT_UDP]) { errno = EEXIST; return -1; } handle_info = malloc(sizeof(udp_handle_info_t)); if (!handle_info) { return -1; } memset(handle_info, 0, sizeof(udp_handle_info_t)); knet_h->transports[KNET_TRANSPORT_UDP] = handle_info; qb_list_init(&handle_info->links_list); return 0; } #if defined (IP_RECVERR) || defined (IPV6_RECVERR) static int read_errs_from_sock(knet_handle_t knet_h, int sockfd) { int err = 0, savederrno = 0; int got_err = 0; char buffer[1024]; struct iovec iov; struct msghdr msg; struct cmsghdr *cmsg; struct sock_extended_err *sock_err; struct icmphdr icmph; struct sockaddr_storage remote; struct sockaddr_storage *origin; char addr_str[KNET_MAX_HOST_LEN]; char port_str[KNET_MAX_PORT_LEN]; char addr_remote_str[KNET_MAX_HOST_LEN]; char port_remote_str[KNET_MAX_PORT_LEN]; iov.iov_base = &icmph; iov.iov_len = sizeof(icmph); msg.msg_name = (void*)&remote; msg.msg_namelen = sizeof(remote); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_flags = 0; msg.msg_control = buffer; msg.msg_controllen = sizeof(buffer); for (;;) { err = recvmsg(sockfd, &msg, MSG_ERRQUEUE); savederrno = errno; if (err < 0) { if (!got_err) { errno = savederrno; return -1; } else { return 0; } } got_err = 1; for (cmsg = CMSG_FIRSTHDR(&msg);cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (((cmsg->cmsg_level == SOL_IP) && (cmsg->cmsg_type == IP_RECVERR)) || ((cmsg->cmsg_level == SOL_IPV6 && (cmsg->cmsg_type == IPV6_RECVERR)))) { sock_err = (struct sock_extended_err*)(void *)CMSG_DATA(cmsg); if (sock_err) { switch (sock_err->ee_origin) { case SO_EE_ORIGIN_NONE: /* no origin */ case SO_EE_ORIGIN_LOCAL: /* local source (EMSGSIZE) */ if (sock_err->ee_errno == EMSGSIZE || sock_err->ee_errno == EPERM) { if (pthread_mutex_lock(&knet_h->kmtu_mutex) != 0) { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Unable to get mutex lock"); knet_h->kernel_mtu = 0; break; } else { knet_h->kernel_mtu = sock_err->ee_info; log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu); pthread_mutex_unlock(&knet_h->kmtu_mutex); } force_pmtud_run(knet_h, KNET_SUB_TRANSP_UDP, 0, 0); } /* * those errors are way too noisy */ break; case SO_EE_ORIGIN_ICMP: /* ICMP */ case SO_EE_ORIGIN_ICMP6: /* ICMP6 */ origin = (struct sockaddr_storage *)(void *)SO_EE_OFFENDER(sock_err); if (knet_addrtostr(origin, sizeof(*origin), addr_str, KNET_MAX_HOST_LEN, port_str, KNET_MAX_PORT_LEN) < 0) { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from unknown source: %s", strerror(sock_err->ee_errno)); } else { if (knet_addrtostr(&remote, sizeof(remote), addr_remote_str, KNET_MAX_HOST_LEN, port_remote_str, KNET_MAX_PORT_LEN) < 0) { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s destination unknown", addr_str, strerror(sock_err->ee_errno)); } else { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s %s", addr_str, strerror(sock_err->ee_errno), addr_remote_str); if ((sock_err->ee_errno == ECONNREFUSED) || /* knet is not running on the other node */ (sock_err->ee_errno == ECONNABORTED) || /* local kernel closed the socket */ (sock_err->ee_errno == ENONET) || /* network does not exist */ (sock_err->ee_errno == ENETUNREACH) || /* network unreachable */ (sock_err->ee_errno == EHOSTUNREACH) || /* host unreachable */ (sock_err->ee_errno == EHOSTDOWN) || /* host down (from kernel/net/ipv4/icmp.c */ (sock_err->ee_errno == ENETDOWN)) { /* network down */ struct knet_host *host = NULL; struct knet_link *kn_link = NULL; int link_idx, found = 0; for (host = knet_h->host_head; host != NULL; host = host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { kn_link = &host->link[link_idx]; if (kn_link->outsock == sockfd) { if (!cmpaddr(&remote, &kn_link->dst_addr)) { found = 1; break; } } } if (found) { break; } } if ((host) && (kn_link) && (kn_link->status.connected)) { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Setting down host %u link %i", host->host_id, kn_link->link_id); /* * setting transport_connected = 0 will trigger * thread_heartbeat link_down process. * * the process terminates calling into transport_link_down * below that will set transport_connected = 1 */ kn_link->transport_connected = 0; } } } } break; } } else { log_debug(knet_h, KNET_SUB_TRANSP_UDP, "No data in MSG_ERRQUEUE"); } } } } } #else static int read_errs_from_sock(knet_handle_t knet_h, int sockfd) { return 0; } #endif transport_sock_error_t udp_transport_rx_sock_error(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno) { if (recv_errno == EAGAIN) { read_errs_from_sock(knet_h, sockfd); } return KNET_TRANSPORT_SOCK_ERROR_IGNORE; } transport_sock_error_t udp_transport_tx_sock_error(knet_handle_t knet_h, int sockfd, int subsys, int recv_err, int recv_errno) { if (recv_err < 0) { - log_trace(knet_h, KNET_SUB_TRANSP_UDP, "tx_sock_error, subsys=%d, recv_err=%d, recv_errno=%d", subsys, recv_err, recv_errno); + log_trace(knet_h, KNET_SUB_TRANSP_UDP, "tx_sock_error, subsys=%d, recv_err=%d: %s", subsys, recv_err, strerror(recv_errno)); if ((recv_errno == EMSGSIZE) || ((recv_errno == EPERM) && ((subsys == KNET_SUB_TX) || (subsys == KNET_SUB_PMTUD)))) { read_errs_from_sock(knet_h, sockfd); return KNET_TRANSPORT_SOCK_ERROR_IGNORE; } if ((recv_errno == EINVAL) || (recv_errno == EPERM) || (recv_errno == ENETUNREACH) || (recv_errno == ENETDOWN) || (recv_errno == EHOSTUNREACH)) { -#ifdef DEBUG if ((recv_errno == ENETUNREACH) || (recv_errno == ENETDOWN)) { - log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is unreachable.", sockfd); + log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is unreachable.", sockfd); } -#endif return KNET_TRANSPORT_SOCK_ERROR_INTERNAL; } if ((recv_errno == ENOBUFS) || (recv_errno == EAGAIN)) { -#ifdef DEBUG - log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is overloaded. Slowing TX down", sockfd); -#endif + log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is overloaded. Slowing TX down", sockfd); usleep(knet_h->threads_timer_res / 16); } else { read_errs_from_sock(knet_h, sockfd); } return KNET_TRANSPORT_SOCK_ERROR_RETRY; } return KNET_TRANSPORT_SOCK_ERROR_IGNORE; } transport_rx_isdata_t udp_transport_rx_is_data(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg) { if (msg->msg_len == 0) return KNET_TRANSPORT_RX_NOT_DATA_CONTINUE; return KNET_TRANSPORT_RX_IS_DATA; } int udp_transport_link_dyn_connect(knet_handle_t knet_h, int sockfd, struct knet_link *kn_link) { kn_link->status.dynconnected = 1; return 0; } int udp_transport_link_is_down(knet_handle_t knet_h, struct knet_link *kn_link) { /* * see comments about handling ICMP error messages */ kn_link->transport_connected = 1; return 0; }