Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/libknet/internals.h b/libknet/internals.h
index 7f689124..116623b2 100644
--- a/libknet/internals.h
+++ b/libknet/internals.h
@@ -1,495 +1,496 @@
/*
* Copyright (C) 2010-2024 Red Hat, Inc. All rights reserved.
*
* Authors: Fabio M. Di Nitto <fabbione@kronosnet.org>
* Federico Simoncelli <fsimon@kronosnet.org>
*
* This software licensed under LGPL-2.0+
*/
#ifndef __KNET_INTERNALS_H__
#define __KNET_INTERNALS_H__
/*
* NOTE: you shouldn't need to include this header normally
*/
#include <pthread.h>
#include <stddef.h>
#include <qb/qblist.h>
#include "libknet.h"
#include "onwire.h"
#include "compat.h"
#include "threads_common.h"
#define KNET_DATABUFSIZE KNET_MAX_PACKET_SIZE + KNET_HEADER_ALL_SIZE
#define KNET_DATABUFSIZE_CRYPT_PAD 1024
#define KNET_DATABUFSIZE_CRYPT KNET_DATABUFSIZE + KNET_DATABUFSIZE_CRYPT_PAD
#define KNET_DATABUFSIZE_COMPRESS_PAD 1024
#define KNET_DATABUFSIZE_COMPRESS KNET_DATABUFSIZE + KNET_DATABUFSIZE_COMPRESS_PAD
#define KNET_RING_RCVBUFF 8388608
#define PCKT_FRAG_MAX UINT8_MAX
#define PCKT_RX_BUFS 512
#define KNET_EPOLL_MAX_EVENTS KNET_DATAFD_MAX + 1
/*
* Size of threads stack. Value is choosen by experimenting, how much is needed
* to sucesfully finish test suite, and at the time of writing patch it was
* ~300KiB. To have some room for future enhancement it is increased
* by factor of 3 and rounded.
*/
#define KNET_THREAD_STACK_SIZE (1024 * 1024)
typedef void *knet_transport_link_t; /* per link transport handle */
typedef void *knet_transport_t; /* per knet_h transport handle */
struct knet_transport_ops; /* Forward because of circular dependancy */
struct knet_mmsghdr {
struct msghdr msg_hdr; /* Message header */
unsigned int msg_len; /* Number of bytes transmitted */
};
struct knet_link {
/* required */
struct sockaddr_storage src_addr;
struct sockaddr_storage dst_addr;
/* configurable */
unsigned int dynamic; /* see KNET_LINK_DYN_ define above */
uint8_t priority; /* higher priority == preferred for A/P */
unsigned long long ping_interval; /* interval */
unsigned long long pong_timeout; /* timeout */
unsigned long long pong_timeout_adj; /* timeout adjusted for latency */
uint8_t pong_timeout_backoff; /* see link.h for definition */
unsigned int latency_max_samples; /* precision */
uint8_t pong_count; /* how many ping/pong to send/receive before link is up */
uint64_t flags;
void *access_list_match_entry_head; /* pointer to access list match_entry list head */
/* status */
struct knet_link_status status;
/* internals */
pthread_mutex_t link_stats_mutex; /* used to update link stats */
uint8_t link_id;
uint8_t transport; /* #defined constant from API */
knet_transport_link_t transport_link; /* link_info_t from transport */
int outsock;
unsigned int configured:1; /* set to 1 if src/dst have been configured transport initialized on this link*/
unsigned int transport_connected:1; /* set to 1 if lower level transport is connected */
uint8_t received_pong;
struct timespec ping_last;
/* used by PMTUD thread as temp per-link variables and should always contain the onwire_len value! */
uint32_t proto_overhead; /* IP + UDP/SCTP overhead. NOT to be confused
with stats.proto_overhead that includes also knet headers
and crypto headers */
struct timespec pmtud_last;
uint32_t last_ping_size;
uint32_t last_good_mtu;
uint32_t last_bad_mtu;
uint32_t last_sent_mtu;
uint32_t last_recv_mtu;
uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */
uint8_t has_valid_mtu;
};
#define KNET_CBUFFER_SIZE 4096
struct knet_host_defrag_buf {
char buf[KNET_DATABUFSIZE];
uint8_t in_use; /* 0 buffer is free, 1 is in use */
seq_num_t pckt_seq; /* identify the pckt we are receiving */
uint8_t frag_recv; /* how many frags did we receive */
uint8_t frag_map[PCKT_FRAG_MAX];/* bitmap of what we received? */
uint8_t last_first; /* special case if we receive the last fragment first */
ssize_t frag_size; /* normal frag size (not the last one) */
ssize_t last_frag_size; /* the last fragment might not be aligned with MTU size */
struct timespec last_update; /* keep time of the last pckt */
};
struct knet_host {
/* required */
knet_node_id_t host_id;
/* configurable */
uint8_t link_handler_policy;
char name[KNET_MAX_HOST_LEN];
/* status */
struct knet_host_status status;
/*
* onwire info
*/
uint8_t onwire_ver; /* node current onwire version */
uint8_t onwire_max_ver; /* node supports up to this version */
/* internals */
char circular_buffer[KNET_CBUFFER_SIZE];
seq_num_t rx_seq_num;
seq_num_t untimed_rx_seq_num;
seq_num_t timed_rx_seq_num;
uint8_t got_data;
/* defrag/reassembly buffers */
struct knet_host_defrag_buf *defrag_bufs;
uint16_t allocated_defrag_bufs;
/* track use % of allocated defrag buffers */
uint8_t in_use_defrag_buffers[UINT8_MAX];
uint8_t in_use_defrag_buffers_samples;
uint8_t in_use_defrag_buffers_index;
char circular_buffer_defrag[KNET_CBUFFER_SIZE];
/* link stuff */
struct knet_link link[KNET_MAX_LINK];
uint8_t active_link_entries;
uint8_t active_links[KNET_MAX_LINK];
struct knet_host *next;
};
struct knet_sock {
int sockfd[2]; /* sockfd[0] will always be application facing
* and sockfd[1] internal if sockpair has been created by knet */
int is_socket; /* check if it's a socket for recvmmsg usage */
int is_created; /* knet created this socket and has to clean up on exit/del */
int in_use; /* set to 1 if it's use, 0 if free */
int has_error; /* set to 1 if there were errors reading from the sock
* and socket has been removed from epoll */
};
struct knet_fd_trackers {
uint8_t transport; /* transport type (UDP/SCTP...) */
uint8_t data_type; /* internal use for transport to define what data are associated
* with this fd */
socklen_t sockaddr_len; /* Size of sockaddr_in[6] structure for this socket */
void *data; /* pointer to the data */
+ int ifindex; /* interface index for this bound address */
};
#define KNET_MAX_FDS KNET_MAX_HOST * KNET_MAX_LINK * 4
#define KNET_MAX_COMPRESS_METHODS UINT8_MAX
#define KNET_MAX_CRYPTO_INSTANCES 2
struct knet_handle_stats_extra {
uint64_t tx_crypt_pmtu_packets;
uint64_t tx_crypt_pmtu_reply_packets;
uint64_t tx_crypt_ping_packets;
uint64_t tx_crypt_pong_packets;
};
#define KNET_RX_ODD_PACKETS_THRESHOLD 20
#define KNET_USAGE_SAMPLES_DEFAULT UINT8_MAX
#define KNET_USAGE_SAMPLES_TIMESPAN_DEFAULT 10 /* seconds */
struct knet_handle {
knet_node_id_t host_id;
unsigned int enabled:1;
struct knet_sock sockfd[KNET_DATAFD_MAX + 1];
int logfd;
uint8_t log_levels[KNET_MAX_SUBSYSTEMS];
int dstsockfd[2];
int send_to_links_epollfd;
int recv_from_links_epollfd;
int dst_link_handler_epollfd;
uint8_t use_access_lists; /* set to 0 for disable, 1 for enable */
unsigned int pmtud_interval;
unsigned int manual_mtu;
unsigned int data_mtu; /* contains the max data size that we can send onwire
* without frags */
struct knet_host *host_head;
struct knet_host *host_index[KNET_MAX_HOST];
knet_transport_t transports[KNET_MAX_TRANSPORTS+1];
struct knet_fd_trackers knet_transport_fd_tracker[KNET_MAX_FDS]; /* track status for each fd handled by transports */
struct knet_handle_stats stats;
struct knet_handle_stats_extra stats_extra;
pthread_mutex_t handle_stats_mutex; /* used to protect handle stats */
uint32_t reconnect_int;
knet_node_id_t host_ids[KNET_MAX_HOST];
size_t host_ids_entries;
struct knet_header *recv_from_sock_buf;
struct knet_header *send_to_links_buf[PCKT_FRAG_MAX];
struct knet_header *recv_from_links_buf[PCKT_RX_BUFS];
struct knet_header *pingbuf;
struct knet_header *pmtudbuf;
uint8_t threads_status[KNET_THREAD_MAX];
uint8_t threads_flush_queue[KNET_THREAD_MAX];
useconds_t threads_timer_res;
pthread_mutex_t threads_status_mutex;
pthread_t send_to_links_thread;
pthread_t recv_from_links_thread;
pthread_t heartbt_thread;
pthread_t dst_link_handler_thread;
pthread_t pmtud_link_handler_thread;
pthread_rwlock_t global_rwlock; /* global config lock */
pthread_mutex_t pmtud_mutex; /* pmtud mutex to handle conditional send/recv + timeout */
pthread_cond_t pmtud_cond; /* conditional for above */
pthread_mutex_t tx_mutex; /* used to protect knet_send_sync and TX thread */
pthread_mutex_t hb_mutex; /* used to protect heartbeat thread and seq_num broadcasting */
pthread_mutex_t backoff_mutex; /* used to protect dst_link->pong_timeout_adj */
pthread_mutex_t kmtu_mutex; /* used to protect kernel_mtu */
pthread_mutex_t onwire_mutex; /* used to protect onwire version */
uint8_t onwire_ver; /* currently agreed onwire version across known nodes */
uint8_t onwire_min_ver; /* min and max are constant and don´t need any mutex protection. */
uint8_t onwire_max_ver; /* we define them as part of internal handle so that we can mingle with them for testing purposes */
uint8_t onwire_force_ver; /* manually configure onwire_ver */
uint8_t onwire_ver_remap; /* when this is on, all mapping will use version 1 for now */
uint8_t rx_odd_packets; /* used to warn if too many weird packets are being received */
uint32_t kernel_mtu; /* contains the MTU detected by the kernel on a given link */
int pmtud_waiting;
int pmtud_running;
int pmtud_forcerun;
int pmtud_abort;
struct crypto_instance *crypto_instance[KNET_MAX_CRYPTO_INSTANCES + 1]; /* store an extra pointer to allow 0|1|2 values without too much magic in the code */
uint8_t crypto_in_use_config; /* crypto config to use for TX */
uint8_t crypto_only; /* allow only crypto (1) or also clear (0) traffic */
size_t sec_block_size;
size_t sec_hash_size;
size_t sec_salt_size;
unsigned char *send_to_links_buf_crypt[PCKT_FRAG_MAX];
unsigned char *recv_from_links_buf_crypt;
unsigned char *recv_from_links_buf_decrypt;
unsigned char *pingbuf_crypt;
unsigned char *pmtudbuf_crypt;
int compress_model;
int compress_level;
size_t compress_threshold;
void *compress_int_data[KNET_MAX_COMPRESS_METHODS]; /* for compress method private data */
unsigned char *recv_from_links_buf_decompress;
unsigned char *send_to_links_buf_compress;
seq_num_t tx_seq_num;
pthread_mutex_t tx_seq_num_mutex;
uint16_t defrag_bufs_min;
uint16_t defrag_bufs_max;
uint8_t defrag_bufs_shrink_threshold;
uint8_t defrag_bufs_usage_samples;
uint8_t defrag_bufs_usage_samples_timespan;
defrag_bufs_reclaim_policy_t defrag_bufs_reclaim_policy;
struct timespec defrag_bufs_last_run;
uint8_t has_loop_link;
uint8_t loop_link;
void *dst_host_filter_fn_private_data;
int (*dst_host_filter_fn) (
void *private_data,
const unsigned char *outdata,
ssize_t outdata_len,
uint8_t tx_rx,
knet_node_id_t this_host_id,
knet_node_id_t src_node_id,
int8_t *channel,
knet_node_id_t *dst_host_ids,
size_t *dst_host_ids_entries);
void *pmtud_notify_fn_private_data;
void (*pmtud_notify_fn) (
void *private_data,
unsigned int data_mtu);
void *host_status_change_notify_fn_private_data;
void (*host_status_change_notify_fn) (
void *private_data,
knet_node_id_t host_id,
uint8_t reachable,
uint8_t remote,
uint8_t external);
void *link_status_change_notify_fn_private_data;
void (*link_status_change_notify_fn) (
void *private_data,
knet_node_id_t host_id,
uint8_t link_id,
uint8_t connected,
uint8_t remote,
uint8_t external);
void *sock_notify_fn_private_data;
void (*sock_notify_fn) (
void *private_data,
int datafd,
int8_t channel,
uint8_t tx_rx,
int error,
int errorno);
void *onwire_ver_notify_fn_private_data;
void (*onwire_ver_notify_fn) (
void *private_data,
uint8_t onwire_min_ver,
uint8_t onwire_max_ver,
uint8_t onwire_ver);
int fini_in_progress;
uint64_t flags;
struct qb_list_head list;
const char *plugin_path;
};
struct handle_tracker {
struct qb_list_head head;
};
/*
* lib_config stuff shared across everything
*/
extern pthread_rwlock_t shlib_rwlock; /* global shared lib load lock */
extern pthread_mutex_t handle_config_mutex;
extern struct handle_tracker handle_list;
extern uint8_t handle_list_init;
int _is_valid_handle(knet_handle_t knet_h);
int _init_shlib_tracker(knet_handle_t knet_h);
void _fini_shlib_tracker(void);
/*
* NOTE: every single operation must be implementend
* for every protocol.
*/
/*
* for now knet supports only IP protocols (udp/sctp)
* in future there might be others like ARP
* or TIPC.
* keep this around as transport information
* to use for access lists and other operations
*/
#define TRANSPORT_PROTO_LOOPBACK 0
#define TRANSPORT_PROTO_IP_PROTO 1
/*
* some transports like SCTP can filter incoming
* connections before knet has to process
* any packets.
* GENERIC_ACL -> packet has to be read and filterted
* PROTO_ACL -> transport provides filtering at lower levels
* and packet does not need to be processed
*/
typedef enum {
USE_NO_ACL,
USE_GENERIC_ACL,
USE_PROTO_ACL
} transport_acl;
/*
* make it easier to map values in transports.c
*/
#define TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED 0
#define TRANSPORT_PROTO_IS_CONNECTION_ORIENTED 1
/*
* Returns from transport_tx_sock_error
*/
typedef enum {
KNET_TRANSPORT_RX_ERROR = -1,
KNET_TRANSPORT_SOCK_ERROR_INTERNAL = -1,
KNET_TRANSPORT_SOCK_ERROR_IGNORE = 0,
KNET_TRANSPORT_SOCK_ERROR_RETRY = 1,
} transport_sock_error_t;
/*
* Returns from transport_rx_is_data
*/
typedef enum {
KNET_TRANSPORT_RX_ISDATA_ERROR = -1,
KNET_TRANSPORT_RX_NOT_DATA_CONTINUE = 0,
KNET_TRANSPORT_RX_NOT_DATA_STOP = 1,
KNET_TRANSPORT_RX_IS_DATA = 2,
/* These two are only really used by SCTP */
KNET_TRANSPORT_RX_OOB_DATA_CONTINUE = 3,
KNET_TRANSPORT_RX_OOB_DATA_STOP = 4
} transport_rx_isdata_t;
/*
* All functions that return int return 0 for success, and -1 for failure.
*/
typedef struct knet_transport_ops {
/*
* transport generic information
*/
const char *transport_name;
const uint8_t transport_id;
const uint8_t built_in;
uint8_t transport_protocol;
transport_acl transport_acl_type;
/*
* connection oriented protocols like SCTP
* don´t need dst_addr in sendto calls and
* on some OSes are considered EINVAL.
*/
uint8_t transport_is_connection_oriented;
uint32_t transport_mtu_overhead;
/*
* transport init must allocate the new transport
* and perform all internal initializations
* (threads, lists, etc).
*/
int (*transport_init)(knet_handle_t knet_h);
/*
* transport free must releases _all_ resources
* allocated by tranport_init
*/
int (*transport_free)(knet_handle_t knet_h);
/*
* link operations should take care of all the
* sockets and epoll management for a given link/transport set
* transport_link_disable should return -1 and errno = EBUSY
* if listener is still in use, and any other errno in case
* the link cannot be disabled.
*
* set_config/clear_config are invoked in global write lock context
*/
int (*transport_link_set_config)(knet_handle_t knet_h, struct knet_link *link);
int (*transport_link_clear_config)(knet_handle_t knet_h, struct knet_link *link);
/*
* transport callback for incoming dynamic connections
* this is called in global read lock context
*/
int (*transport_link_dyn_connect)(knet_handle_t knet_h, int sockfd, struct knet_link *link);
/*
* per transport error handling of recvmmsg
* (see _handle_recv_from_links comments for details)
*/
/*
* transport_rx_sock_error is invoked when recvmmsg returns <= 0
*
* transport_rx_sock_error is invoked with both global_rdlock
*/
int (*transport_rx_sock_error)(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno);
/*
* transport_tx_sock_error is invoked with global_rwlock and
* it's invoked when sendto or sendmmsg returns =< 0
*
* it should return a transport_sock_error_t
* any sleep or wait action should happen inside the transport code
*/
transport_sock_error_t (*transport_tx_sock_error)(knet_handle_t knet_h, int sockfd, int subsys, int recv_err, int recv_errno);
/*
* this function is called on _every_ received packet
* to verify if the packet is data or internal protocol error handling
*
* it should return a transport_tx_isdata_t
*
* transport_rx_is_data is invoked with both global_rwlock
* and fd_tracker read lock (from RX thread)
*/
transport_rx_isdata_t (*transport_rx_is_data)(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg);
/*
* this function is called by links.c when a link down event is recorded
* to notify the transport that packets are not going through, and give
* transport the opportunity to take actions.
*/
int (*transport_link_is_down)(knet_handle_t knet_h, struct knet_link *link);
} knet_transport_ops_t;
struct pretty_names {
const char *name;
uint8_t val;
};
#endif
diff --git a/libknet/links.c b/libknet/links.c
index 92e0930d..711d9de6 100644
--- a/libknet/links.c
+++ b/libknet/links.c
@@ -1,1587 +1,1612 @@
/*
* Copyright (C) 2012-2024 Red Hat, Inc. All rights reserved.
*
* Authors: Fabio M. Di Nitto <fabbione@kronosnet.org>
* Federico Simoncelli <fsimon@kronosnet.org>
*
* This software licensed under LGPL-2.0+
*/
#include "config.h"
#include <errno.h>
#include <netdb.h>
#include <string.h>
#include <pthread.h>
#include "netutils.h"
#include "internals.h"
#include "logging.h"
#include "links.h"
#include "transports.h"
#include "host.h"
#include "threads_common.h"
#include "links_acl.h"
+#ifdef IP_PKTINFO
+#include <ifaddrs.h>
+#include <net/if.h>
+#endif
+
+static int find_ifindex(struct sockaddr_storage *addr)
+{
+#ifdef IP_PKTINFO
+ struct ifaddrs *ifrs, *ifa;
+
+ if (getifaddrs(&ifrs) == 0) {
+ for (ifa = ifrs; ifa != NULL; ifa = ifa->ifa_next) {
+ if (ifa->ifa_addr && cmpaddr(addr, (struct sockaddr_storage *)ifa->ifa_addr) == 0) {
+ int ifindex = if_nametoindex(ifa->ifa_name);
+ freeifaddrs(ifrs);
+ return ifindex;
+ }
+ }
+ freeifaddrs(ifrs);
+ }
+#endif
+ return -1;
+}
+
int _link_updown(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
unsigned int enabled, unsigned int connected, unsigned int lock_stats)
{
struct knet_host *host = knet_h->host_index[host_id];
struct knet_link *link = &host->link[link_id];
int notify_status = link->status.connected;
int savederrno = 0;
if ((link->status.enabled == enabled) &&
(link->status.connected == connected))
return 0;
if ((link->status.enabled) &&
(knet_h->link_status_change_notify_fn)) {
if (link->status.connected != connected) {
notify_status = connected; /* connection state */
}
if (!enabled) {
notify_status = 0; /* disable == disconnected */
}
knet_h->link_status_change_notify_fn(
knet_h->link_status_change_notify_fn_private_data,
host_id,
link_id,
notify_status,
host->status.remote,
host->status.external);
}
link->status.enabled = enabled;
link->status.connected = connected;
_host_dstcache_update_async(knet_h, host);
if ((link->status.dynconnected) &&
(!link->status.connected)) {
link->status.dynconnected = 0;
}
if (!connected) {
transport_link_is_down(knet_h, link);
} else {
/* Reset MTU in case new link can't use full line MTU */
log_info(knet_h, KNET_SUB_LINK, "Resetting MTU for link %u because host %u joined", link_id, host_id);
force_pmtud_run(knet_h, KNET_SUB_LINK, 1, 1);
}
if (lock_stats) {
savederrno = pthread_mutex_lock(&link->link_stats_mutex);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get stats mutex lock for host %u link %u: %s",
host_id, link_id, strerror(savederrno));
errno = savederrno;
return -1;
}
}
if (connected) {
time(&link->status.stats.last_up_times[link->status.stats.last_up_time_index]);
link->status.stats.up_count++;
if (++link->status.stats.last_up_time_index >= MAX_LINK_EVENTS) {
link->status.stats.last_up_time_index = 0;
}
+ knet_h->knet_transport_fd_tracker[link->outsock].ifindex = find_ifindex(&link->src_addr);
} else {
time(&link->status.stats.last_down_times[link->status.stats.last_down_time_index]);
link->status.stats.down_count++;
if (++link->status.stats.last_down_time_index >= MAX_LINK_EVENTS) {
link->status.stats.last_down_time_index = 0;
}
}
if (lock_stats) {
pthread_mutex_unlock(&link->link_stats_mutex);
}
return 0;
}
void _link_clear_stats(knet_handle_t knet_h)
{
struct knet_host *host;
struct knet_link *link;
uint32_t host_id;
uint8_t link_id;
for (host_id = 0; host_id < KNET_MAX_HOST; host_id++) {
host = knet_h->host_index[host_id];
if (!host) {
continue;
}
for (link_id = 0; link_id < KNET_MAX_LINK; link_id++) {
link = &host->link[link_id];
memset(&link->status.stats, 0, sizeof(struct knet_link_stats));
}
}
}
int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
uint8_t transport,
struct sockaddr_storage *src_addr,
struct sockaddr_storage *dst_addr,
uint64_t flags)
{
int savederrno = 0, err = 0, i, wipelink = 0, link_idx;
struct knet_host *host, *tmp_host;
struct knet_link *link = NULL;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!src_addr) {
errno = EINVAL;
return -1;
}
if (dst_addr && (src_addr->ss_family != dst_addr->ss_family)) {
log_err(knet_h, KNET_SUB_LINK, "Source address family does not match destination address family");
errno = EINVAL;
return -1;
}
if (transport >= KNET_MAX_TRANSPORTS) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
if (transport == KNET_TRANSPORT_LOOPBACK && knet_h->host_id != host_id) {
log_err(knet_h, KNET_SUB_LINK, "Cannot create loopback link to remote node");
err = -1;
savederrno = EINVAL;
goto exit_unlock;
}
if (knet_h->host_id == host_id && knet_h->has_loop_link) {
log_err(knet_h, KNET_SUB_LINK, "Cannot create more than 1 link when loopback is active");
err = -1;
savederrno = EINVAL;
goto exit_unlock;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
if (transport == KNET_TRANSPORT_LOOPBACK && knet_h->host_id == host_id) {
for (i=0; i<KNET_MAX_LINK; i++) {
if (host->link[i].configured) {
log_err(knet_h, KNET_SUB_LINK, "Cannot add loopback link when other links are already configured.");
err = -1;
savederrno = EINVAL;
goto exit_unlock;
}
}
}
link = &host->link[link_id];
if (link->configured != 0) {
err =-1;
savederrno = EBUSY;
log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if (link->status.enabled != 0) {
err =-1;
savederrno = EBUSY;
log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently in use: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
/*
* errors happening after this point should trigger
* a memset of the link
*/
wipelink = 1;
copy_sockaddr(&link->src_addr, src_addr);
err = knet_addrtostr(src_addr, sizeof(struct sockaddr_storage),
link->status.src_ipaddr, KNET_MAX_HOST_LEN,
link->status.src_port, KNET_MAX_PORT_LEN);
if (err) {
if (err == EAI_SYSTEM) {
savederrno = errno;
log_warn(knet_h, KNET_SUB_LINK,
"Unable to resolve host: %u link: %u source addr/port: %s",
host_id, link_id, strerror(savederrno));
} else {
savederrno = EINVAL;
log_warn(knet_h, KNET_SUB_LINK,
"Unable to resolve host: %u link: %u source addr/port: %s",
host_id, link_id, gai_strerror(err));
}
err = -1;
goto exit_unlock;
}
if (!dst_addr) {
link->dynamic = KNET_LINK_DYNIP;
} else {
link->dynamic = KNET_LINK_STATIC;
copy_sockaddr(&link->dst_addr, dst_addr);
err = knet_addrtostr(dst_addr, sizeof(struct sockaddr_storage),
link->status.dst_ipaddr, KNET_MAX_HOST_LEN,
link->status.dst_port, KNET_MAX_PORT_LEN);
if (err) {
if (err == EAI_SYSTEM) {
savederrno = errno;
log_warn(knet_h, KNET_SUB_LINK,
"Unable to resolve host: %u link: %u destination addr/port: %s",
host_id, link_id, strerror(savederrno));
} else {
savederrno = EINVAL;
log_warn(knet_h, KNET_SUB_LINK,
"Unable to resolve host: %u link: %u destination addr/port: %s",
host_id, link_id, gai_strerror(err));
}
err = -1;
goto exit_unlock;
}
}
link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN;
link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT;
link->has_valid_mtu = 0;
link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */
link->pong_timeout = KNET_LINK_DEFAULT_PING_TIMEOUT * 1000; /* microseconds */
link->pong_timeout_backoff = KNET_LINK_PONG_TIMEOUT_BACKOFF;
link->pong_timeout_adj = link->pong_timeout * link->pong_timeout_backoff; /* microseconds */
link->latency_max_samples = KNET_LINK_DEFAULT_PING_PRECISION;
link->status.stats.latency_samples = 0;
link->flags = flags;
/*
* check for DYNIP vs STATIC collisions.
* example: link0 is static, user attempts to configure link1 as dynamic with the same source
* address/port.
* This configuration is invalid and would cause ACL collisions.
*/
for (tmp_host = knet_h->host_head; tmp_host != NULL; tmp_host = tmp_host->next) {
for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
if (&tmp_host->link[link_idx] == link)
continue;
if ((!memcmp(&tmp_host->link[link_idx].src_addr, &link->src_addr, sizeof(struct sockaddr_storage))) &&
(tmp_host->link[link_idx].dynamic != link->dynamic)) {
savederrno = EINVAL;
err = -1;
log_err(knet_h, KNET_SUB_LINK, "Failed to configure host %u link %u dyn %u. Conflicts with host %u link %u dyn %u: %s",
host_id, link_id, link->dynamic, tmp_host->host_id, link_idx, tmp_host->link[link_idx].dynamic, strerror(savederrno));
goto exit_unlock;
}
}
}
savederrno = pthread_mutex_init(&link->link_stats_mutex, NULL);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to initialize link stats mutex: %s", strerror(savederrno));
err = -1;
goto exit_unlock;
}
if (transport_link_set_config(knet_h, link, transport) < 0) {
savederrno = errno;
err = -1;
goto exit_transport_err;
}
/*
* we can only configure default access lists if we know both endpoints
* and the protocol uses GENERIC_ACL, otherwise the protocol has
* to setup their own access lists above in transport_link_set_config.
*/
if ((transport_get_acl_type(knet_h, transport) == USE_GENERIC_ACL) &&
(link->dynamic == KNET_LINK_STATIC)) {
log_debug(knet_h, KNET_SUB_LINK, "Configuring default access lists for host: %u link: %u socket: %d",
host_id, link_id, link->outsock);
if ((check_add(knet_h, link, -1,
&link->dst_addr, &link->dst_addr,
CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) {
log_warn(knet_h, KNET_SUB_LINK, "Failed to configure default access lists for host: %u link: %u", host_id, link_id);
savederrno = errno;
err = -1;
goto exit_acl_error;
}
}
/*
* no errors should happen after link is configured
*/
link->configured = 1;
log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u is configured",
host_id, link_id);
if (transport == KNET_TRANSPORT_LOOPBACK) {
knet_h->has_loop_link = 1;
knet_h->loop_link = link_id;
host->status.reachable = 1;
link->status.mtu = KNET_PMTUD_SIZE_V6;
} else {
/*
* calculate the minimum MTU that is safe to use,
* based on RFCs and that each network device should
* be able to support without any troubles
*/
if (link->dynamic == KNET_LINK_STATIC) {
/*
* with static link we can be more precise than using
* the generic calc_min_mtu()
*/
switch (link->dst_addr.ss_family) {
case AF_INET6:
link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead));
break;
case AF_INET:
link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead));
break;
}
} else {
/*
* for dynamic links we start with the minimum MTU
* possible and PMTUd will kick in immediately
* after connection status is 1
*/
link->status.mtu = calc_min_mtu(knet_h);
}
link->has_valid_mtu = 1;
}
exit_acl_error:
/*
* if creating access lists has error, we only need to clean
* the transport and the stuff below.
*/
if (err < 0) {
if ((transport_link_clear_config(knet_h, link) < 0) &&
(errno != EBUSY)) {
log_warn(knet_h, KNET_SUB_LINK, "Failed to deconfigure transport for host %u link %u: %s", host_id, link_id, strerror(errno));
}
}
exit_transport_err:
/*
* if transport has errors, transport will clean after itself
* and we only need to clean the mutex
*/
if (err < 0) {
pthread_mutex_destroy(&link->link_stats_mutex);
}
exit_unlock:
/*
* re-init the link on error
*/
if ((err < 0) && (wipelink)) {
memset(link, 0, sizeof(struct knet_link));
link->link_id = link_id;
}
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_get_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
uint8_t *transport,
struct sockaddr_storage *src_addr,
struct sockaddr_storage *dst_addr,
uint8_t *dynamic,
uint64_t *flags)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!src_addr) {
errno = EINVAL;
return -1;
}
if (!dynamic) {
errno = EINVAL;
return -1;
}
if (!transport) {
errno = EINVAL;
return -1;
}
if (!flags) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if ((link->dynamic == KNET_LINK_STATIC) && (!dst_addr)) {
savederrno = EINVAL;
err = -1;
goto exit_unlock;
}
memmove(src_addr, &link->src_addr, sockaddr_len(&link->src_addr));
*transport = link->transport;
*flags = link->flags;
if (link->dynamic == KNET_LINK_STATIC) {
*dynamic = 0;
memmove(dst_addr, &link->dst_addr, sockaddr_len(&link->dst_addr));
} else {
*dynamic = 1;
}
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_clear_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
int sock;
uint8_t transport;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (link->configured != 1) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if (link->status.enabled != 0) {
err = -1;
savederrno = EBUSY;
log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently in use: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
/*
* remove well known access lists here.
* After the transport has done clearing the config,
* then we can remove any leftover access lists if the link
* is no longer in use.
*/
if ((transport_get_acl_type(knet_h, link->transport) == USE_GENERIC_ACL) &&
(link->dynamic == KNET_LINK_STATIC)) {
if ((check_rm(knet_h, link,
&link->dst_addr, &link->dst_addr,
CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != ENOENT)) {
err = -1;
savederrno = errno;
log_err(knet_h, KNET_SUB_LINK, "Host %u link %u: unable to remove default access list",
host_id, link_id);
goto exit_unlock;
}
}
/*
* cache it for later as we don't know if the transport
* will clear link info during clear_config.
*/
sock = link->outsock;
transport = link->transport;
if ((transport_link_clear_config(knet_h, link) < 0) &&
(errno != EBUSY)) {
savederrno = errno;
err = -1;
goto exit_unlock;
}
/*
* remove any other access lists when the socket is no
* longer in use by the transport.
*/
if ((transport_get_acl_type(knet_h, transport) == USE_GENERIC_ACL) &&
(knet_h->knet_transport_fd_tracker[sock].transport == KNET_MAX_TRANSPORTS)) {
check_rmall(knet_h, link);
}
pthread_mutex_destroy(&link->link_stats_mutex);
memset(link, 0, sizeof(struct knet_link));
link->link_id = link_id;
if (knet_h->has_loop_link && host_id == knet_h->host_id && link_id == knet_h->loop_link) {
knet_h->has_loop_link = 0;
if (host->active_link_entries == 0) {
host->status.reachable = 0;
}
}
log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u config has been wiped",
host_id, link_id);
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_set_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
unsigned int enabled)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (enabled > 1) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if (link->status.enabled == enabled) {
err = 0;
goto exit_unlock;
}
err = _link_updown(knet_h, host_id, link_id, enabled, link->status.connected, 0);
savederrno = errno;
if (enabled) {
goto exit_unlock;
}
log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u is disabled",
host_id, link_id);
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_get_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
unsigned int *enabled)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!enabled) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
*enabled = link->status.enabled;
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_set_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
uint8_t pong_count)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (pong_count < 1) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
link->pong_count = pong_count;
log_debug(knet_h, KNET_SUB_LINK,
"host: %u link: %u pong count update: %u",
host_id, link_id, link->pong_count);
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_get_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
uint8_t *pong_count)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!pong_count) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
*pong_count = link->pong_count;
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_set_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
time_t interval, time_t timeout, unsigned int precision)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!interval) {
errno = EINVAL;
return -1;
}
if (!timeout) {
errno = ENOSYS;
return -1;
}
if (!precision) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if ((useconds_t)(interval * 1000) < knet_h->threads_timer_res) {
log_warn(knet_h, KNET_SUB_LINK,
"host: %u link: %u interval: %lu too small (%s). interval lower than thread_timer_res (%u ms) has no effect",
host_id, link_id, interval, strerror(savederrno), (knet_h->threads_timer_res / 1000));
}
if ((useconds_t)(timeout * 1000) < knet_h->threads_timer_res) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK,
"host: %u link: %u pong timeout: %lu too small (%s). timeout cannot be less than thread_timer_res (%u ms)",
host_id, link_id, timeout, strerror(savederrno), (knet_h->threads_timer_res / 1000));
goto exit_unlock;
}
link->ping_interval = interval * 1000; /* microseconds */
link->pong_timeout = timeout * 1000; /* microseconds */
link->latency_max_samples = precision;
log_debug(knet_h, KNET_SUB_LINK,
"host: %u link: %u timeout update - interval: %llu timeout: %llu precision: %u",
host_id, link_id, link->ping_interval, link->pong_timeout, precision);
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_get_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
time_t *interval, time_t *timeout, unsigned int *precision)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!interval) {
errno = EINVAL;
return -1;
}
if (!timeout) {
errno = EINVAL;
return -1;
}
if (!precision) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
*interval = link->ping_interval / 1000; /* microseconds */
*timeout = link->pong_timeout / 1000;
*precision = link->latency_max_samples;
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_set_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
uint8_t priority)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
uint8_t old_priority;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
old_priority = link->priority;
if (link->priority == priority) {
err = 0;
goto exit_unlock;
}
link->priority = priority;
if (_host_dstcache_update_sync(knet_h, host)) {
savederrno = errno;
log_debug(knet_h, KNET_SUB_LINK,
"Unable to update link priority (host: %u link: %u priority: %u): %s",
host_id, link_id, link->priority, strerror(savederrno));
link->priority = old_priority;
err = -1;
goto exit_unlock;
}
log_debug(knet_h, KNET_SUB_LINK,
"host: %u link: %u priority set to: %u",
host_id, link_id, link->priority);
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_get_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
uint8_t *priority)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!priority) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
*priority = link->priority;
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_get_link_list(knet_handle_t knet_h, knet_node_id_t host_id,
uint8_t *link_ids, size_t *link_ids_entries)
{
int savederrno = 0, err = 0, i, count = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (!link_ids) {
errno = EINVAL;
return -1;
}
if (!link_ids_entries) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
for (i = 0; i < KNET_MAX_LINK; i++) {
link = &host->link[i];
if (!link->configured) {
continue;
}
link_ids[count] = i;
count++;
}
*link_ids_entries = count;
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_get_status(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
struct knet_link_status *status, size_t struct_size)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
if (!status) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
savederrno = pthread_mutex_lock(&link->link_stats_mutex);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get stats mutex lock for host %u link %u: %s",
host_id, link_id, strerror(savederrno));
err = -1;
goto exit_unlock;
}
memmove(status, &link->status, struct_size);
pthread_mutex_unlock(&link->link_stats_mutex);
/* Calculate totals - no point in doing this on-the-fly */
status->stats.rx_total_packets =
status->stats.rx_data_packets +
status->stats.rx_ping_packets +
status->stats.rx_pong_packets +
status->stats.rx_pmtu_packets;
status->stats.tx_total_packets =
status->stats.tx_data_packets +
status->stats.tx_ping_packets +
status->stats.tx_pong_packets +
status->stats.tx_pmtu_packets;
status->stats.rx_total_bytes =
status->stats.rx_data_bytes +
status->stats.rx_ping_bytes +
status->stats.rx_pong_bytes +
status->stats.rx_pmtu_bytes;
status->stats.tx_total_bytes =
status->stats.tx_data_bytes +
status->stats.tx_ping_bytes +
status->stats.tx_pong_bytes +
status->stats.tx_pmtu_bytes;
status->stats.tx_total_errors =
status->stats.tx_data_errors +
status->stats.tx_ping_errors +
status->stats.tx_pong_errors +
status->stats.tx_pmtu_errors;
status->stats.tx_total_retries =
status->stats.tx_data_retries +
status->stats.tx_ping_retries +
status->stats.tx_pong_retries +
status->stats.tx_pmtu_retries;
/* Tell the caller our full size in case they have an old version */
status->size = sizeof(struct knet_link_status);
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
int knet_link_enable_status_change_notify(knet_handle_t knet_h,
void *link_status_change_notify_fn_private_data,
void (*link_status_change_notify_fn) (
void *private_data,
knet_node_id_t host_id,
uint8_t link_id,
uint8_t connected,
uint8_t remote,
uint8_t external))
{
int savederrno = 0;
if (!_is_valid_handle(knet_h)) {
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
knet_h->link_status_change_notify_fn_private_data = link_status_change_notify_fn_private_data;
knet_h->link_status_change_notify_fn = link_status_change_notify_fn;
if (knet_h->link_status_change_notify_fn) {
log_debug(knet_h, KNET_SUB_LINK, "link_status_change_notify_fn enabled");
} else {
log_debug(knet_h, KNET_SUB_LINK, "link_status_change_notify_fn disabled");
}
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = 0;
return 0;
}
int knet_link_insert_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
int index,
struct sockaddr_storage *ss1,
struct sockaddr_storage *ss2,
check_type_t type, check_acceptreject_t acceptreject)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (!ss1) {
errno = EINVAL;
return -1;
}
if ((type != CHECK_TYPE_ADDRESS) &&
(type != CHECK_TYPE_MASK) &&
(type != CHECK_TYPE_RANGE)) {
errno = EINVAL;
return -1;
}
if ((acceptreject != CHECK_ACCEPT) &&
(acceptreject != CHECK_REJECT)) {
errno = EINVAL;
return -1;
}
if ((type != CHECK_TYPE_ADDRESS) && (!ss2)) {
errno = EINVAL;
return -1;
}
if ((type == CHECK_TYPE_RANGE) &&
(ss1->ss_family != ss2->ss_family)) {
errno = EINVAL;
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if (link->dynamic != KNET_LINK_DYNIP) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
err = check_add(knet_h, link, index,
ss1, ss2, type, acceptreject);
savederrno = errno;
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = savederrno;
return err;
}
int knet_link_add_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
struct sockaddr_storage *ss1,
struct sockaddr_storage *ss2,
check_type_t type, check_acceptreject_t acceptreject)
{
return knet_link_insert_acl(knet_h, host_id, link_id, -1, ss1, ss2, type, acceptreject);
}
int knet_link_rm_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
struct sockaddr_storage *ss1,
struct sockaddr_storage *ss2,
check_type_t type, check_acceptreject_t acceptreject)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (!ss1) {
errno = EINVAL;
return -1;
}
if ((type != CHECK_TYPE_ADDRESS) &&
(type != CHECK_TYPE_MASK) &&
(type != CHECK_TYPE_RANGE)) {
errno = EINVAL;
return -1;
}
if ((acceptreject != CHECK_ACCEPT) &&
(acceptreject != CHECK_REJECT)) {
errno = EINVAL;
return -1;
}
if ((type != CHECK_TYPE_ADDRESS) && (!ss2)) {
errno = EINVAL;
return -1;
}
if ((type == CHECK_TYPE_RANGE) &&
(ss1->ss_family != ss2->ss_family)) {
errno = EINVAL;
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if (link->dynamic != KNET_LINK_DYNIP) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
err = check_rm(knet_h, link,
ss1, ss2, type, acceptreject);
savederrno = errno;
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = savederrno;
return err;
}
int knet_link_clear_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id)
{
int savederrno = 0, err = 0;
struct knet_host *host;
struct knet_link *link;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (link_id >= KNET_MAX_LINK) {
errno = EINVAL;
return -1;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
host = knet_h->host_index[host_id];
if (!host) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s",
host_id, strerror(savederrno));
goto exit_unlock;
}
link = &host->link[link_id];
if (!link->configured) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
if (link->dynamic != KNET_LINK_DYNIP) {
err = -1;
savederrno = EINVAL;
log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s",
host_id, link_id, strerror(savederrno));
goto exit_unlock;
}
check_rmall(knet_h, link);
exit_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = savederrno;
return err;
}
diff --git a/libknet/threads_rx.c b/libknet/threads_rx.c
index 5cd2850a..c99e818f 100644
--- a/libknet/threads_rx.c
+++ b/libknet/threads_rx.c
@@ -1,1241 +1,1248 @@
/*
* Copyright (C) 2012-2024 Red Hat, Inc. All rights reserved.
*
* Authors: Fabio M. Di Nitto <fabbione@kronosnet.org>
* Federico Simoncelli <fsimon@kronosnet.org>
*
* This software licensed under LGPL-2.0+
*/
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/uio.h>
#include <pthread.h>
#include "compat.h"
#include "compress.h"
#include "crypto.h"
#include "host.h"
#include "links.h"
#include "links_acl.h"
#include "logging.h"
#include "transports.h"
#include "transport_common.h"
#include "threads_common.h"
#include "threads_heartbeat.h"
#include "threads_pmtud.h"
#include "threads_rx.h"
#include "netutils.h"
#include "onwire_v1.h"
/*
* RECV
*/
/*
* return 1 if a > b
* return -1 if b > a
* return 0 if they are equal
*/
static inline int _timecmp(struct timespec a, struct timespec b)
{
if (a.tv_sec != b.tv_sec) {
if (a.tv_sec > b.tv_sec) {
return 1;
} else {
return -1;
}
} else {
if (a.tv_nsec > b.tv_nsec) {
return 1;
} else if (a.tv_nsec < b.tv_nsec) {
return -1;
} else {
return 0;
}
}
}
/*
* calculate use % of defrag buffers per host
* and if % is <= knet_h->defrag_bufs_shrink_threshold for the last second, then half the size
*/
static void _shrink_defrag_buffers(knet_handle_t knet_h)
{
struct knet_host *host;
struct knet_host_defrag_buf *new_bufs = NULL;
struct timespec now;
unsigned long long time_diff; /* nanoseconds */
uint16_t i, x, in_use_bufs;
uint32_t sum;
/*
* first run.
*/
if ((knet_h->defrag_bufs_last_run.tv_sec == 0) &&
(knet_h->defrag_bufs_last_run.tv_nsec == 0)) {
clock_gettime(CLOCK_MONOTONIC, &knet_h->defrag_bufs_last_run);
return;
}
clock_gettime(CLOCK_MONOTONIC, &now);
timespec_diff(knet_h->defrag_bufs_last_run, now, &time_diff);
if (time_diff < (((unsigned long long)knet_h->defrag_bufs_usage_samples_timespan * 1000000000) / knet_h->defrag_bufs_usage_samples)) {
return;
}
/*
* record the last run
*/
memmove(&knet_h->defrag_bufs_last_run, &now, sizeof(struct timespec));
/*
* do the real work:
*/
for (host = knet_h->host_head; host != NULL; host = host->next) {
/*
* Update buffer usage stats. We do this for all nodes.
*/
in_use_bufs = 0;
for (i = 0; i < host->allocated_defrag_bufs; i++) {
if (host->defrag_bufs[i].in_use) {
in_use_bufs++;
}
}
/*
* record only %
*/
host->in_use_defrag_buffers[host->in_use_defrag_buffers_index] = (in_use_bufs * 100 / host->allocated_defrag_bufs);
host->in_use_defrag_buffers_index++;
/*
* make sure to stay within buffer
*/
if (host->in_use_defrag_buffers_index == knet_h->defrag_bufs_usage_samples) {
host->in_use_defrag_buffers_index = 0;
}
/*
* only allow shrinking if we have enough samples
*/
if (host->in_use_defrag_buffers_samples < knet_h->defrag_bufs_usage_samples) {
host->in_use_defrag_buffers_samples++;
continue;
}
/*
* only allow shrinking if in use bufs are <= knet_h->defrag_bufs_shrink_threshold%
*/
if (knet_h->defrag_bufs_reclaim_policy == RECLAIM_POLICY_AVERAGE) {
sum = 0;
for (i = 0; i < knet_h->defrag_bufs_usage_samples; i++) {
sum += host->in_use_defrag_buffers[i];
}
sum = sum / knet_h->defrag_bufs_usage_samples;
if (sum > knet_h->defrag_bufs_shrink_threshold) {
continue;
}
} else {
sum = 0;
for (i = 0; i < knet_h->defrag_bufs_usage_samples; i++) {
if (host->in_use_defrag_buffers[i] > knet_h->defrag_bufs_shrink_threshold) {
sum = 1;
}
}
if (sum) {
continue;
}
}
/*
* only allow shrinking if allocated bufs > min_defrag_bufs
*/
if (host->allocated_defrag_bufs == knet_h->defrag_bufs_min) {
continue;
}
/*
* compat all the in_use buffers at the beginning.
* we the checks above, we are 100% sure they fit
*/
x = 0;
for (i = 0; i < host->allocated_defrag_bufs; i++) {
if (host->defrag_bufs[i].in_use) {
memmove(&host->defrag_bufs[x], &host->defrag_bufs[i], sizeof(struct knet_host_defrag_buf));
x++;
}
}
/*
* memory allocation is not critical. it just means the system is under
* memory pressure and we will need to wait our turn to free memory... how odd :)
*/
new_bufs = realloc(host->defrag_bufs, sizeof(struct knet_host_defrag_buf) * (host->allocated_defrag_bufs / 2));
if (!new_bufs) {
log_err(knet_h, KNET_SUB_RX, "Unable to decrease defrag buffers for host %u: %s",
host->host_id, strerror(errno));
continue;
}
host->defrag_bufs = new_bufs;
host->allocated_defrag_bufs = host->allocated_defrag_bufs / 2;
/*
* clear buffer use stats. Old ones are no good for new one
*/
_clear_defrag_bufs_stats(host);
log_debug(knet_h, KNET_SUB_RX, "Defrag buffers for host %u decreased from %u to: %u",
host->host_id, host->allocated_defrag_bufs * 2, host->allocated_defrag_bufs);
}
}
/*
* check if we can double the defrag buffers.
*
* return 0 if we cannot reallocate
* return 1 if we have more buffers
*/
static int _realloc_defrag_buffers(knet_handle_t knet_h, struct knet_host *src_host)
{
struct knet_host_defrag_buf *new_bufs = NULL;
int i;
/*
* max_defrag_bufs is a power of 2
* allocated_defrag_bufs doubles on each iteration.
* Sooner or later (and hopefully never) allocated with be == to max.
*/
if (src_host->allocated_defrag_bufs < knet_h->defrag_bufs_max) {
new_bufs = realloc(src_host->defrag_bufs,
src_host->allocated_defrag_bufs * 2 * sizeof(struct knet_host_defrag_buf));
if (!new_bufs) {
log_err(knet_h, KNET_SUB_RX, "Unable to increase defrag buffers for host %u: %s",
src_host->host_id, strerror(errno));
return 0;
}
/*
* keep the math simple here between arrays, pointers and what not.
* Init each buffer individually.
*/
for (i = src_host->allocated_defrag_bufs; i < src_host->allocated_defrag_bufs * 2; i++) {
memset(&new_bufs[i], 0, sizeof(struct knet_host_defrag_buf));
}
src_host->allocated_defrag_bufs = src_host->allocated_defrag_bufs * 2;
src_host->defrag_bufs = new_bufs;
/*
* clear buffer use stats. Old ones are no good for new one
*/
_clear_defrag_bufs_stats(src_host);
log_debug(knet_h, KNET_SUB_RX, "Defrag buffers for host %u increased from %u to: %u",
src_host->host_id, src_host->allocated_defrag_bufs / 2, src_host->allocated_defrag_bufs);
return 1;
}
return 0;
}
/*
* this functions needs to return an index
* to a knet_host_defrag_buf. (-1 on errors)
*/
static int _find_pckt_defrag_buf(knet_handle_t knet_h, struct knet_host *src_host, seq_num_t seq_num)
{
int i, oldest;
uint16_t cur_allocated_defrag_bufs = src_host->allocated_defrag_bufs;
/*
* check if there is a buffer already in use handling the same seq_num
*/
for (i = 0; i < src_host->allocated_defrag_bufs; i++) {
if (src_host->defrag_bufs[i].in_use) {
if (src_host->defrag_bufs[i].pckt_seq == seq_num) {
return i;
}
}
}
/*
* If there is no buffer that's handling the current seq_num
* either it's new or it's been reclaimed already.
* check if it's been reclaimed/seen before using the defrag circular
* buffer. If the pckt has been seen before, the buffer expired (ETIME)
* and there is no point to try to defrag it again.
*/
if (!_seq_num_lookup(knet_h, src_host, seq_num, 1, 0)) {
errno = ETIME;
return -1;
}
/*
* register the pckt as seen
*/
_seq_num_set(src_host, seq_num, 1);
/*
* see if there is a free buffer
*/
for (i = 0; i < src_host->allocated_defrag_bufs; i++) {
if (!src_host->defrag_bufs[i].in_use) {
return i;
}
}
/*
* check if we can increase num of buffers
*/
if (_realloc_defrag_buffers(knet_h, src_host)) {
return cur_allocated_defrag_bufs + 1;
}
/*
* at this point, there are no free buffers, the pckt is new
* and we need to reclaim a buffer, and we will take the one
* with the oldest timestamp. It's as good as any.
*/
oldest = 0;
for (i = 0; i < src_host->allocated_defrag_bufs; i++) {
if (_timecmp(src_host->defrag_bufs[i].last_update, src_host->defrag_bufs[oldest].last_update) < 0) {
oldest = i;
}
}
src_host->defrag_bufs[oldest].in_use = 0;
return oldest;
}
static int _pckt_defrag(knet_handle_t knet_h, struct knet_host *src_host, seq_num_t seq_num, unsigned char *data, ssize_t *len, uint8_t frags, uint8_t frag_seq)
{
struct knet_host_defrag_buf *defrag_buf;
int defrag_buf_idx;
defrag_buf_idx = _find_pckt_defrag_buf(knet_h, src_host, seq_num);
if (defrag_buf_idx < 0) {
return 1;
}
defrag_buf = &src_host->defrag_bufs[defrag_buf_idx];
/*
* if the buf is not is use, then make sure it's clean
*/
if (!defrag_buf->in_use) {
memset(defrag_buf, 0, sizeof(struct knet_host_defrag_buf));
defrag_buf->in_use = 1;
defrag_buf->pckt_seq = seq_num;
}
/*
* update timestamp on the buffer
*/
clock_gettime(CLOCK_MONOTONIC, &defrag_buf->last_update);
/*
* check if we already received this fragment
*/
if (defrag_buf->frag_map[frag_seq]) {
/*
* if we have received this fragment and we didn't clear the buffer
* it means that we don't have all fragments yet
*/
return 1;
}
/*
* we need to handle the last packet with gloves due to its different size
*/
if (frag_seq == frags) {
defrag_buf->last_frag_size = *len;
/*
* in the event when the last packet arrives first,
* we still don't know the offset vs the other fragments (based on MTU),
* so we store the fragment at the end of the buffer where it's safe
* and take a copy of the len so that we can restore its offset later.
* remember we can't use the local MTU for this calculation because pMTU
* can be asymettric between the same hosts.
*/
if (!defrag_buf->frag_size) {
defrag_buf->last_first = 1;
memmove(defrag_buf->buf + (KNET_MAX_PACKET_SIZE - *len),
data,
*len);
}
} else {
defrag_buf->frag_size = *len;
}
if (defrag_buf->frag_size) {
memmove(defrag_buf->buf + ((frag_seq - 1) * defrag_buf->frag_size),
data, *len);
}
defrag_buf->frag_recv++;
defrag_buf->frag_map[frag_seq] = 1;
/*
* check if we received all the fragments
*/
if (defrag_buf->frag_recv == frags) {
/*
* special case the last pckt
*/
if (defrag_buf->last_first) {
memmove(defrag_buf->buf + ((frags - 1) * defrag_buf->frag_size),
defrag_buf->buf + (KNET_MAX_PACKET_SIZE - defrag_buf->last_frag_size),
defrag_buf->last_frag_size);
}
/*
* recalculate packet lenght
*/
*len = ((frags - 1) * defrag_buf->frag_size) + defrag_buf->last_frag_size;
/*
* copy the pckt back in the user data
*/
memmove(data, defrag_buf->buf, *len);
/*
* free this buffer
*/
defrag_buf->in_use = 0;
return 0;
}
return 1;
}
static int _handle_data_stats(knet_handle_t knet_h, struct knet_link *src_link, ssize_t len, uint64_t decrypt_time)
{
int stats_err;
/* data stats at the top for consistency with TX */
src_link->status.stats.rx_data_packets++;
src_link->status.stats.rx_data_bytes += len;
if (decrypt_time) {
stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex);
if (stats_err < 0) {
log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err));
return -1;
}
/* Only update the crypto overhead for data packets. Mainly to be
consistent with TX */
if (decrypt_time < knet_h->stats.rx_crypt_time_min) {
knet_h->stats.rx_crypt_time_min = decrypt_time;
}
if (decrypt_time > knet_h->stats.rx_crypt_time_max) {
knet_h->stats.rx_crypt_time_max = decrypt_time;
}
knet_h->stats.rx_crypt_time_ave =
(knet_h->stats.rx_crypt_time_ave * knet_h->stats.rx_crypt_packets +
decrypt_time) / (knet_h->stats.rx_crypt_packets+1);
knet_h->stats.rx_crypt_packets++;
pthread_mutex_unlock(&knet_h->handle_stats_mutex);
}
return 0;
}
static int _decompress_data(knet_handle_t knet_h, uint8_t decompress_type, unsigned char *data, ssize_t *len, ssize_t header_size)
{
int err = 0, stats_err = 0;
if (decompress_type) {
ssize_t decmp_outlen = KNET_DATABUFSIZE_COMPRESS;
struct timespec start_time;
struct timespec end_time;
uint64_t decompress_time;
clock_gettime(CLOCK_MONOTONIC, &start_time);
err = decompress(knet_h, decompress_type,
data,
*len - header_size,
knet_h->recv_from_links_buf_decompress,
&decmp_outlen);
clock_gettime(CLOCK_MONOTONIC, &end_time);
timespec_diff(start_time, end_time, &decompress_time);
stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex);
if (stats_err < 0) {
log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err));
return -1;
}
if (!err) {
/* Collect stats */
if (decompress_time < knet_h->stats.rx_compress_time_min) {
knet_h->stats.rx_compress_time_min = decompress_time;
}
if (decompress_time > knet_h->stats.rx_compress_time_max) {
knet_h->stats.rx_compress_time_max = decompress_time;
}
knet_h->stats.rx_compress_time_ave =
(knet_h->stats.rx_compress_time_ave * knet_h->stats.rx_compressed_packets +
decompress_time) / (knet_h->stats.rx_compressed_packets+1);
knet_h->stats.rx_compressed_packets++;
knet_h->stats.rx_compressed_original_bytes += decmp_outlen;
knet_h->stats.rx_compressed_size_bytes += *len - KNET_HEADER_SIZE;
memmove(data, knet_h->recv_from_links_buf_decompress, decmp_outlen);
*len = decmp_outlen + header_size;
} else {
knet_h->stats.rx_failed_to_decompress++;
pthread_mutex_unlock(&knet_h->handle_stats_mutex);
log_err(knet_h, KNET_SUB_COMPRESS, "Unable to decompress packet (%d): %s",
err, strerror(errno));
return -1;
}
pthread_mutex_unlock(&knet_h->handle_stats_mutex);
}
return 0;
}
static int _check_destination(knet_handle_t knet_h, struct knet_header *inbuf, unsigned char *data, ssize_t len, ssize_t header_size, int8_t *channel)
{
knet_node_id_t dst_host_ids[KNET_MAX_HOST];
size_t dst_host_ids_entries = 0;
int bcast = 1;
size_t host_idx;
int found = 0;
if (knet_h->dst_host_filter_fn) {
bcast = knet_h->dst_host_filter_fn(
knet_h->dst_host_filter_fn_private_data,
data,
len - header_size,
KNET_NOTIFY_RX,
knet_h->host_id,
inbuf->kh_node,
channel,
dst_host_ids,
&dst_host_ids_entries);
if (bcast < 0) {
log_debug(knet_h, KNET_SUB_RX, "Error from dst_host_filter_fn: %d", bcast);
return -1;
}
if ((!bcast) && (!dst_host_ids_entries)) {
log_debug(knet_h, KNET_SUB_RX, "Message is unicast but no dst_host_ids_entries");
return -1;
}
/* check if we are dst for this packet */
if (!bcast) {
if (dst_host_ids_entries > KNET_MAX_HOST) {
log_debug(knet_h, KNET_SUB_RX, "dst_host_filter_fn returned too many destinations");
return -1;
}
for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) {
if (dst_host_ids[host_idx] == knet_h->host_id) {
found = 1;
break;
}
}
if (!found) {
log_debug(knet_h, KNET_SUB_RX, "Packet is not for us");
return -1;
}
}
}
return 0;
}
static int _deliver_data(knet_handle_t knet_h, unsigned char *data, ssize_t len, ssize_t header_size, int8_t channel)
{
struct iovec iov_out[1];
ssize_t outlen = 0;
memset(iov_out, 0, sizeof(iov_out));
retry:
iov_out[0].iov_base = (void *) data + outlen;
iov_out[0].iov_len = len - (outlen + header_size);
outlen = writev(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], iov_out, 1);
if ((outlen > 0) && (outlen < (ssize_t)iov_out[0].iov_len)) {
log_debug(knet_h, KNET_SUB_RX,
"Unable to send all data to the application in one go. Expected: %zu Sent: %zd\n",
iov_out[0].iov_len, outlen);
goto retry;
}
if (outlen <= 0) {
knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data,
knet_h->sockfd[channel].sockfd[0],
channel,
KNET_NOTIFY_RX,
outlen,
errno);
return -1;
}
if ((size_t)outlen != iov_out[0].iov_len) {
return -1;
}
return 0;
}
static int _fast_data_up(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link)
{
if (src_link->received_pong) {
log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u received data during valid ping/pong activity. Force link up.", src_host->host_id, src_link->link_id);
_link_updown(knet_h, src_host->host_id, src_link->link_id, src_link->status.enabled, 1, 0);
return 1;
}
// host is not eligible for fast data up
return 0;
}
static void _process_data(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len, uint64_t decrypt_time)
{
int8_t channel;
uint8_t decompress_type = 0;
ssize_t header_size;
seq_num_t seq_num;
uint8_t frags, frag_seq;
unsigned char *data;
if (_handle_data_stats(knet_h, src_link, len, decrypt_time) < 0) {
return;
}
/*
* register host is sending data. Required to determine if we need
* to reset circular buffers. (see onwire_v1.c)
*/
src_host->got_data = 1;
if (knet_h->onwire_ver_remap) {
get_data_header_info_v1(knet_h, inbuf, &header_size, &channel, &seq_num, &decompress_type, &frags, &frag_seq);
data = get_data_v1(knet_h, inbuf);
} else {
switch (inbuf->kh_version) {
case 1:
get_data_header_info_v1(knet_h, inbuf, &header_size, &channel, &seq_num, &decompress_type, &frags, &frag_seq);
data = get_data_v1(knet_h, inbuf);
break;
default:
log_warn(knet_h, KNET_SUB_RX, "processing data onwire version %u not supported", inbuf->kh_version);
return;
break;
}
}
if (!_seq_num_lookup(knet_h, src_host, seq_num, 0, 0)) {
if (src_host->link_handler_policy != KNET_LINK_POLICY_ACTIVE) {
log_debug(knet_h, KNET_SUB_RX, "Packet has already been delivered");
}
return;
}
if (frags > 1) {
/*
* len as received from the socket also includes extra stuff
* that the defrag code doesn't care about. So strip it
* here and readd only for repadding once we are done
* defragging
*
* the defrag code assumes that data packets have all the same size
* except the last one that might be smaller.
*
*/
len = len - header_size;
if (_pckt_defrag(knet_h, src_host, seq_num, data, &len, frags, frag_seq)) {
return;
}
len = len + header_size;
}
if (_decompress_data(knet_h, decompress_type, data, &len, header_size) < 0) {
return;
}
if (!src_host->status.reachable) {
if (!_fast_data_up(knet_h, src_host, src_link)) {
log_debug(knet_h, KNET_SUB_RX, "Source host %u not reachable yet. Discarding packet.", src_host->host_id);
return;
}
}
if (knet_h->enabled != 1) /* data forward is disabled */
return;
if (_check_destination(knet_h, inbuf, data, len, header_size, &channel) < 0) {
return;
}
if (!knet_h->sockfd[channel].in_use) {
log_debug(knet_h, KNET_SUB_RX,
"received packet for channel %d but there is no local sock connected",
channel);
return;
}
#ifdef ONWIRE_V1_EXTRA_DEBUG
if (inbuf->khp_data_v1_checksum != compute_chksum(data, len - header_size)) {
log_err(knet_h, KNET_SUB_RX, "Received incorrect data checksum after reassembly from host: %u seq: %u", src_host->host_id, seq_num);
/*
* give a chance to the log threads to pick up the message
*/
sleep(1);
abort();
}
#endif
if (_deliver_data(knet_h, data, len, header_size, channel) < 0) {
return;
}
_seq_num_set(src_host, seq_num, 0);
}
static struct knet_header *_decrypt_packet(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *len, uint64_t *decrypt_time)
{
int try_decrypt = 0;
int i = 0;
struct timespec start_time;
struct timespec end_time;
ssize_t outlen;
for (i = 1; i <= KNET_MAX_CRYPTO_INSTANCES; i++) {
if (knet_h->crypto_instance[i]) {
try_decrypt = 1;
break;
}
}
if ((!try_decrypt) && (knet_h->crypto_only == KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC)) {
log_debug(knet_h, KNET_SUB_RX, "RX thread configured to accept only crypto packets, but no crypto configs are configured!");
return NULL;
}
if (try_decrypt) {
clock_gettime(CLOCK_MONOTONIC, &start_time);
if (crypto_authenticate_and_decrypt(knet_h,
(unsigned char *)inbuf,
*len,
knet_h->recv_from_links_buf_decrypt,
&outlen) < 0) {
log_debug(knet_h, KNET_SUB_RX, "Unable to decrypt/auth packet");
if (knet_h->crypto_only == KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC) {
return NULL;
}
log_debug(knet_h, KNET_SUB_RX, "Attempting to process packet as clear data");
} else {
clock_gettime(CLOCK_MONOTONIC, &end_time);
timespec_diff(start_time, end_time, decrypt_time);
*len = outlen;
inbuf = (struct knet_header *)knet_h->recv_from_links_buf_decrypt;
}
}
return inbuf;
}
static int _packet_checks(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t len)
{
#ifdef ONWIRE_V1_EXTRA_DEBUG
uint32_t rx_packet_checksum, expected_packet_checksum;
#endif
if (len < (ssize_t)(KNET_HEADER_SIZE + 1)) {
log_debug(knet_h, KNET_SUB_RX, "Packet is too short: %ld", (long)len);
return -1;
}
#ifdef ONWIRE_V1_EXTRA_DEBUG
inbuf->kh_node = htons(inbuf->kh_node);
rx_packet_checksum = inbuf->kh_checksum;
inbuf->kh_checksum = 0;
expected_packet_checksum = compute_chksum((const unsigned char *)inbuf, len);
if (rx_packet_checksum != expected_packet_checksum) {
log_err(knet_h, KNET_SUB_RX, "Received packet with incorrect checksum. Received: %u Expected: %u", rx_packet_checksum, expected_packet_checksum);
/*
* give a chance to the log threads to pick up the message
*/
sleep(1);
abort();
}
inbuf->kh_node = ntohs(inbuf->kh_node);
#endif
/*
* old versions of knet did not advertise max_ver and max_ver is set to 0.
*/
if (!inbuf->kh_max_ver) {
inbuf->kh_max_ver = 1;
}
/*
* if the node joining max version is lower than the min version
* then we reject the node
*/
if (inbuf->kh_max_ver < knet_h->onwire_min_ver) {
log_warn(knet_h, KNET_SUB_RX,
"Received packet version %u from node %u, lower than currently minimal supported onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node);
return -1;
}
/*
* if the node joining with version higher than our max version
* then we reject the node
*/
if (inbuf->kh_version > knet_h->onwire_max_ver) {
log_warn(knet_h, KNET_SUB_RX,
"Received packet version %u from node %u, higher than currently maximum supported onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node);
return -1;
}
/*
* if the node joining with version lower than the current in use version
* then we reject the node
*
* NOTE: should we make this configurable and support downgrades?
*/
if ((!knet_h->onwire_force_ver) &&
(inbuf->kh_version < knet_h->onwire_ver) &&
(inbuf->kh_max_ver > inbuf->kh_version)) {
log_warn(knet_h, KNET_SUB_RX,
"Received packet version %u from node %u, lower than currently in use onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node);
return -1;
}
return 0;
}
static void _handle_dynip(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, int sockfd, const struct knet_mmsghdr *msg)
{
if (src_link->dynamic == KNET_LINK_DYNIP) {
if (cmpaddr(&src_link->dst_addr, msg->msg_hdr.msg_name) != 0) {
log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u appears to have changed ip address",
src_host->host_id, src_link->link_id);
memmove(&src_link->dst_addr, msg->msg_hdr.msg_name, sizeof(struct sockaddr_storage));
if (knet_addrtostr(&src_link->dst_addr, sockaddr_len(&src_link->dst_addr),
src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN,
src_link->status.dst_port, KNET_MAX_PORT_LEN) != 0) {
log_debug(knet_h, KNET_SUB_RX, "Unable to resolve ???");
snprintf(src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN - 1, "Unknown!!!");
snprintf(src_link->status.dst_port, KNET_MAX_PORT_LEN - 1, "??");
} else {
log_info(knet_h, KNET_SUB_RX,
"host: %u link: %u new connection established from: %s:%s",
src_host->host_id, src_link->link_id,
src_link->status.dst_ipaddr, src_link->status.dst_port);
}
}
/*
* transport has already accepted the connection here
* otherwise we would not be receiving packets
*/
transport_link_dyn_connect(knet_h, sockfd, src_link);
}
}
/*
* processing incoming packets vs access lists
*/
static int _check_rx_acl(knet_handle_t knet_h, struct knet_link *src_link, const struct knet_mmsghdr *msg)
{
if (knet_h->use_access_lists) {
if (!check_validate(knet_h, src_link, msg->msg_hdr.msg_name)) {
char src_ipaddr[KNET_MAX_HOST_LEN];
char src_port[KNET_MAX_PORT_LEN];
memset(src_ipaddr, 0, KNET_MAX_HOST_LEN);
memset(src_port, 0, KNET_MAX_PORT_LEN);
if (knet_addrtostr(msg->msg_hdr.msg_name, sockaddr_len(msg->msg_hdr.msg_name),
src_ipaddr, KNET_MAX_HOST_LEN,
src_port, KNET_MAX_PORT_LEN) < 0) {
log_warn(knet_h, KNET_SUB_RX, "Packet rejected: unable to resolve host/port");
} else {
log_warn(knet_h, KNET_SUB_RX, "Packet rejected from %s:%s", src_ipaddr, src_port);
}
return 0;
}
}
return 1;
}
static void _parse_recv_from_links(knet_handle_t knet_h, int sockfd, const struct knet_mmsghdr *msg)
{
int savederrno = 0, stats_err = 0;
struct knet_host *src_host;
struct knet_link *src_link;
uint64_t decrypt_time = 0;
struct knet_header *inbuf = msg->msg_hdr.msg_iov->iov_base;
ssize_t len = msg->msg_len;
int i, found_link = 0;
inbuf = _decrypt_packet(knet_h, inbuf, &len, &decrypt_time);
if (!inbuf) {
char src_ipaddr[KNET_MAX_HOST_LEN];
char src_port[KNET_MAX_PORT_LEN];
memset(src_ipaddr, 0, KNET_MAX_HOST_LEN);
memset(src_port, 0, KNET_MAX_PORT_LEN);
if (knet_addrtostr(msg->msg_hdr.msg_name, sockaddr_len(msg->msg_hdr.msg_name),
src_ipaddr, KNET_MAX_HOST_LEN,
src_port, KNET_MAX_PORT_LEN) < 0) {
log_err(knet_h, KNET_SUB_RX, "Unable to decrypt packet from unknown host/port (size %zu)!", len);
} else {
log_err(knet_h, KNET_SUB_RX, "Unable to decrypt packet from %s:%s (size %zu)!", src_ipaddr, src_port, len);
}
return;
}
inbuf->kh_node = ntohs(inbuf->kh_node);
if (_packet_checks(knet_h, inbuf, len) < 0) {
if (knet_h->rx_odd_packets < KNET_RX_ODD_PACKETS_THRESHOLD) {
knet_h->rx_odd_packets++;
} else {
log_warn(knet_h, KNET_SUB_RX, "This node has received more than %u packets that have failed basic sanity checks", KNET_RX_ODD_PACKETS_THRESHOLD);
log_warn(knet_h, KNET_SUB_RX, "It is highly recommended to check if all nodes are using the same crypto configuration");
knet_h->rx_odd_packets = 0;
}
return;
}
/*
* determine source host
*/
src_host = knet_h->host_index[inbuf->kh_node];
if (src_host == NULL) { /* host not found */
log_debug(knet_h, KNET_SUB_RX, "Unable to find source host for this packet");
return;
}
/*
* deteremine source link
*/
if (inbuf->kh_type == KNET_HEADER_TYPE_PING) {
_handle_onwire_version(knet_h, src_host, inbuf);
if (knet_h->onwire_ver_remap) {
src_link = get_link_from_pong_v1(knet_h, src_host, inbuf);
} else {
switch (inbuf->kh_version) {
case 1:
src_link = get_link_from_pong_v1(knet_h, src_host, inbuf);
break;
default:
log_warn(knet_h, KNET_SUB_RX, "Parsing ping onwire version %u not supported", inbuf->kh_version);
return;
break;
}
}
if (!_check_rx_acl(knet_h, src_link, msg)) {
return;
}
_handle_dynip(knet_h, src_host, src_link, sockfd, msg);
} else { /* all other packets */
for (i = 0; i < KNET_MAX_LINK; i++) {
src_link = &src_host->link[i];
if (cmpaddr(&src_link->dst_addr, msg->msg_hdr.msg_name) == 0) {
found_link = 1;
break;
}
}
if (found_link) {
/*
* this check is currently redundant.. Keep it here for now
*/
if (!_check_rx_acl(knet_h, src_link, msg)) {
return;
}
} else {
log_debug(knet_h, KNET_SUB_RX, "Unable to determine source link for data packet. Discarding packet.");
return;
}
}
stats_err = pthread_mutex_lock(&src_link->link_stats_mutex);
if (stats_err) {
log_err(knet_h, KNET_SUB_RX, "Unable to get stats mutex lock for host %u link %u: %s",
src_host->host_id, src_link->link_id, strerror(savederrno));
return;
}
switch (inbuf->kh_type) {
case KNET_HEADER_TYPE_DATA:
_process_data(knet_h, src_host, src_link, inbuf, len, decrypt_time);
break;
case KNET_HEADER_TYPE_PING:
process_ping(knet_h, src_host, src_link, inbuf, len);
break;
case KNET_HEADER_TYPE_PONG:
process_pong(knet_h, src_host, src_link, inbuf, len);
break;
case KNET_HEADER_TYPE_PMTUD:
src_link->status.stats.rx_pmtu_packets++;
src_link->status.stats.rx_pmtu_bytes += len;
/* Unlock so we don't deadlock with tx_mutex */
pthread_mutex_unlock(&src_link->link_stats_mutex);
process_pmtud(knet_h, src_link, inbuf);
return; /* Don't need to unlock link_stats_mutex */
break;
case KNET_HEADER_TYPE_PMTUD_REPLY:
src_link->status.stats.rx_pmtu_packets++;
src_link->status.stats.rx_pmtu_bytes += len;
/* pmtud_mutex can't be acquired while we hold a link_stats_mutex (ordering) */
pthread_mutex_unlock(&src_link->link_stats_mutex);
process_pmtud_reply(knet_h, src_link, inbuf);
return;
break;
default:
pthread_mutex_unlock(&src_link->link_stats_mutex);
return;
break;
}
pthread_mutex_unlock(&src_link->link_stats_mutex);
}
static void _handle_recv_from_links(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg)
{
int err, savederrno;
int i, msg_recv, transport;
if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) {
log_debug(knet_h, KNET_SUB_RX, "Unable to get global read lock");
return;
}
if (_is_valid_fd(knet_h, sockfd) < 1) {
/*
* this is normal if a fd got an event and before we grab the read lock
* and the link is removed by another thread
*/
goto exit_unlock;
}
transport = knet_h->knet_transport_fd_tracker[sockfd].transport;
/*
* reset msg_namelen to buffer size because after recvmmsg
* each msg_namelen will contain sizeof sockaddr_in or sockaddr_in6
*/
for (i = 0; i < PCKT_RX_BUFS; i++) {
msg[i].msg_hdr.msg_namelen = knet_h->knet_transport_fd_tracker[sockfd].sockaddr_len;
}
msg_recv = _recvmmsg(sockfd, &msg[0], PCKT_RX_BUFS, MSG_DONTWAIT | MSG_NOSIGNAL);
savederrno = errno;
/*
* WARNING: man page for recvmmsg is wrong. Kernel implementation here:
* recvmmsg can return:
* -1 on error
* 0 if the previous run of recvmmsg recorded an error on the socket
* N number of messages (see exception below).
*
* If there is an error from recvmsg after receiving a frame or more, the recvmmsg
* loop is interrupted, error recorded in the socket (getsockopt(SO_ERROR) and
* it will be visibile in the next run.
*
* Need to be careful how we handle errors at this stage.
*
* error messages need to be handled on a per transport/protocol base
* at this point we have different layers of error handling
* - msg_recv < 0 -> error from this run
* msg_recv = 0 -> error from previous run and error on socket needs to be cleared
* - per-transport message data
* example: msg[i].msg_hdr.msg_flags & MSG_NOTIFICATION or msg_len for SCTP == EOF,
* but for UDP it is perfectly legal to receive a 0 bytes message.. go figure
* - NOTE: on SCTP MSG_NOTIFICATION we get msg_recv == PCKT_FRAG_MAX messages and no
* errno set. That means the error api needs to be able to abort the loop below.
*/
if (msg_recv <= 0) {
transport_rx_sock_error(knet_h, transport, sockfd, msg_recv, savederrno);
goto exit_unlock;
}
for (i = 0; i < msg_recv; i++) {
err = transport_rx_is_data(knet_h, transport, sockfd, &msg[i]);
/*
* TODO: make this section silent once we are confident
* all protocols packet handlers are good
*/
switch(err) {
case KNET_TRANSPORT_RX_ISDATA_ERROR: /* on error */
log_debug(knet_h, KNET_SUB_RX, "Transport reported error parsing packet");
goto exit_unlock;
break;
case KNET_TRANSPORT_RX_NOT_DATA_CONTINUE: /* packet is not data and we should continue the packet process loop */
log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, continue");
break;
case KNET_TRANSPORT_RX_NOT_DATA_STOP: /* packet is not data and we should STOP the packet process loop */
log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, stop");
goto exit_unlock;
break;
case KNET_TRANSPORT_RX_IS_DATA: /* packet is data and should be parsed as such */
_parse_recv_from_links(knet_h, sockfd, &msg[i]);
break;
case KNET_TRANSPORT_RX_OOB_DATA_CONTINUE:
log_debug(knet_h, KNET_SUB_RX, "Transport is processing sock OOB data, continue");
break;
case KNET_TRANSPORT_RX_OOB_DATA_STOP:
log_debug(knet_h, KNET_SUB_RX, "Transport has completed processing sock OOB data, stop");
goto exit_unlock;
break;
}
}
exit_unlock:
_shrink_defrag_buffers(knet_h);
pthread_rwlock_unlock(&knet_h->global_rwlock);
}
void *_handle_recv_from_links_thread(void *data)
{
int i, nev;
knet_handle_t knet_h = (knet_handle_t) data;
struct epoll_event events[KNET_EPOLL_MAX_EVENTS];
struct sockaddr_storage address[PCKT_RX_BUFS];
struct knet_mmsghdr msg[PCKT_RX_BUFS];
struct iovec iov_in[PCKT_RX_BUFS];
+#ifdef IP_PKTINFO
+ unsigned char control_in[CMSG_SPACE(sizeof(struct in_6pktinfo))][PCKT_RX_BUFS];
+#endif
set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STARTED);
memset(&msg, 0, sizeof(msg));
memset(&events, 0, sizeof(events));
for (i = 0; i < PCKT_RX_BUFS; i++) {
iov_in[i].iov_base = (void *)knet_h->recv_from_links_buf[i];
iov_in[i].iov_len = KNET_DATABUFSIZE;
memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr));
msg[i].msg_hdr.msg_name = &address[i];
msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); /* Real value filled in before actual use */
msg[i].msg_hdr.msg_iov = &iov_in[i];
msg[i].msg_hdr.msg_iovlen = 1;
+#ifdef IP_PKTINFO
+ msg[i].msg_hdr.msg_control = &control_in[0][i];
+ msg[i].msg_hdr.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); /* Largest of the two pktinfo structs */
+#endif
}
while (!shutdown_in_progress(knet_h)) {
nev = epoll_wait(knet_h->recv_from_links_epollfd, events, KNET_EPOLL_MAX_EVENTS, knet_h->threads_timer_res / 1000);
/*
* the RX threads only need to notify that there has been at least
* one successful run after queue flush has been requested.
* See setfwd in handle.c
*/
if (get_thread_flush_queue(knet_h, KNET_THREAD_RX) == KNET_THREAD_QUEUE_FLUSH) {
set_thread_flush_queue(knet_h, KNET_THREAD_RX, KNET_THREAD_QUEUE_FLUSHED);
}
/*
* we use timeout to detect if thread is shutting down
*/
if (nev == 0) {
continue;
}
for (i = 0; i < nev; i++) {
_handle_recv_from_links(knet_h, events[i].data.fd, msg);
}
}
set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STOPPED);
return NULL;
}
ssize_t knet_recv(knet_handle_t knet_h, char *buff, const size_t buff_len, const int8_t channel)
{
int savederrno = 0;
ssize_t err = 0;
struct iovec iov_in;
if (!_is_valid_handle(knet_h)) {
return -1;
}
if (buff == NULL) {
errno = EINVAL;
return -1;
}
if (buff_len <= 0) {
errno = EINVAL;
return -1;
}
if (buff_len > KNET_MAX_PACKET_SIZE) {
errno = EINVAL;
return -1;
}
if (channel < 0) {
errno = EINVAL;
return -1;
}
if (channel >= KNET_DATAFD_MAX) {
errno = EINVAL;
return -1;
}
savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s",
strerror(savederrno));
errno = savederrno;
return -1;
}
if (!knet_h->sockfd[channel].in_use) {
savederrno = EINVAL;
err = -1;
goto out_unlock;
}
memset(&iov_in, 0, sizeof(iov_in));
iov_in.iov_base = (void *)buff;
iov_in.iov_len = buff_len;
err = readv(knet_h->sockfd[channel].sockfd[0], &iov_in, 1);
savederrno = errno;
out_unlock:
pthread_rwlock_unlock(&knet_h->global_rwlock);
errno = err ? savederrno : 0;
return err;
}
diff --git a/libknet/transport_common.c b/libknet/transport_common.c
index fba01a36..d850cba9 100644
--- a/libknet/transport_common.c
+++ b/libknet/transport_common.c
@@ -1,447 +1,448 @@
/*
* Copyright (C) 2016-2024 Red Hat, Inc. All rights reserved.
*
* Author: Fabio M. Di Nitto <fabbione@kronosnet.org>
*
* This software licensed under LGPL-2.0+
*/
#include "config.h"
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include "libknet.h"
#include "compat.h"
#include "host.h"
#include "link.h"
#include "logging.h"
#include "common.h"
#include "transport_common.h"
/*
* reuse Jan Friesse's compat layer as wrapper to drop usage of sendmmsg
*
* TODO: kill those wrappers once we work on packet delivery guarantees
*/
int _recvmmsg(int sockfd, struct knet_mmsghdr *msgvec, unsigned int vlen, unsigned int flags)
{
int savederrno = 0, err = 0;
unsigned int i;
for (i = 0; i < vlen; i++) {
err = recvmsg(sockfd, &msgvec[i].msg_hdr, flags);
savederrno = errno;
if (err >= 0) {
msgvec[i].msg_len = err;
if (err == 0) {
/* No point in reading anything more until we know this has been dealt with
or we'll just get a vector full of them. Several in fact */
i++;
break;
}
} else {
if ((i > 0) &&
((errno == EAGAIN) || (errno == EWOULDBLOCK))) {
savederrno = 0;
}
break;
}
}
errno = savederrno;
return ((i > 0) ? (int)i : err);
}
int _sendmmsg(int sockfd, int connection_oriented, struct knet_mmsghdr *msgvec, unsigned int vlen, unsigned int flags)
{
int savederrno = 0, err = 0;
unsigned int i;
struct msghdr temp_msg;
struct msghdr *use_msghdr;
for (i = 0; i < vlen; i++) {
if (connection_oriented == TRANSPORT_PROTO_IS_CONNECTION_ORIENTED) {
memcpy(&temp_msg, &msgvec[i].msg_hdr, sizeof(struct msghdr));
temp_msg.msg_name = NULL;
temp_msg.msg_namelen = 0;
use_msghdr = &temp_msg;
} else {
use_msghdr = &msgvec[i].msg_hdr;
}
err = sendmsg(sockfd, use_msghdr, flags);
savederrno = errno;
if (err < 0) {
break;
}
}
errno = savederrno;
return ((i > 0) ? (int)i : err);
}
/* Assume neither of these constants can ever be zero */
#ifndef SO_RCVBUFFORCE
#define SO_RCVBUFFORCE 0
#endif
#ifndef SO_SNDBUFFORCE
#define SO_SNDBUFFORCE 0
#endif
static int _configure_sockbuf(knet_handle_t knet_h, int sock, int option, int force, int target)
{
int savederrno = 0;
int new_value;
socklen_t value_len = sizeof new_value;
if (setsockopt(sock, SOL_SOCKET, option, &target, sizeof target) != 0) {
savederrno = errno;
log_err(knet_h, KNET_SUB_TRANSPORT,
"Error setting socket buffer via option %d to value %d: %s\n",
option, target, strerror(savederrno));
errno = savederrno;
return -1;
}
if (getsockopt(sock, SOL_SOCKET, option, &new_value, &value_len) != 0) {
savederrno = errno;
log_err(knet_h, KNET_SUB_TRANSPORT,
"Error getting socket buffer via option %d: %s\n",
option, strerror(savederrno));
errno = savederrno;
return -1;
}
if (value_len != sizeof new_value) {
log_err(knet_h, KNET_SUB_TRANSPORT,
"Socket option %d returned unexpected size %u\n",
option, value_len);
errno = ERANGE;
return -1;
}
if (target <= new_value) {
return 0;
}
if (!force || !(knet_h->flags & KNET_HANDLE_FLAG_PRIVILEGED)) {
log_err(knet_h, KNET_SUB_TRANSPORT,
"Failed to set socket buffer via option %d to value %d: capped at %d",
option, target, new_value);
if (!(knet_h->flags & KNET_HANDLE_FLAG_PRIVILEGED)) {
log_err(knet_h, KNET_SUB_TRANSPORT,
"Continuing regardless, as the handle is not privileged."
" Expect poor performance!");
return 0;
} else {
errno = ENAMETOOLONG;
return -1;
}
}
if (setsockopt(sock, SOL_SOCKET, force, &target, sizeof target) < 0) {
savederrno = errno;
log_err(knet_h, KNET_SUB_TRANSPORT,
"Failed to set socket buffer via force option %d: %s",
force, strerror(savederrno));
if (savederrno == EPERM) {
errno = ENAMETOOLONG;
} else {
errno = savederrno;
}
return -1;
}
return 0;
}
int _configure_common_socket(knet_handle_t knet_h, int sock, uint64_t flags, const char *type)
{
int err = 0, savederrno = 0;
int value;
if (_fdset_cloexec(sock)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set %s CLOEXEC socket opts: %s",
type, strerror(savederrno));
goto exit_error;
}
if (_fdset_nonblock(sock)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set %s NONBLOCK socket opts: %s",
type, strerror(savederrno));
goto exit_error;
}
if (_configure_sockbuf(knet_h, sock, SO_RCVBUF, SO_RCVBUFFORCE, KNET_RING_RCVBUFF)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set %s receive buffer: %s",
type, strerror(savederrno));
goto exit_error;
}
if (_configure_sockbuf(knet_h, sock, SO_SNDBUF, SO_SNDBUFFORCE, KNET_RING_RCVBUFF)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set %s send buffer: %s",
type, strerror(savederrno));
goto exit_error;
}
if (flags & KNET_LINK_FLAG_TRAFFICHIPRIO) {
#ifdef KNET_LINUX
#ifdef SO_PRIORITY
value = 6; /* TC_PRIO_INTERACTIVE */
if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &value, sizeof(value)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set %s priority: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "TC_PRIO_INTERACTIVE enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "TC_PRIO_INTERACTIVE not available in this build/platform");
#endif
#endif
#if defined(IP_TOS) && defined(IPTOS_LOWDELAY)
value = IPTOS_LOWDELAY;
if (setsockopt(sock, IPPROTO_IP, IP_TOS, &value, sizeof(value)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set %s priority: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "IPTOS_LOWDELAY enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "IPTOS_LOWDELAY not available in this build/platform");
#endif
}
exit_error:
errno = savederrno;
return err;
}
int _configure_transport_socket(knet_handle_t knet_h, int sock, struct sockaddr_storage *address, uint64_t flags, const char *type)
{
int err = 0, savederrno = 0;
int value;
if (_configure_common_socket(knet_h, sock, flags, type) < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
#ifdef KNET_LINUX
#ifdef IP_FREEBIND
value = 1;
if (setsockopt(sock, SOL_IP, IP_FREEBIND, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set FREEBIND on %s socket: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "FREEBIND enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "FREEBIND not available in this build/platform");
#endif
#endif
#ifdef KNET_BSD
#ifdef IP_BINDANY /* BSD */
value = 1;
if (setsockopt(sock, IPPROTO_IP, IP_BINDANY, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set BINDANY on %s socket: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "BINDANY enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "BINDANY not available in this build/platform");
#endif
#endif
if (address->ss_family == AF_INET6) {
value = 1;
if (setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
&value, sizeof(value)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set %s IPv6 only: %s",
type, strerror(savederrno));
goto exit_error;
}
#ifdef KNET_LINUX
#ifdef IPV6_MTU_DISCOVER
value = IPV6_PMTUDISC_PROBE;
if (setsockopt(sock, SOL_IPV6, IPV6_MTU_DISCOVER, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set PMTUDISC on %s socket: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "IPV6_MTU_DISCOVER enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "IPV6_MTU_DISCOVER not available in this build/platform");
#endif
#endif
#ifdef IPV6_DONTFRAG
value = 1;
if (setsockopt(sock, IPPROTO_IPV6, IPV6_DONTFRAG, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set DONTFRAG on %s socket: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "IPV6_DONTFRAG enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "IPV6_DONTFRAG not available in this build/platform");
#endif
} else {
#ifdef KNET_LINUX
#ifdef IP_MTU_DISCOVER
value = IP_PMTUDISC_PROBE;
if (setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set PMTUDISC on %s socket: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "PMTUDISC enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "PMTUDISC not available in this build/platform");
#endif
#endif
#ifdef KNET_BSD
#ifdef IP_DONTFRAG
value = 1;
if (setsockopt(sock, IPPROTO_IP, IP_DONTFRAG, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set DONTFRAG on %s socket: %s",
type, strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSPORT, "DONTFRAG enabled on socket: %i", sock);
#else
log_debug(knet_h, KNET_SUB_TRANSPORT, "DONTFRAG not available in this build/platform");
#endif
#endif
}
exit_error:
errno = savederrno;
return err;
}
int _init_socketpair(knet_handle_t knet_h, int *sock)
{
int err = 0, savederrno = 0;
int i;
if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sock) != 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize socketpair: %s",
strerror(savederrno));
goto exit_fail;
}
for (i = 0; i < 2; i++) {
if (_configure_common_socket(knet_h, sock[i], 0, "local socketpair") < 0) {
savederrno = errno;
err = -1;
goto exit_fail;
}
}
exit_fail:
errno = savederrno;
return err;
}
void _close_socketpair(knet_handle_t knet_h, int *sock)
{
int i;
for (i = 0; i < 2; i++) {
if (sock[i]) {
close(sock[i]);
sock[i] = 0;
}
}
}
/*
* must be called with global read lock
*
* return -1 on error
* return 0 if fd is invalid
* return 1 if fd is valid
*/
int _is_valid_fd(knet_handle_t knet_h, int sockfd)
{
int ret = 0;
if (sockfd < 0) {
errno = EINVAL;
return -1;
}
if (sockfd >= KNET_MAX_FDS) {
errno = EINVAL;
return -1;
}
if (knet_h->knet_transport_fd_tracker[sockfd].transport >= KNET_MAX_TRANSPORTS) {
ret = 0;
} else {
ret = 1;
}
return ret;
}
/*
* must be called with global write lock
*/
-int _set_fd_tracker(knet_handle_t knet_h, int sockfd, uint8_t transport, uint8_t data_type, socklen_t socklen, void *data)
+int _set_fd_tracker(knet_handle_t knet_h, int sockfd, uint8_t transport, uint8_t data_type, socklen_t socklen, void *data, int ifindex)
{
if (sockfd < 0) {
errno = EINVAL;
return -1;
}
if (sockfd >= KNET_MAX_FDS) {
errno = EINVAL;
return -1;
}
knet_h->knet_transport_fd_tracker[sockfd].transport = transport;
knet_h->knet_transport_fd_tracker[sockfd].data_type = data_type;
knet_h->knet_transport_fd_tracker[sockfd].sockaddr_len = socklen;
knet_h->knet_transport_fd_tracker[sockfd].data = data;
+ knet_h->knet_transport_fd_tracker[sockfd].ifindex = ifindex;
return 0;
}
diff --git a/libknet/transport_common.h b/libknet/transport_common.h
index 8783457c..b454cffd 100644
--- a/libknet/transport_common.h
+++ b/libknet/transport_common.h
@@ -1,24 +1,24 @@
/*
* Copyright (C) 2016-2024 Red Hat, Inc. All rights reserved.
*
* Authors: Fabio M. Di Nitto <fabbione@kronosnet.org>
*
* This software licensed under LGPL-2.0+
*/
#ifndef __KNET_TRANSPORT_COMMON_H__
#define __KNET_TRANSPORT_COMMON_H__
int _configure_common_socket(knet_handle_t knet_h, int sock, uint64_t flags, const char *type);
int _configure_transport_socket(knet_handle_t knet_h, int sock, struct sockaddr_storage *address, uint64_t flags, const char *type);
int _init_socketpair(knet_handle_t knet_h, int *sock);
void _close_socketpair(knet_handle_t knet_h, int *sock);
-int _set_fd_tracker(knet_handle_t knet_h, int sockfd, uint8_t transport, uint8_t data_type, socklen_t socklen, void *data);
+int _set_fd_tracker(knet_handle_t knet_h, int sockfd, uint8_t transport, uint8_t data_type, socklen_t socklen, void *data, int ifindex);
int _is_valid_fd(knet_handle_t knet_h, int sockfd);
int _sendmmsg(int sockfd, int connection_oriented, struct knet_mmsghdr *msgvec, unsigned int vlen, unsigned int flags);
int _recvmmsg(int sockfd, struct knet_mmsghdr *msgvec, unsigned int vlen, unsigned int flags);
#endif
diff --git a/libknet/transport_sctp.c b/libknet/transport_sctp.c
index 7d800f0e..caf067cb 100644
--- a/libknet/transport_sctp.c
+++ b/libknet/transport_sctp.c
@@ -1,1637 +1,1637 @@
/*
* Copyright (C) 2016-2024 Red Hat, Inc. All rights reserved.
*
* Author: Christine Caulfield <ccaulfie@redhat.com>
*
* This software licensed under LGPL-2.0+
*/
#include "config.h"
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <stdlib.h>
#include <assert.h>
#include "compat.h"
#include "host.h"
#include "links.h"
#include "links_acl.h"
#include "links_acl_ip.h"
#include "logging.h"
#include "netutils.h"
#include "common.h"
#include "transport_common.h"
#include "transports.h"
#include "threads_common.h"
#ifdef HAVE_NETINET_SCTP_H
#include <netinet/sctp.h>
#include "transport_sctp.h"
typedef struct sctp_handle_info {
struct qb_list_head listen_links_list;
struct qb_list_head connect_links_list;
int connect_epollfd;
int connectsockfd[2];
int listen_epollfd;
int listensockfd[2];
pthread_t connect_thread;
pthread_t listen_thread;
socklen_t event_subscribe_kernel_size;
char *event_subscribe_buffer;
} sctp_handle_info_t;
/*
* use by fd_tracker data type
*/
#define SCTP_NO_LINK_INFO 0
#define SCTP_LISTENER_LINK_INFO 1
#define SCTP_ACCEPTED_LINK_INFO 2
#define SCTP_CONNECT_LINK_INFO 3
/*
* this value is per listener
*/
#define MAX_ACCEPTED_SOCKS 256
typedef struct sctp_listen_link_info {
struct qb_list_head list;
int listen_sock;
int accepted_socks[MAX_ACCEPTED_SOCKS];
struct sockaddr_storage src_address;
int on_listener_epoll;
int on_rx_epoll;
int sock_shutdown;
} sctp_listen_link_info_t;
typedef struct sctp_accepted_link_info {
char mread_buf[KNET_DATABUFSIZE];
ssize_t mread_len;
sctp_listen_link_info_t *link_info;
} sctp_accepted_link_info_t ;
typedef struct sctp_connect_link_info {
struct qb_list_head list;
sctp_listen_link_info_t *listener;
struct knet_link *link;
struct sockaddr_storage dst_address;
int connect_sock;
int on_rx_epoll;
int close_sock;
int sock_shutdown;
} sctp_connect_link_info_t;
/*
* socket handling functions
*
* those functions do NOT perform locking. locking
* should be handled in the right context from callers
*/
/*
* sockets are removed from rx_epoll from callers
* see also error handling functions
*/
static int _close_connect_socket(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
struct epoll_event ev;
sctp_connect_link_info_t *info = kn_link->transport_link;
if (info->connect_sock != -1) {
if (info->on_rx_epoll) {
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = info->connect_sock;
if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->connect_sock, &ev)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove connected socket from epoll pool: %s",
strerror(savederrno));
goto exit_error;
}
info->on_rx_epoll = 0;
}
- if (_set_fd_tracker(knet_h, info->connect_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) {
+ if (_set_fd_tracker(knet_h, info->connect_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s",
strerror(savederrno));
} else {
close(info->connect_sock);
info->connect_sock = -1;
}
}
exit_error:
errno = savederrno;
return err;
}
static int _enable_sctp_notifications(knet_handle_t knet_h, int sock, const char *type)
{
int err = 0, savederrno = 0;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
if (setsockopt(sock, IPPROTO_SCTP, SCTP_EVENTS,
handle_info->event_subscribe_buffer,
handle_info->event_subscribe_kernel_size) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to enable %s events: %s",
type, strerror(savederrno));
}
errno = savederrno;
return err;
}
static int _configure_sctp_socket(knet_handle_t knet_h, int sock, struct sockaddr_storage *address, uint64_t flags, const char *type)
{
int err = 0, savederrno = 0;
int value;
int level;
#ifdef SOL_SCTP
level = SOL_SCTP;
#else
level = IPPROTO_SCTP;
#endif
if (_configure_transport_socket(knet_h, sock, address, flags, type) < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
value = 1;
if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &value, sizeof(value)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set reuseaddr on socket %d: %s",
sock, strerror(savederrno));
goto exit_error;
}
value = 1;
if (setsockopt(sock, level, SCTP_NODELAY, &value, sizeof(value)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set sctp nodelay: %s",
strerror(savederrno));
goto exit_error;
}
if (_enable_sctp_notifications(knet_h, sock, type) < 0) {
savederrno = errno;
err = -1;
}
exit_error:
errno = savederrno;
return err;
}
static int _reconnect_socket(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
sctp_connect_link_info_t *info = kn_link->transport_link;
if (connect(info->connect_sock, (struct sockaddr *)&kn_link->dst_addr, sockaddr_len(&kn_link->dst_addr)) < 0) {
savederrno = errno;
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP socket %d received error: %s", info->connect_sock, strerror(savederrno));
if ((savederrno != EALREADY) && (savederrno != EINPROGRESS) && (savederrno != EISCONN)) {
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to connect SCTP socket %d: %s",
info->connect_sock, strerror(savederrno));
}
}
errno = savederrno;
return err;
}
static int _create_connect_socket(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
struct epoll_event ev;
sctp_connect_link_info_t *info = kn_link->transport_link;
int connect_sock;
struct sockaddr_storage connect_addr;
connect_sock = socket(kn_link->dst_addr.ss_family, SOCK_STREAM, IPPROTO_SCTP);
if (connect_sock < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create send/recv socket: %s",
strerror(savederrno));
goto exit_error;
}
if (_configure_sctp_socket(knet_h, connect_sock, &kn_link->dst_addr, kn_link->flags, "SCTP connect") < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
memset(&connect_addr, 0, sizeof(struct sockaddr_storage));
if (knet_strtoaddr(kn_link->status.src_ipaddr, "0", &connect_addr, sockaddr_len(&connect_addr)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to resolve connecting socket: %s",
strerror(savederrno));
goto exit_error;
}
if (bind(connect_sock, (struct sockaddr *)&connect_addr, sockaddr_len(&connect_addr)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to bind connecting socket: %s",
strerror(savederrno));
goto exit_error;
}
- if (_set_fd_tracker(knet_h, connect_sock, KNET_TRANSPORT_SCTP, SCTP_CONNECT_LINK_INFO, sockaddr_len(&kn_link->src_addr), info) < 0) {
+ if (_set_fd_tracker(knet_h, connect_sock, KNET_TRANSPORT_SCTP, SCTP_CONNECT_LINK_INFO, sockaddr_len(&kn_link->src_addr), info, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s",
strerror(savederrno));
goto exit_error;
}
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = connect_sock;
if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, connect_sock, &ev)) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add connected socket to epoll pool: %s",
strerror(errno));
}
info->on_rx_epoll = 1;
info->connect_sock = connect_sock;
info->close_sock = 0;
kn_link->outsock = info->connect_sock;
if (_reconnect_socket(knet_h, kn_link) < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
exit_error:
if (err) {
if (connect_sock >= 0) {
close(connect_sock);
}
}
errno = savederrno;
return err;
}
static void _lock_sleep_relock(knet_handle_t knet_h)
{
int i = 0;
/* Don't hold onto the lock while sleeping */
pthread_rwlock_unlock(&knet_h->global_rwlock);
while (i < 5) {
usleep(knet_h->threads_timer_res / 16);
if (!pthread_rwlock_rdlock(&knet_h->global_rwlock)) {
/*
* lock acquired, we can go out
*/
return;
} else {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get read lock!");
i++;
}
}
/*
* time to crash! if we cannot re-acquire the lock
* there is no easy way out of this one
*/
assert(0);
}
transport_sock_error_t sctp_transport_tx_sock_error(knet_handle_t knet_h, int sockfd, int subsys, int recv_err, int recv_errno)
{
sctp_connect_link_info_t *connect_info = knet_h->knet_transport_fd_tracker[sockfd].data;
sctp_accepted_link_info_t *accepted_info = knet_h->knet_transport_fd_tracker[sockfd].data;
sctp_listen_link_info_t *listen_info;
if (recv_err < 0) {
switch (knet_h->knet_transport_fd_tracker[sockfd].data_type) {
case SCTP_CONNECT_LINK_INFO:
if (connect_info->link->transport_connected == 0) {
return KNET_TRANSPORT_SOCK_ERROR_INTERNAL;
}
break;
case SCTP_ACCEPTED_LINK_INFO:
listen_info = accepted_info->link_info;
if (listen_info->listen_sock != sockfd) {
if (listen_info->on_rx_epoll == 0) {
return KNET_TRANSPORT_SOCK_ERROR_INTERNAL;
}
}
break;
}
if (recv_errno == EAGAIN) {
log_trace(knet_h, KNET_SUB_TRANSP_SCTP, "Sock: %d is overloaded. Slowing TX down", sockfd);
_lock_sleep_relock(knet_h);
return KNET_TRANSPORT_SOCK_ERROR_RETRY;
}
return KNET_TRANSPORT_SOCK_ERROR_INTERNAL;
}
return KNET_TRANSPORT_SOCK_ERROR_IGNORE;
}
/*
* socket error management functions
*
* both called with global read lock.
*
* NOTE: we need to remove the fd from the epoll as soon as possible
* even before we notify the respective thread to take care of it
* because scheduling can make it so that this thread will overload
* and the threads supposed to take care of the error will never
* be able to take action.
* we CANNOT handle FDs here directly (close/reconnect/etc) due
* to locking context. We need to delegate that to their respective
* management threads within the global write lock.
*
* this function is called from:
* - RX thread with recv_err <= 0 directly on recvmmsg error
* - transport_rx_is_data when msg_len == 0 (recv_err = 1)
* - transport_rx_is_data on notification (recv_err = 2)
*
* basically this small abuse of recv_err is to detect notifications
* generated by sockets created by listen().
*/
transport_sock_error_t sctp_transport_rx_sock_error(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno)
{
struct epoll_event ev;
sctp_accepted_link_info_t *accepted_info = knet_h->knet_transport_fd_tracker[sockfd].data;
sctp_listen_link_info_t *listen_info;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
switch (knet_h->knet_transport_fd_tracker[sockfd].data_type) {
case SCTP_CONNECT_LINK_INFO:
/*
* all connect link have notifications enabled
* and we accept only data from notification and
* generic recvmmsg errors.
*
* Errors generated by msg_len 0 can be ignored because
* they follow a notification (double notification)
*/
if (recv_err != 1) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying connect thread that sockfd %d received an error", sockfd);
if (sendto(handle_info->connectsockfd[1], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify connect thread: %s", strerror(errno));
}
}
break;
case SCTP_ACCEPTED_LINK_INFO:
listen_info = accepted_info->link_info;
if (listen_info->listen_sock != sockfd) {
if (recv_err != 1) {
if (listen_info->on_rx_epoll) {
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = sockfd;
if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, sockfd, &ev)) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove EOFed socket from epoll pool: %s",
strerror(errno));
return -1;
}
listen_info->on_rx_epoll = 0;
}
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying listen thread that sockfd %d received an error", sockfd);
if (sendto(handle_info->listensockfd[1], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify listen thread: %s", strerror(errno));
}
}
} else {
/*
* this means the listen() socket has generated
* a notification. now what? :-)
*/
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for listen() socket %d", sockfd);
}
break;
default:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received unknown notification? %d", sockfd);
break;
}
/*
* Under RX pressure we need to give time to IPC to pick up the message
*/
_lock_sleep_relock(knet_h);
return 0;
}
/*
* NOTE: sctp_transport_rx_is_data is called with global rdlock
* delegate any FD error management to sctp_transport_rx_sock_error
* and keep this code to parsing incoming data only
*/
transport_rx_isdata_t sctp_transport_rx_is_data(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg)
{
size_t i;
struct iovec *iov = msg->msg_hdr.msg_iov;
size_t iovlen = msg->msg_hdr.msg_iovlen;
struct sctp_assoc_change *sac;
union sctp_notification *snp;
sctp_accepted_link_info_t *listen_info = knet_h->knet_transport_fd_tracker[sockfd].data;
sctp_connect_link_info_t *connect_info = knet_h->knet_transport_fd_tracker[sockfd].data;
if (!(msg->msg_hdr.msg_flags & MSG_NOTIFICATION)) {
if (msg->msg_len == 0) {
/*
* NOTE: with event notification enabled, we receive error twice:
* 1) from the event notification
* 2) followed by a 0 byte msg_len
*
* the event handler should take care to avoid #2 by stopping
* the rx thread from processing more packets than necessary.
*/
if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) {
if (connect_info->sock_shutdown) {
return KNET_TRANSPORT_RX_OOB_DATA_CONTINUE;
}
} else {
if (listen_info->link_info->sock_shutdown) {
return KNET_TRANSPORT_RX_OOB_DATA_CONTINUE;
}
}
/*
* this is pretty much dead code and we should never hit it.
* keep it for safety and avoid the rx thread to process
* bad info / data.
*/
return KNET_TRANSPORT_RX_NOT_DATA_STOP;
}
/*
* missing MSG_EOR has to be treated as a short read
* from the socket and we need to fill in the mread buf
* while we wait for MSG_EOR
*/
if (!(msg->msg_hdr.msg_flags & MSG_EOR)) {
/*
* copy the incoming data into mread_buf + mread_len (incremental)
* and increase mread_len
*/
memmove(listen_info->mread_buf + listen_info->mread_len, iov->iov_base, msg->msg_len);
listen_info->mread_len = listen_info->mread_len + msg->msg_len;
return KNET_TRANSPORT_RX_NOT_DATA_CONTINUE;
}
/*
* got EOR.
* if mread_len is > 0 we are completing a packet from short reads
* complete reassembling the packet in mread_buf, copy it back in the iov
* and set the iov/msg len numbers (size) correctly
*/
if (listen_info->mread_len) {
/*
* add last fragment to mread_buf
*/
memmove(listen_info->mread_buf + listen_info->mread_len, iov->iov_base, msg->msg_len);
listen_info->mread_len = listen_info->mread_len + msg->msg_len;
/*
* move all back into the iovec
*/
memmove(iov->iov_base, listen_info->mread_buf, listen_info->mread_len);
msg->msg_len = listen_info->mread_len;
listen_info->mread_len = 0;
}
return KNET_TRANSPORT_RX_IS_DATA;
}
if (!(msg->msg_hdr.msg_flags & MSG_EOR)) {
return KNET_TRANSPORT_RX_NOT_DATA_STOP;
}
for (i = 0; i < iovlen; i++) {
snp = iov[i].iov_base;
switch (snp->sn_header.sn_type) {
case SCTP_ASSOC_CHANGE:
sac = &snp->sn_assoc_change;
switch (sac->sac_state) {
case SCTP_COMM_LOST:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: comm_lost", sockfd);
if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) {
connect_info->close_sock = 1;
connect_info->link->transport_connected = 0;
}
sctp_transport_rx_sock_error(knet_h, sockfd, 2, 0);
return KNET_TRANSPORT_RX_OOB_DATA_STOP;
break;
case SCTP_COMM_UP:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: comm_up", sockfd);
if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) {
connect_info->link->transport_connected = 1;
}
break;
case SCTP_RESTART:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: restart", sockfd);
break;
case SCTP_SHUTDOWN_COMP:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: shutdown comp", sockfd);
if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) {
connect_info->close_sock = 1;
}
sctp_transport_rx_sock_error(knet_h, sockfd, 2, 0);
return KNET_TRANSPORT_RX_OOB_DATA_STOP;
break;
case SCTP_CANT_STR_ASSOC:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: cant str assoc", sockfd);
sctp_transport_rx_sock_error(knet_h, sockfd, 2, 0);
break;
default:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change socket %d: unknown %d", sockfd, sac->sac_state);
break;
}
break;
case SCTP_SHUTDOWN_EVENT:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp shutdown event socket %d", sockfd);
if (knet_h->knet_transport_fd_tracker[sockfd].data_type == SCTP_CONNECT_LINK_INFO) {
connect_info->link->transport_connected = 0;
connect_info->sock_shutdown = 1;
} else {
listen_info->link_info->sock_shutdown = 1;
}
break;
case SCTP_SEND_FAILED:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp send failed socket: %d", sockfd);
break;
case SCTP_PEER_ADDR_CHANGE:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp peer addr change socket %d", sockfd);
break;
case SCTP_REMOTE_ERROR:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp remote error socket %d", sockfd);
break;
default:
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] unknown sctp event socket: %d type: %hu", sockfd, snp->sn_header.sn_type);
break;
}
}
return KNET_TRANSPORT_RX_OOB_DATA_CONTINUE;
}
int sctp_transport_link_is_down(knet_handle_t knet_h, struct knet_link *kn_link)
{
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
sctp_connect_link_info_t *info = kn_link->transport_link;
kn_link->transport_connected = 0;
info->close_sock = 1;
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying connect thread that sockfd %d received a link down event", info->connect_sock);
if (sendto(handle_info->connectsockfd[1], &info->connect_sock, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify connect thread: %s", strerror(errno));
}
return 0;
}
/*
* connect / outgoing socket management thread
*/
/*
* _handle_connected_sctp* are called with a global write lock
* from the connect_thread
*/
static void _handle_connected_sctp_socket(knet_handle_t knet_h, int connect_sock)
{
int err;
unsigned int status, len = sizeof(status);
sctp_connect_link_info_t *info = knet_h->knet_transport_fd_tracker[connect_sock].data;
struct knet_link *kn_link = info->link;
if (info->close_sock) {
if (_close_connect_socket(knet_h, kn_link) < 0) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to close sock %d from _handle_connected_sctp_socket: %s", connect_sock, strerror(errno));
return;
}
info->close_sock = 0;
if (_create_connect_socket(knet_h, kn_link) < 0) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to recreate connecting sock! %s", strerror(errno));
return;
}
}
_reconnect_socket(knet_h, info->link);
err = getsockopt(connect_sock, SOL_SOCKET, SO_ERROR, &status, &len);
if (err) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP getsockopt() on connecting socket %d failed: %s",
connect_sock, strerror(errno));
return;
}
if (status) {
log_info(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP connect on %d to %s port %s failed: %s",
connect_sock, kn_link->status.dst_ipaddr, kn_link->status.dst_port,
strerror(status));
/*
* No need to create a new socket if connect failed,
* just retry connect
*/
return;
}
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP handler fd %d now connected to %s port %s",
connect_sock,
kn_link->status.dst_ipaddr, kn_link->status.dst_port);
}
static void _handle_connected_sctp_notifications(knet_handle_t knet_h)
{
int sockfd = -1;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
if (recv(handle_info->connectsockfd[0], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL) != sizeof(int)) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Short read on connectsockfd");
return;
}
if (_is_valid_fd(knet_h, sockfd) < 1) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for connected socket fd error");
return;
}
/*
* revalidate sockfd
*/
if ((sockfd < 0) || (sockfd >= KNET_MAX_FDS)) {
return;
}
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Processing connected error on socket: %d", sockfd);
_handle_connected_sctp_socket(knet_h, sockfd);
}
static void *_sctp_connect_thread(void *data)
{
int savederrno;
int i, nev;
knet_handle_t knet_h = (knet_handle_t) data;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
struct epoll_event events[KNET_EPOLL_MAX_EVENTS];
set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_STARTED);
memset(&events, 0, sizeof(events));
while (!shutdown_in_progress(knet_h)) {
nev = epoll_wait(handle_info->connect_epollfd, events, KNET_EPOLL_MAX_EVENTS, knet_h->threads_timer_res / 1000);
/*
* we use timeout to detect if thread is shutting down
*/
if (nev == 0) {
continue;
}
if (nev < 0) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP connect handler EPOLL ERROR: %s",
strerror(errno));
continue;
}
/*
* Sort out which FD has a connection
*/
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get write lock: %s",
strerror(savederrno));
continue;
}
/*
* minor optimization: deduplicate events
*
* in some cases we can receive multiple notifcations
* of the same FD having issues or need handling.
* It's enough to process it once even tho it's safe
* to handle them multiple times.
*/
for (i = 0; i < nev; i++) {
if (events[i].data.fd == handle_info->connectsockfd[0]) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received notification from rx_error for connected socket");
_handle_connected_sctp_notifications(knet_h);
} else {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification on connected sockfd %d\n", events[i].data.fd);
}
}
pthread_rwlock_unlock(&knet_h->global_rwlock);
/*
* this thread can generate events for itself.
* we need to sleep in between loops to allow other threads
* to be scheduled
*/
usleep(knet_h->reconnect_int * 1000);
}
set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_STOPPED);
return NULL;
}
/*
* listen/incoming connections management thread
*/
/*
* Listener received a new connection
* called with a write lock from main thread
*/
static void _handle_incoming_sctp(knet_handle_t knet_h, int listen_sock)
{
int err = 0, savederrno = 0;
int new_fd;
int i = -1;
sctp_listen_link_info_t *info = knet_h->knet_transport_fd_tracker[listen_sock].data;
struct epoll_event ev;
struct sockaddr_storage ss;
socklen_t sock_len = sizeof(ss);
char addr_str[KNET_MAX_HOST_LEN];
char port_str[KNET_MAX_PORT_LEN];
sctp_accepted_link_info_t *accept_info = NULL;
struct knet_host *host;
struct knet_link *kn_link;
int link_idx;
sctp_connect_link_info_t *this_link_connect_info;
sctp_listen_link_info_t *this_link_listen_info;
int pass_acl = 0;
memset(&ss, 0, sizeof(ss));
new_fd = accept(listen_sock, (struct sockaddr *)&ss, &sock_len);
if (new_fd < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: accept error: %s", strerror(errno));
goto exit_error;
}
if (knet_addrtostr(&ss, sizeof(ss),
addr_str, KNET_MAX_HOST_LEN,
port_str, KNET_MAX_PORT_LEN) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: unable to gather socket info");
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: received connection from: %s port: %s",
addr_str, port_str);
if (knet_h->use_access_lists) {
for (host = knet_h->host_head; host != NULL; host = host->next) {
for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
kn_link = &host->link[link_idx];
if ((kn_link->configured) && (kn_link->transport == KNET_TRANSPORT_SCTP)) {
this_link_connect_info = kn_link->transport_link;
this_link_listen_info = this_link_connect_info->listener;
if ((this_link_listen_info->listen_sock == listen_sock) &&
(check_validate(knet_h, kn_link, &ss))) {
pass_acl = 1;
break;
}
}
}
if (pass_acl) {
break;
}
}
if (!pass_acl) {
savederrno = EINVAL;
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Connection rejected from %s/%s", addr_str, port_str);
close(new_fd);
errno = savederrno;
return;
}
}
/*
* Keep a track of all accepted FDs
*/
for (i=0; i<MAX_ACCEPTED_SOCKS; i++) {
if (info->accepted_socks[i] == -1) {
info->accepted_socks[i] = new_fd;
break;
}
}
if (i == MAX_ACCEPTED_SOCKS) {
errno = EBUSY;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: too many connections!");
goto exit_error;
}
if (_configure_common_socket(knet_h, new_fd, 0, "SCTP incoming") < 0) { /* Inherit flags from listener? */
savederrno = errno;
err = -1;
goto exit_error;
}
if (_enable_sctp_notifications(knet_h, new_fd, "Incoming connection") < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
accept_info = malloc(sizeof(sctp_accepted_link_info_t));
if (!accept_info) {
savederrno = errno;
err = -1;
goto exit_error;
}
memset(accept_info, 0, sizeof(sctp_accepted_link_info_t));
accept_info->link_info = info;
if (_set_fd_tracker(knet_h, new_fd, KNET_TRANSPORT_SCTP, SCTP_ACCEPTED_LINK_INFO,
knet_h->knet_transport_fd_tracker[listen_sock].sockaddr_len,
- accept_info) < 0) {
+ accept_info, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s",
strerror(errno));
goto exit_error;
}
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = new_fd;
if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, new_fd, &ev)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: unable to add accepted socket %d to epoll pool: %s",
new_fd, strerror(errno));
goto exit_error;
}
info->on_rx_epoll = 1;
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: accepted new fd %d for %s/%s (listen fd: %d). index: %d",
new_fd, addr_str, port_str, info->listen_sock, i);
exit_error:
if (err) {
if ((i >= 0) && (i < MAX_ACCEPTED_SOCKS)) {
info->accepted_socks[i] = -1;
}
/*
* check the error to make coverity scan happy.
* _set_fd_tracker cannot fail at this stage
*/
- if (_set_fd_tracker(knet_h, new_fd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0){
+ if (_set_fd_tracker(knet_h, new_fd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL, -1) < 0){
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to update fdtracker for socket %d", new_fd);
}
free(accept_info);
if (new_fd >= 0) {
close(new_fd);
}
}
errno = savederrno;
return;
}
/*
* Listen thread received a notification of a bad socket that needs closing
* called with a write lock from main thread
*/
static void _handle_listen_sctp_errors(knet_handle_t knet_h)
{
int sockfd = -1;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
sctp_accepted_link_info_t *accept_info;
sctp_listen_link_info_t *info;
struct knet_host *host;
int link_idx;
int i;
if (recv(handle_info->listensockfd[0], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL) != sizeof(int)) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Short read on listensockfd");
return;
}
if (_is_valid_fd(knet_h, sockfd) < 1) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for listen socket fd error");
return;
}
/*
* revalidate sockfd
*/
if ((sockfd < 0) || (sockfd >= KNET_MAX_FDS)) {
return;
}
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Processing listen error on socket: %d", sockfd);
accept_info = knet_h->knet_transport_fd_tracker[sockfd].data;
info = accept_info->link_info;
/*
* clear all links using this accepted socket as
* outbound dynamically connected socket
*/
for (host = knet_h->host_head; host != NULL; host = host->next) {
for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
if ((host->link[link_idx].dynamic == KNET_LINK_DYNIP) &&
(host->link[link_idx].outsock == sockfd)) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Found dynamic connection on host %d link %d (%d)",
host->host_id, link_idx, sockfd);
host->link[link_idx].status.dynconnected = 0;
host->link[link_idx].transport_connected = 0;
host->link[link_idx].outsock = 0;
memset(&host->link[link_idx].dst_addr, 0, sizeof(struct sockaddr_storage));
}
}
}
for (i=0; i<MAX_ACCEPTED_SOCKS; i++) {
if (sockfd == info->accepted_socks[i]) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Closing accepted socket %d", sockfd);
/*
* check the error to make coverity scan happy.
* _set_fd_tracker cannot fail at this stage
*/
- if (_set_fd_tracker(knet_h, sockfd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) {
+ if (_set_fd_tracker(knet_h, sockfd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL, -1) < 0) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to update fdtracker for socket %d", sockfd);
}
info->accepted_socks[i] = -1;
free(accept_info);
close(sockfd);
break; /* Keeps covscan happy */
}
}
}
static void *_sctp_listen_thread(void *data)
{
int savederrno;
int i, nev;
knet_handle_t knet_h = (knet_handle_t) data;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
struct epoll_event events[KNET_EPOLL_MAX_EVENTS];
set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_STARTED);
memset(&events, 0, sizeof(events));
while (!shutdown_in_progress(knet_h)) {
nev = epoll_wait(handle_info->listen_epollfd, events, KNET_EPOLL_MAX_EVENTS, knet_h->threads_timer_res / 1000);
/*
* we use timeout to detect if thread is shutting down
*/
if (nev == 0) {
continue;
}
if (nev < 0) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP listen handler EPOLL ERROR: %s",
strerror(errno));
continue;
}
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get write lock: %s",
strerror(savederrno));
continue;
}
/*
* Sort out which FD has an incoming connection
*/
for (i = 0; i < nev; i++) {
if (events[i].data.fd == handle_info->listensockfd[0]) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received notification from rx_error for listener/accepted socket");
_handle_listen_sctp_errors(knet_h);
} else {
if (_is_valid_fd(knet_h, events[i].data.fd) == 1) {
_handle_incoming_sctp(knet_h, events[i].data.fd);
} else {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received listen notification from invalid socket");
}
}
}
pthread_rwlock_unlock(&knet_h->global_rwlock);
}
set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_STOPPED);
return NULL;
}
/*
* sctp_link_listener_start/stop are called in global write lock
* context from set_config and clear_config.
*/
static sctp_listen_link_info_t *sctp_link_listener_start(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
int listen_sock = -1;
struct epoll_event ev;
sctp_listen_link_info_t *info = NULL;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
/*
* Only allocate a new listener if src address is different
*/
qb_list_for_each_entry(info, &handle_info->listen_links_list, list) {
if (memcmp(&info->src_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)) == 0) {
if ((check_add(knet_h, kn_link, -1,
&kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) {
return NULL;
}
return info;
}
}
info = malloc(sizeof(sctp_listen_link_info_t));
if (!info) {
err = -1;
goto exit_error;
}
memset(info, 0, sizeof(sctp_listen_link_info_t));
memset(info->accepted_socks, -1, sizeof(info->accepted_socks));
memmove(&info->src_address, &kn_link->src_addr, sizeof(struct sockaddr_storage));
listen_sock = socket(kn_link->src_addr.ss_family, SOCK_STREAM, IPPROTO_SCTP);
if (listen_sock < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create listener socket: %s",
strerror(savederrno));
goto exit_error;
}
if (_configure_sctp_socket(knet_h, listen_sock, &kn_link->src_addr, kn_link->flags, "SCTP listener") < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
if (bind(listen_sock, (struct sockaddr *)&kn_link->src_addr, sockaddr_len(&kn_link->src_addr)) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to bind listener socket: %s",
strerror(savederrno));
goto exit_error;
}
if (listen(listen_sock, 5) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to listen on listener socket: %s",
strerror(savederrno));
goto exit_error;
}
- if (_set_fd_tracker(knet_h, listen_sock, KNET_TRANSPORT_SCTP, SCTP_LISTENER_LINK_INFO, sockaddr_len(&kn_link->src_addr), info) < 0) {
+ if (_set_fd_tracker(knet_h, listen_sock, KNET_TRANSPORT_SCTP, SCTP_LISTENER_LINK_INFO, sockaddr_len(&kn_link->src_addr), info, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s",
strerror(savederrno));
goto exit_error;
}
if ((check_add(knet_h, kn_link, -1,
&kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to configure default access lists: %s",
strerror(savederrno));
goto exit_error;
}
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = listen_sock;
if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_ADD, listen_sock, &ev)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add listener to epoll pool: %s",
strerror(savederrno));
goto exit_error;
}
info->on_listener_epoll = 1;
info->listen_sock = listen_sock;
qb_list_add(&info->list, &handle_info->listen_links_list);
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Listening on fd %d for %s:%s", listen_sock, kn_link->status.src_ipaddr, kn_link->status.src_port);
exit_error:
if (err) {
if ((info) && (info->on_listener_epoll)) {
epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, listen_sock, &ev);
}
if (listen_sock >= 0) {
check_rmall(knet_h, kn_link);
close(listen_sock);
}
if (info) {
free(info);
info = NULL;
}
}
errno = savederrno;
return info;
}
static int sctp_link_listener_stop(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
int found = 0, i;
struct knet_host *host;
int link_idx;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
sctp_connect_link_info_t *this_link_info = kn_link->transport_link;
sctp_listen_link_info_t *info = this_link_info->listener;
sctp_connect_link_info_t *link_info;
struct epoll_event ev;
for (host = knet_h->host_head; host != NULL; host = host->next) {
for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
if (&host->link[link_idx] == kn_link)
continue;
link_info = host->link[link_idx].transport_link;
if ((link_info) &&
(link_info->listener == info)) {
found = 1;
break;
}
}
}
if ((check_rm(knet_h, kn_link,
&kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != ENOENT)) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove default access lists for %d", info->listen_sock);
}
if (found) {
this_link_info->listener = NULL;
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP listener socket %d still in use", info->listen_sock);
savederrno = EBUSY;
err = -1;
goto exit_error;
}
if (info->on_listener_epoll) {
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = info->listen_sock;
if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, info->listen_sock, &ev)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove listener to epoll pool: %s",
strerror(savederrno));
goto exit_error;
}
info->on_listener_epoll = 0;
}
- if (_set_fd_tracker(knet_h, info->listen_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) {
+ if (_set_fd_tracker(knet_h, info->listen_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s",
strerror(savederrno));
goto exit_error;
}
check_rmall(knet_h, kn_link);
close(info->listen_sock);
for (i=0; i< MAX_ACCEPTED_SOCKS; i++) {
if (info->accepted_socks[i] > -1) {
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = info->accepted_socks[i];
if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->accepted_socks[i], &ev)) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove EOFed socket from epoll pool: %s",
strerror(errno));
}
info->on_rx_epoll = 0;
free(knet_h->knet_transport_fd_tracker[info->accepted_socks[i]].data);
close(info->accepted_socks[i]);
- if (_set_fd_tracker(knet_h, info->accepted_socks[i], KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL) < 0) {
+ if (_set_fd_tracker(knet_h, info->accepted_socks[i], KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, 0, NULL, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s",
strerror(savederrno));
goto exit_error;
}
info->accepted_socks[i] = -1;
}
}
qb_list_del(&info->list);
free(info);
this_link_info->listener = NULL;
exit_error:
errno = savederrno;
return err;
}
/*
* Links config/clear. Both called with global wrlock from link_set_config/clear_config
*/
int sctp_transport_link_set_config(knet_handle_t knet_h, struct knet_link *kn_link)
{
int savederrno = 0, err = 0;
sctp_connect_link_info_t *info;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
info = malloc(sizeof(sctp_connect_link_info_t));
if (!info) {
goto exit_error;
}
memset(info, 0, sizeof(sctp_connect_link_info_t));
kn_link->transport_link = info;
info->link = kn_link;
memmove(&info->dst_address, &kn_link->dst_addr, sizeof(struct sockaddr_storage));
info->connect_sock = -1;
info->listener = sctp_link_listener_start(knet_h, kn_link);
if (!info->listener) {
savederrno = errno;
err = -1;
goto exit_error;
}
if (kn_link->dynamic == KNET_LINK_STATIC) {
if (_create_connect_socket(knet_h, kn_link) < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
kn_link->outsock = info->connect_sock;
}
qb_list_add(&info->list, &handle_info->connect_links_list);
exit_error:
if (err) {
if (info) {
if (info->connect_sock >= 0) {
close(info->connect_sock);
}
if (info->listener) {
sctp_link_listener_stop(knet_h, kn_link);
}
kn_link->transport_link = NULL;
free(info);
}
}
errno = savederrno;
return err;
}
/*
* called with global wrlock
*/
int sctp_transport_link_clear_config(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
sctp_connect_link_info_t *info;
if (!kn_link) {
errno = EINVAL;
return -1;
}
info = kn_link->transport_link;
if (!info) {
errno = EINVAL;
return -1;
}
if ((sctp_link_listener_stop(knet_h, kn_link) <0) && (errno != EBUSY)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove listener transport: %s",
strerror(savederrno));
goto exit_error;
}
if (_close_connect_socket(knet_h, kn_link) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to close connected socket: %s",
strerror(savederrno));
goto exit_error;
}
qb_list_del(&info->list);
free(info);
kn_link->transport_link = NULL;
exit_error:
errno = savederrno;
return err;
}
/*
* transport_free and transport_init are
* called only from knet_handle_new and knet_handle_free.
* all resources (hosts/links) should have been already freed at this point
* and they are called in a write locked context, hence they
* don't need their own locking.
*/
int sctp_transport_free(knet_handle_t knet_h)
{
sctp_handle_info_t *handle_info;
void *thread_status;
struct epoll_event ev;
if (!knet_h->transports[KNET_TRANSPORT_SCTP]) {
errno = EINVAL;
return -1;
}
handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
/*
* keep it here while we debug list usage and such
*/
if (!qb_list_empty(&handle_info->listen_links_list)) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Internal error. listen links list is not empty");
}
if (!qb_list_empty(&handle_info->connect_links_list)) {
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Internal error. connect links list is not empty");
}
if (handle_info->listen_thread) {
pthread_cancel(handle_info->listen_thread);
pthread_join(handle_info->listen_thread, &thread_status);
}
if (handle_info->connect_thread) {
pthread_cancel(handle_info->connect_thread);
pthread_join(handle_info->connect_thread, &thread_status);
}
if (handle_info->listensockfd[0] >= 0) {
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = handle_info->listensockfd[0];
epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, handle_info->listensockfd[0], &ev);
}
if (handle_info->connectsockfd[0] >= 0) {
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = handle_info->connectsockfd[0];
epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_DEL, handle_info->connectsockfd[0], &ev);
}
_close_socketpair(knet_h, handle_info->connectsockfd);
_close_socketpair(knet_h, handle_info->listensockfd);
if (handle_info->listen_epollfd >= 0) {
close(handle_info->listen_epollfd);
}
if (handle_info->connect_epollfd >= 0) {
close(handle_info->connect_epollfd);
}
free(handle_info->event_subscribe_buffer);
free(handle_info);
knet_h->transports[KNET_TRANSPORT_SCTP] = NULL;
return 0;
}
static int _sctp_subscribe_init(knet_handle_t knet_h)
{
int test_socket, savederrno;
sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP];
char dummy_events[100];
struct sctp_event_subscribe *events;
/* Below we set the first 6 fields of this expanding struct.
* SCTP_EVENTS is deprecated, but SCTP_EVENT is not available
* on Linux; on the other hand, FreeBSD and old Linux does not
* accept small transfers, so we can't simply use this minimum
* everywhere. Thus we query and store the native size. */
const unsigned int subscribe_min = 6;
test_socket = socket(PF_INET, SOCK_STREAM, IPPROTO_SCTP);
if (test_socket < 0) {
if (errno == EPROTONOSUPPORT) {
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP not supported, skipping initialization");
return 0;
}
savederrno = errno;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create test socket: %s",
strerror(savederrno));
return savederrno;
}
handle_info->event_subscribe_kernel_size = sizeof dummy_events;
if (getsockopt(test_socket, IPPROTO_SCTP, SCTP_EVENTS, &dummy_events,
&handle_info->event_subscribe_kernel_size)) {
close(test_socket);
savederrno = errno;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to query kernel size of struct sctp_event_subscribe: %s",
strerror(savederrno));
return savederrno;
}
close(test_socket);
if (handle_info->event_subscribe_kernel_size < subscribe_min) {
savederrno = ERANGE;
log_err(knet_h, KNET_SUB_TRANSP_SCTP,
"No kernel support for the necessary notifications: struct sctp_event_subscribe is %u bytes, %u needed",
handle_info->event_subscribe_kernel_size, subscribe_min);
return savederrno;
}
events = malloc(handle_info->event_subscribe_kernel_size);
if (!events) {
savederrno = errno;
log_err(knet_h, KNET_SUB_TRANSP_SCTP,
"Failed to allocate event subscribe buffer: %s", strerror(savederrno));
return savederrno;
}
memset(events, 0, handle_info->event_subscribe_kernel_size);
events->sctp_data_io_event = 1;
events->sctp_association_event = 1;
events->sctp_address_event = 1;
events->sctp_send_failure_event = 1;
events->sctp_peer_error_event = 1;
events->sctp_shutdown_event = 1;
handle_info->event_subscribe_buffer = (char *)events;
log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Size of struct sctp_event_subscribe is %u in kernel, %zu in user space",
handle_info->event_subscribe_kernel_size, sizeof(struct sctp_event_subscribe));
return 0;
}
int sctp_transport_init(knet_handle_t knet_h)
{
int err = 0, savederrno = 0;
sctp_handle_info_t *handle_info;
struct epoll_event ev;
if (knet_h->transports[KNET_TRANSPORT_SCTP]) {
errno = EEXIST;
return -1;
}
handle_info = malloc(sizeof(sctp_handle_info_t));
if (!handle_info) {
return -1;
}
memset(handle_info, 0,sizeof(sctp_handle_info_t));
knet_h->transports[KNET_TRANSPORT_SCTP] = handle_info;
savederrno = _sctp_subscribe_init(knet_h);
if (savederrno) {
err = -1;
goto exit_fail;
}
qb_list_init(&handle_info->listen_links_list);
qb_list_init(&handle_info->connect_links_list);
handle_info->listen_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1);
if (handle_info->listen_epollfd < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create epoll listen fd: %s",
strerror(savederrno));
goto exit_fail;
}
if (_fdset_cloexec(handle_info->listen_epollfd)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set CLOEXEC on listen_epollfd: %s",
strerror(savederrno));
goto exit_fail;
}
handle_info->connect_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1);
if (handle_info->connect_epollfd < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create epoll connect fd: %s",
strerror(savederrno));
goto exit_fail;
}
if (_fdset_cloexec(handle_info->connect_epollfd)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set CLOEXEC on connect_epollfd: %s",
strerror(savederrno));
goto exit_fail;
}
if (_init_socketpair(knet_h, handle_info->connectsockfd) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to init connect socketpair: %s",
strerror(savederrno));
goto exit_fail;
}
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = handle_info->connectsockfd[0];
if (epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_ADD, handle_info->connectsockfd[0], &ev)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add connectsockfd[0] to connect epoll pool: %s",
strerror(savederrno));
goto exit_fail;
}
if (_init_socketpair(knet_h, handle_info->listensockfd) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to init listen socketpair: %s",
strerror(savederrno));
goto exit_fail;
}
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = handle_info->listensockfd[0];
if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_ADD, handle_info->listensockfd[0], &ev)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add listensockfd[0] to listen epoll pool: %s",
strerror(savederrno));
goto exit_fail;
}
/*
* Start connect & listener threads
*/
set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_REGISTERED);
savederrno = pthread_create(&handle_info->listen_thread, 0, _sctp_listen_thread, (void *) knet_h);
if (savederrno) {
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to start sctp listen thread: %s",
strerror(savederrno));
goto exit_fail;
}
set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_REGISTERED);
savederrno = pthread_create(&handle_info->connect_thread, 0, _sctp_connect_thread, (void *) knet_h);
if (savederrno) {
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to start sctp connect thread: %s",
strerror(savederrno));
goto exit_fail;
}
exit_fail:
if (err < 0) {
sctp_transport_free(knet_h);
}
errno = savederrno;
return err;
}
int sctp_transport_link_dyn_connect(knet_handle_t knet_h, int sockfd, struct knet_link *kn_link)
{
kn_link->outsock = sockfd;
kn_link->status.dynconnected = 1;
kn_link->transport_connected = 1;
return 0;
}
#endif
diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c
index 9d1b8593..1116928c 100644
--- a/libknet/transport_udp.c
+++ b/libknet/transport_udp.c
@@ -1,482 +1,575 @@
/*
* Copyright (C) 2016-2024 Red Hat, Inc. All rights reserved.
*
* Author: Christine Caulfield <ccaulfie@redhat.com>
*
* This software licensed under LGPL-2.0+
*/
#include "config.h"
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#if defined (IP_RECVERR) || defined (IPV6_RECVERR)
#include <linux/errqueue.h>
#endif
+#ifdef IP_PKTINFO
+#include <net/if.h>
+#endif
+
#include "libknet.h"
#include "compat.h"
#include "host.h"
#include "link.h"
#include "logging.h"
#include "common.h"
#include "netutils.h"
#include "transport_common.h"
#include "transport_udp.h"
#include "transports.h"
#include "threads_common.h"
typedef struct udp_handle_info {
struct qb_list_head links_list;
} udp_handle_info_t;
typedef struct udp_link_info {
struct qb_list_head list;
struct sockaddr_storage local_address;
int socket_fd;
int on_epoll;
} udp_link_info_t;
int udp_transport_link_set_config(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
int sock = -1;
struct epoll_event ev;
udp_link_info_t *info;
udp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_UDP];
-#if defined (IP_RECVERR) || defined (IPV6_RECVERR)
- int value;
-#endif
/*
* Only allocate a new link if the local address is different
*/
qb_list_for_each_entry(info, &handle_info->links_list, list) {
if (memcmp(&info->local_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)) == 0) {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Re-using existing UDP socket for new link");
kn_link->outsock = info->socket_fd;
kn_link->transport_link = info;
kn_link->transport_connected = 1;
return 0;
}
}
info = malloc(sizeof(udp_link_info_t));
if (!info) {
err = -1;
goto exit_error;
}
memset(info, 0, sizeof(udp_link_info_t));
sock = socket(kn_link->src_addr.ss_family, SOCK_DGRAM, 0);
if (sock < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to create listener socket: %s",
strerror(savederrno));
goto exit_error;
}
if (_configure_transport_socket(knet_h, sock, &kn_link->src_addr, kn_link->flags, "UDP") < 0) {
savederrno = errno;
err = -1;
goto exit_error;
}
#ifdef IP_RECVERR
if (kn_link->src_addr.ss_family == AF_INET) {
- value = 1;
+ int value = 1;
if (setsockopt(sock, SOL_IP, IP_RECVERR, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVERR on socket: %s",
strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_RECVERR enabled on socket: %i", sock);
}
#else
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_RECVERR not available in this build/platform");
#endif
+#ifdef IP_PKTINFO
+ if (kn_link->src_addr.ss_family == AF_INET) {
+ int value = 1;
+ if (setsockopt(sock, SOL_IP, IP_PKTINFO, &value, sizeof(value)) <0) {
+ savederrno = errno;
+ err = -1;
+ log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set PKTINFO on socket: %s",
+ strerror(savederrno));
+ goto exit_error;
+ }
+ log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_PKTINFO enabled on socket: %i", sock);
+ }
+#endif
+#ifdef IPV6_RECVPKTINFO
+ if (kn_link->src_addr.ss_family == AF_INET6) {
+ int value = 1;
+ if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVPKTINFO, &value, sizeof(value)) <0) {
+ savederrno = errno;
+ err = -1;
+ log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVPKTINFO on socket: %s",
+ strerror(savederrno));
+ goto exit_error;
+ }
+ log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVPKTINFO enabled on socket: %i", sock);
+ }
+#endif
#ifdef IPV6_RECVERR
if (kn_link->src_addr.ss_family == AF_INET6) {
- value = 1;
+ int value = 1;
if (setsockopt(sock, SOL_IPV6, IPV6_RECVERR, &value, sizeof(value)) <0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVERR on socket: %s",
strerror(savederrno));
goto exit_error;
}
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVERR enabled on socket: %i", sock);
}
#else
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVERR not available in this build/platform");
#endif
if (bind(sock, (struct sockaddr *)&kn_link->src_addr, sockaddr_len(&kn_link->src_addr))) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to bind listener socket: %s",
strerror(savederrno));
goto exit_error;
}
-
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = sock;
if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, sock, &ev)) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to add listener to epoll pool: %s",
strerror(savederrno));
goto exit_error;
}
info->on_epoll = 1;
- if (_set_fd_tracker(knet_h, sock, KNET_TRANSPORT_UDP, 0, sockaddr_len(&kn_link->src_addr), info) < 0) {
+ if (_set_fd_tracker(knet_h, sock, KNET_TRANSPORT_UDP, 0, sockaddr_len(&kn_link->src_addr), info, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set fd tracker: %s",
strerror(savederrno));
goto exit_error;
}
memmove(&info->local_address, &kn_link->src_addr, sizeof(struct sockaddr_storage));
info->socket_fd = sock;
qb_list_add(&info->list, &handle_info->links_list);
kn_link->outsock = sock;
kn_link->transport_link = info;
kn_link->transport_connected = 1;
exit_error:
if (err) {
if (info) {
if (info->on_epoll) {
epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, sock, &ev);
}
free(info);
}
if (sock >= 0) {
close(sock);
}
}
errno = savederrno;
return err;
}
int udp_transport_link_clear_config(knet_handle_t knet_h, struct knet_link *kn_link)
{
int err = 0, savederrno = 0;
int found = 0;
struct knet_host *host;
int link_idx;
udp_link_info_t *info = kn_link->transport_link;
struct epoll_event ev;
for (host = knet_h->host_head; host != NULL; host = host->next) {
for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
if (&host->link[link_idx] == kn_link)
continue;
if (host->link[link_idx].transport_link == info) {
found = 1;
break;
}
}
}
if (found) {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "UDP socket %d still in use", info->socket_fd);
savederrno = EBUSY;
err = -1;
goto exit_error;
}
if (info->on_epoll) {
memset(&ev, 0, sizeof(struct epoll_event));
ev.events = EPOLLIN;
ev.data.fd = info->socket_fd;
if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->socket_fd, &ev) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to remove UDP socket from epoll poll: %s",
strerror(errno));
goto exit_error;
}
info->on_epoll = 0;
}
- if (_set_fd_tracker(knet_h, info->socket_fd, KNET_MAX_TRANSPORTS, 0, sockaddr_len(&kn_link->src_addr), NULL) < 0) {
+ if (_set_fd_tracker(knet_h, info->socket_fd, KNET_MAX_TRANSPORTS, 0, sockaddr_len(&kn_link->src_addr), NULL, -1) < 0) {
savederrno = errno;
err = -1;
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set fd tracker: %s",
strerror(savederrno));
goto exit_error;
}
close(info->socket_fd);
qb_list_del(&info->list);
free(kn_link->transport_link);
exit_error:
errno = savederrno;
return err;
}
int udp_transport_free(knet_handle_t knet_h)
{
udp_handle_info_t *handle_info;
if (!knet_h->transports[KNET_TRANSPORT_UDP]) {
errno = EINVAL;
return -1;
}
handle_info = knet_h->transports[KNET_TRANSPORT_UDP];
/*
* keep it here while we debug list usage and such
*/
if (!qb_list_empty(&handle_info->links_list)) {
log_err(knet_h, KNET_SUB_TRANSP_UDP, "Internal error. handle list is not empty");
return -1;
}
free(handle_info);
knet_h->transports[KNET_TRANSPORT_UDP] = NULL;
return 0;
}
int udp_transport_init(knet_handle_t knet_h)
{
udp_handle_info_t *handle_info;
if (knet_h->transports[KNET_TRANSPORT_UDP]) {
errno = EEXIST;
return -1;
}
handle_info = malloc(sizeof(udp_handle_info_t));
if (!handle_info) {
return -1;
}
memset(handle_info, 0, sizeof(udp_handle_info_t));
knet_h->transports[KNET_TRANSPORT_UDP] = handle_info;
qb_list_init(&handle_info->links_list);
return 0;
}
#if defined (IP_RECVERR) || defined (IPV6_RECVERR)
static int read_errs_from_sock(knet_handle_t knet_h, int sockfd)
{
int err = 0, savederrno = 0;
int got_err = 0;
char buffer[1024];
struct iovec iov;
struct msghdr msg;
struct cmsghdr *cmsg;
struct sock_extended_err *sock_err;
struct icmphdr icmph;
struct sockaddr_storage remote;
struct sockaddr_storage *origin;
char addr_str[KNET_MAX_HOST_LEN];
char port_str[KNET_MAX_PORT_LEN];
char addr_remote_str[KNET_MAX_HOST_LEN];
char port_remote_str[KNET_MAX_PORT_LEN];
iov.iov_base = &icmph;
iov.iov_len = sizeof(icmph);
msg.msg_name = (void*)&remote;
msg.msg_namelen = sizeof(remote);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_flags = 0;
msg.msg_control = buffer;
msg.msg_controllen = sizeof(buffer);
for (;;) {
err = recvmsg(sockfd, &msg, MSG_ERRQUEUE);
savederrno = errno;
if (err < 0) {
if (!got_err) {
errno = savederrno;
return -1;
} else {
return 0;
}
}
got_err = 1;
for (cmsg = CMSG_FIRSTHDR(&msg);cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if (((cmsg->cmsg_level == SOL_IP) && (cmsg->cmsg_type == IP_RECVERR)) ||
((cmsg->cmsg_level == SOL_IPV6 && (cmsg->cmsg_type == IPV6_RECVERR)))) {
sock_err = (struct sock_extended_err*)(void *)CMSG_DATA(cmsg);
if (sock_err) {
switch (sock_err->ee_origin) {
case SO_EE_ORIGIN_NONE: /* no origin */
case SO_EE_ORIGIN_LOCAL: /* local source (EMSGSIZE) */
if (sock_err->ee_errno == EMSGSIZE || sock_err->ee_errno == EPERM) {
if (pthread_mutex_lock(&knet_h->kmtu_mutex) != 0) {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Unable to get mutex lock");
knet_h->kernel_mtu = 0;
break;
} else {
knet_h->kernel_mtu = sock_err->ee_info;
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu);
pthread_mutex_unlock(&knet_h->kmtu_mutex);
}
force_pmtud_run(knet_h, KNET_SUB_TRANSP_UDP, 0, 0);
}
/*
* those errors are way too noisy
*/
break;
case SO_EE_ORIGIN_ICMP: /* ICMP */
case SO_EE_ORIGIN_ICMP6: /* ICMP6 */
origin = (struct sockaddr_storage *)(void *)SO_EE_OFFENDER(sock_err);
if (knet_addrtostr(origin, sizeof(*origin),
addr_str, KNET_MAX_HOST_LEN,
port_str, KNET_MAX_PORT_LEN) < 0) {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from unknown source: %s", strerror(sock_err->ee_errno));
} else {
if (knet_addrtostr(&remote, sizeof(remote),
addr_remote_str, KNET_MAX_HOST_LEN,
port_remote_str, KNET_MAX_PORT_LEN) < 0) {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s destination unknown", addr_str, strerror(sock_err->ee_errno));
} else {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s %s", addr_str, strerror(sock_err->ee_errno), addr_remote_str);
if ((sock_err->ee_errno == ECONNREFUSED) || /* knet is not running on the other node */
(sock_err->ee_errno == ECONNABORTED) || /* local kernel closed the socket */
(sock_err->ee_errno == ENONET) || /* network does not exist */
(sock_err->ee_errno == ENETUNREACH) || /* network unreachable */
(sock_err->ee_errno == EHOSTUNREACH) || /* host unreachable */
(sock_err->ee_errno == EHOSTDOWN) || /* host down (from kernel/net/ipv4/icmp.c */
(sock_err->ee_errno == ENETDOWN)) { /* network down */
struct knet_host *host = NULL;
struct knet_link *kn_link = NULL;
int link_idx, found = 0;
for (host = knet_h->host_head; host != NULL; host = host->next) {
for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
kn_link = &host->link[link_idx];
if (kn_link->outsock == sockfd) {
if (!cmpaddr(&remote, &kn_link->dst_addr)) {
found = 1;
break;
}
}
}
if (found) {
break;
}
}
if ((host) && (kn_link) &&
(kn_link->status.connected)) {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Setting down host %u link %i", host->host_id, kn_link->link_id);
/*
* setting transport_connected = 0 will trigger
* thread_heartbeat link_down process.
*
* the process terminates calling into transport_link_down
* below that will set transport_connected = 1
*/
kn_link->transport_connected = 0;
}
}
}
}
break;
}
} else {
log_debug(knet_h, KNET_SUB_TRANSP_UDP, "No data in MSG_ERRQUEUE");
}
}
}
}
}
#else
static int read_errs_from_sock(knet_handle_t knet_h, int sockfd)
{
return 0;
}
#endif
transport_sock_error_t udp_transport_rx_sock_error(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno)
{
if (recv_errno == EAGAIN) {
read_errs_from_sock(knet_h, sockfd);
}
return KNET_TRANSPORT_SOCK_ERROR_IGNORE;
}
transport_sock_error_t udp_transport_tx_sock_error(knet_handle_t knet_h, int sockfd, int subsys, int recv_err, int recv_errno)
{
if (recv_err < 0) {
log_trace(knet_h, KNET_SUB_TRANSP_UDP, "tx_sock_error, subsys=%s, recv_err=%d: %s", knet_log_get_subsystem_name(subsys), recv_err, strerror(recv_errno));
if ((recv_errno == EMSGSIZE) || ((recv_errno == EPERM) && ((subsys == KNET_SUB_TX) || (subsys == KNET_SUB_PMTUD)))) {
read_errs_from_sock(knet_h, sockfd);
return KNET_TRANSPORT_SOCK_ERROR_IGNORE;
}
if ((recv_errno == EINVAL) || (recv_errno == EPERM) ||
(recv_errno == ENETUNREACH) || (recv_errno == ENETDOWN) ||
(recv_errno == EHOSTUNREACH)) {
if ((recv_errno == ENETUNREACH) || (recv_errno == ENETDOWN)) {
log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is unreachable.", sockfd);
}
return KNET_TRANSPORT_SOCK_ERROR_INTERNAL;
}
if ((recv_errno == ENOBUFS) || (recv_errno == EAGAIN)) {
log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is overloaded. Slowing TX down", sockfd);
usleep(knet_h->threads_timer_res / 16);
} else {
read_errs_from_sock(knet_h, sockfd);
}
return KNET_TRANSPORT_SOCK_ERROR_RETRY;
}
return KNET_TRANSPORT_SOCK_ERROR_IGNORE;
}
+/*
+ * If the received IP addr doesn't match the destination IP
+ * then weird routing is going on.
+ */
+static int dst_addr_is_valid(knet_handle_t knet_h, int sockfd, struct msghdr *msg)
+{
+#ifdef IP_PKTINFO
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ int pkt_ifindex = -1;
+ int ifindex = knet_h->knet_transport_fd_tracker[sockfd].ifindex;
+ struct sockaddr_storage dstaddr;
+
+ if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_PKTINFO) {
+ struct in_pktinfo *pi = (void*)CMSG_DATA(cmsg);
+ struct sockaddr_in *dstaddr4 = (struct sockaddr_in *)&dstaddr;
+
+ pkt_ifindex = pi->ipi_ifindex;
+ dstaddr4->sin_family = AF_INET;
+ dstaddr4->sin_port = 0; /* unknown to PKTINFO */
+ dstaddr4->sin_addr.s_addr = pi->ipi_addr.s_addr;
+ }
+ if (cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *pi = (void*)CMSG_DATA(cmsg);
+ struct sockaddr_in6 *dstaddr6 = (struct sockaddr_in6 *)&dstaddr;
+ memset(dstaddr6, 0, sizeof(struct sockaddr_in6));
+
+ pkt_ifindex = pi->ipi6_ifindex;
+ dstaddr6->sin6_family = AF_INET6;
+ dstaddr6->sin6_port = 0; /* unknown to PKTINFO */
+ memcpy(&dstaddr6->sin6_addr, (char *)&pi->ipi6_addr, sizeof(pi->ipi6_addr));
+ }
+ if (ifindex != -1 && pkt_ifindex != -1 && ifindex != pkt_ifindex) {
+ char srcaddr_s[KNET_MAX_HOST_LEN];
+ char srcport_s[KNET_MAX_PORT_LEN];
+ char dstaddr_s[KNET_MAX_HOST_LEN];
+ char dstport_s[KNET_MAX_PORT_LEN];
+ char expected_ifname[IF_NAMESIZE];
+ char used_ifname[IF_NAMESIZE];
+
+ /* Make as detailed a message as we can */
+ if ((if_indextoname(pkt_ifindex, used_ifname) == NULL) ||
+ (if_indextoname(ifindex, expected_ifname) == NULL)) {
+ log_warn(knet_h, KNET_SUB_TRANSP_UDP, "Received packet on ifindex %d when expected ifindex %d", pkt_ifindex, ifindex);
+ } else if (knet_addrtostr(msg->msg_name, msg->msg_namelen,
+ srcaddr_s, sizeof(srcaddr_s),
+ srcport_s, sizeof(srcport_s)) != 0) {
+ log_warn(knet_h, KNET_SUB_TRANSP_UDP, "Received packet on i/f %s when expected i/f %s", used_ifname, expected_ifname);
+ } else if (knet_addrtostr((struct sockaddr_storage *)&dstaddr, sizeof(dstaddr),
+ dstaddr_s, sizeof(dstaddr_s),
+ dstport_s, sizeof(dstport_s)) != 0) {
+ log_warn(knet_h, KNET_SUB_TRANSP_UDP, "Received packet from %s on i/f %s when expected %s", srcaddr_s, used_ifname, expected_ifname);
+ } else {
+ log_warn(knet_h, KNET_SUB_TRANSP_UDP, "Received packet from %s to %s on i/f %s when expected %s", srcaddr_s, dstaddr_s, used_ifname, expected_ifname);
+ }
+ }
+ }
+#endif
+ return 0;
+}
+
+
transport_rx_isdata_t udp_transport_rx_is_data(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg)
{
if (msg->msg_len == 0)
return KNET_TRANSPORT_RX_NOT_DATA_CONTINUE;
- return KNET_TRANSPORT_RX_IS_DATA;
+ if (dst_addr_is_valid(knet_h, sockfd, &msg->msg_hdr) == 0) {
+ return KNET_TRANSPORT_RX_IS_DATA;
+ }
+
+ return KNET_TRANSPORT_RX_NOT_DATA_CONTINUE;
}
int udp_transport_link_dyn_connect(knet_handle_t knet_h, int sockfd, struct knet_link *kn_link)
{
kn_link->status.dynconnected = 1;
return 0;
}
int udp_transport_link_is_down(knet_handle_t knet_h, struct knet_link *kn_link)
{
/*
* see comments about handling ICMP error messages
*/
kn_link->transport_connected = 1;
return 0;
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 3:35 PM (20 h, 13 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018851
Default Alt Text
(175 KB)

Event Timeline