diff --git a/libknet/links.c b/libknet/links.c index ef789f50..af048ebb 100644 --- a/libknet/links.c +++ b/libknet/links.c @@ -1,1572 +1,1587 @@ /* * Copyright (C) 2012-2023 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include "netutils.h" #include "internals.h" #include "logging.h" #include "links.h" #include "transports.h" #include "host.h" #include "threads_common.h" #include "links_acl.h" int _link_updown(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int enabled, unsigned int connected, unsigned int lock_stats) { struct knet_host *host = knet_h->host_index[host_id]; struct knet_link *link = &host->link[link_id]; int notify_status = link->status.connected; int savederrno = 0; if ((link->status.enabled == enabled) && (link->status.connected == connected)) return 0; if ((link->status.enabled) && (knet_h->link_status_change_notify_fn)) { if (link->status.connected != connected) { notify_status = connected; /* connection state */ } if (!enabled) { notify_status = 0; /* disable == disconnected */ } knet_h->link_status_change_notify_fn( knet_h->link_status_change_notify_fn_private_data, host_id, link_id, notify_status, host->status.remote, host->status.external); } link->status.enabled = enabled; link->status.connected = connected; _host_dstcache_update_async(knet_h, host); if ((link->status.dynconnected) && (!link->status.connected)) { link->status.dynconnected = 0; } if (!connected) { transport_link_is_down(knet_h, link); } else { /* Reset MTU in case new link can't use full line MTU */ log_info(knet_h, KNET_SUB_LINK, "Resetting MTU for link %u because host %u joined", link_id, host_id); force_pmtud_run(knet_h, KNET_SUB_LINK, 1, 1); } if (lock_stats) { savederrno = pthread_mutex_lock(&link->link_stats_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get stats mutex lock for host %u link %u: %s", host_id, link_id, strerror(savederrno)); errno = savederrno; return -1; } } if (connected) { time(&link->status.stats.last_up_times[link->status.stats.last_up_time_index]); link->status.stats.up_count++; if (++link->status.stats.last_up_time_index >= MAX_LINK_EVENTS) { link->status.stats.last_up_time_index = 0; } } else { time(&link->status.stats.last_down_times[link->status.stats.last_down_time_index]); link->status.stats.down_count++; if (++link->status.stats.last_down_time_index >= MAX_LINK_EVENTS) { link->status.stats.last_down_time_index = 0; } } if (lock_stats) { pthread_mutex_unlock(&link->link_stats_mutex); } return 0; } void _link_clear_stats(knet_handle_t knet_h) { struct knet_host *host; struct knet_link *link; uint32_t host_id; uint8_t link_id; for (host_id = 0; host_id < KNET_MAX_HOST; host_id++) { host = knet_h->host_index[host_id]; if (!host) { continue; } for (link_id = 0; link_id < KNET_MAX_LINK; link_id++) { link = &host->link[link_id]; memset(&link->status.stats, 0, sizeof(struct knet_link_stats)); } } } int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t transport, struct sockaddr_storage *src_addr, struct sockaddr_storage *dst_addr, uint64_t flags) { int savederrno = 0, err = 0, i, wipelink = 0, link_idx; struct knet_host *host, *tmp_host; struct knet_link *link = NULL; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!src_addr) { errno = EINVAL; return -1; } if (dst_addr && (src_addr->ss_family != dst_addr->ss_family)) { log_err(knet_h, KNET_SUB_LINK, "Source address family does not match destination address family"); errno = EINVAL; return -1; } if (transport >= KNET_MAX_TRANSPORTS) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (transport == KNET_TRANSPORT_LOOPBACK && knet_h->host_id != host_id) { log_err(knet_h, KNET_SUB_LINK, "Cannot create loopback link to remote node"); err = -1; savederrno = EINVAL; goto exit_unlock; } if (knet_h->host_id == host_id && knet_h->has_loop_link) { log_err(knet_h, KNET_SUB_LINK, "Cannot create more than 1 link when loopback is active"); err = -1; savederrno = EINVAL; goto exit_unlock; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } if (transport == KNET_TRANSPORT_LOOPBACK && knet_h->host_id == host_id) { for (i=0; ilink[i].configured) { log_err(knet_h, KNET_SUB_LINK, "Cannot add loopback link when other links are already configured."); err = -1; savederrno = EINVAL; goto exit_unlock; } } } link = &host->link[link_id]; if (link->configured != 0) { err =-1; savederrno = EBUSY; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->status.enabled != 0) { err =-1; savederrno = EBUSY; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently in use: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } /* * errors happening after this point should trigger * a memset of the link */ wipelink = 1; copy_sockaddr(&link->src_addr, src_addr); err = knet_addrtostr(src_addr, sizeof(struct sockaddr_storage), link->status.src_ipaddr, KNET_MAX_HOST_LEN, link->status.src_port, KNET_MAX_PORT_LEN); if (err) { if (err == EAI_SYSTEM) { savederrno = errno; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u source addr/port: %s", host_id, link_id, strerror(savederrno)); } else { savederrno = EINVAL; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u source addr/port: %s", host_id, link_id, gai_strerror(err)); } err = -1; goto exit_unlock; } if (!dst_addr) { link->dynamic = KNET_LINK_DYNIP; } else { link->dynamic = KNET_LINK_STATIC; copy_sockaddr(&link->dst_addr, dst_addr); err = knet_addrtostr(dst_addr, sizeof(struct sockaddr_storage), link->status.dst_ipaddr, KNET_MAX_HOST_LEN, link->status.dst_port, KNET_MAX_PORT_LEN); if (err) { if (err == EAI_SYSTEM) { savederrno = errno; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u destination addr/port: %s", host_id, link_id, strerror(savederrno)); } else { savederrno = EINVAL; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u destination addr/port: %s", host_id, link_id, gai_strerror(err)); } err = -1; goto exit_unlock; } } link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN; link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT; link->has_valid_mtu = 0; link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */ link->pong_timeout = KNET_LINK_DEFAULT_PING_TIMEOUT * 1000; /* microseconds */ link->pong_timeout_backoff = KNET_LINK_PONG_TIMEOUT_BACKOFF; link->pong_timeout_adj = link->pong_timeout * link->pong_timeout_backoff; /* microseconds */ link->latency_max_samples = KNET_LINK_DEFAULT_PING_PRECISION; link->status.stats.latency_samples = 0; link->flags = flags; /* * check for DYNIP vs STATIC collisions. * example: link0 is static, user attempts to configure link1 as dynamic with the same source * address/port. * This configuration is invalid and would cause ACL collisions. */ for (tmp_host = knet_h->host_head; tmp_host != NULL; tmp_host = tmp_host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if (&tmp_host->link[link_idx] == link) continue; if ((!memcmp(&tmp_host->link[link_idx].src_addr, &link->src_addr, sizeof(struct sockaddr_storage))) && (tmp_host->link[link_idx].dynamic != link->dynamic)) { savederrno = EINVAL; err = -1; log_err(knet_h, KNET_SUB_LINK, "Failed to configure host %u link %u dyn %u. Conflicts with host %u link %u dyn %u: %s", host_id, link_id, link->dynamic, tmp_host->host_id, link_idx, tmp_host->link[link_idx].dynamic, strerror(savederrno)); goto exit_unlock; } } } savederrno = pthread_mutex_init(&link->link_stats_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to initialize link stats mutex: %s", strerror(savederrno)); err = -1; goto exit_unlock; } if (transport_link_set_config(knet_h, link, transport) < 0) { savederrno = errno; err = -1; goto exit_transport_err; } /* * we can only configure default access lists if we know both endpoints * and the protocol uses GENERIC_ACL, otherwise the protocol has * to setup their own access lists above in transport_link_set_config. */ if ((transport_get_acl_type(knet_h, transport) == USE_GENERIC_ACL) && (link->dynamic == KNET_LINK_STATIC)) { log_debug(knet_h, KNET_SUB_LINK, "Configuring default access lists for host: %u link: %u socket: %d", host_id, link_id, link->outsock); if ((check_add(knet_h, link, -1, &link->dst_addr, &link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) { log_warn(knet_h, KNET_SUB_LINK, "Failed to configure default access lists for host: %u link: %u", host_id, link_id); savederrno = errno; err = -1; goto exit_acl_error; } } /* * no errors should happen after link is configured */ link->configured = 1; log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u is configured", host_id, link_id); if (transport == KNET_TRANSPORT_LOOPBACK) { knet_h->has_loop_link = 1; knet_h->loop_link = link_id; host->status.reachable = 1; link->status.mtu = KNET_PMTUD_SIZE_V6; } else { /* * calculate the minimum MTU that is safe to use, * based on RFCs and that each network device should * be able to support without any troubles */ if (link->dynamic == KNET_LINK_STATIC) { /* * with static link we can be more precise than using * the generic calc_min_mtu() */ switch (link->dst_addr.ss_family) { case AF_INET6: link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead)); break; case AF_INET: link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead)); break; } } else { /* * for dynamic links we start with the minimum MTU * possible and PMTUd will kick in immediately * after connection status is 1 */ link->status.mtu = calc_min_mtu(knet_h); } link->has_valid_mtu = 1; } exit_acl_error: /* * if creating access lists has error, we only need to clean * the transport and the stuff below. */ if (err < 0) { if ((transport_link_clear_config(knet_h, link) < 0) && (errno != EBUSY)) { log_warn(knet_h, KNET_SUB_LINK, "Failed to deconfigure transport for host %u link %u: %s", host_id, link_id, strerror(errno)); } } exit_transport_err: /* * if transport has errors, transport will clean after itself * and we only need to clean the mutex */ if (err < 0) { pthread_mutex_destroy(&link->link_stats_mutex); } exit_unlock: /* * re-init the link on error */ if ((err < 0) && (wipelink)) { memset(link, 0, sizeof(struct knet_link)); link->link_id = link_id; } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *transport, struct sockaddr_storage *src_addr, struct sockaddr_storage *dst_addr, uint8_t *dynamic, uint64_t *flags) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!src_addr) { errno = EINVAL; return -1; } if (!dynamic) { errno = EINVAL; return -1; } if (!transport) { errno = EINVAL; return -1; } if (!flags) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if ((link->dynamic == KNET_LINK_STATIC) && (!dst_addr)) { savederrno = EINVAL; err = -1; goto exit_unlock; } memmove(src_addr, &link->src_addr, sockaddr_len(&link->src_addr)); *transport = link->transport; *flags = link->flags; if (link->dynamic == KNET_LINK_STATIC) { *dynamic = 0; memmove(dst_addr, &link->dst_addr, sockaddr_len(&link->dst_addr)); } else { *dynamic = 1; } exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_clear_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; int sock; uint8_t transport; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (link->configured != 1) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->status.enabled != 0) { err = -1; savederrno = EBUSY; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently in use: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } /* * remove well known access lists here. * After the transport has done clearing the config, * then we can remove any leftover access lists if the link * is no longer in use. */ if ((transport_get_acl_type(knet_h, link->transport) == USE_GENERIC_ACL) && (link->dynamic == KNET_LINK_STATIC)) { if ((check_rm(knet_h, link, &link->dst_addr, &link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != ENOENT)) { err = -1; savederrno = errno; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u: unable to remove default access list", host_id, link_id); goto exit_unlock; } } /* * cache it for later as we don't know if the transport * will clear link info during clear_config. */ sock = link->outsock; transport = link->transport; if ((transport_link_clear_config(knet_h, link) < 0) && (errno != EBUSY)) { savederrno = errno; err = -1; goto exit_unlock; } /* * remove any other access lists when the socket is no * longer in use by the transport. */ if ((transport_get_acl_type(knet_h, transport) == USE_GENERIC_ACL) && (knet_h->knet_transport_fd_tracker[sock].transport == KNET_MAX_TRANSPORTS)) { check_rmall(knet_h, link); } pthread_mutex_destroy(&link->link_stats_mutex); memset(link, 0, sizeof(struct knet_link)); link->link_id = link_id; if (knet_h->has_loop_link && host_id == knet_h->host_id && link_id == knet_h->loop_link) { knet_h->has_loop_link = 0; if (host->active_link_entries == 0) { host->status.reachable = 0; } } log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u config has been wiped", host_id, link_id); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_set_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int enabled) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (enabled > 1) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->status.enabled == enabled) { err = 0; goto exit_unlock; } err = _link_updown(knet_h, host_id, link_id, enabled, link->status.connected, 0); savederrno = errno; if (enabled) { goto exit_unlock; } log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u is disabled", host_id, link_id); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int *enabled) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!enabled) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *enabled = link->status.enabled; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_set_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t pong_count) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (pong_count < 1) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } link->pong_count = pong_count; log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u pong count update: %u", host_id, link_id, link->pong_count); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *pong_count) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!pong_count) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *pong_count = link->pong_count; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_set_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t interval, time_t timeout, unsigned int precision) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!interval) { errno = EINVAL; return -1; } if (!timeout) { errno = ENOSYS; return -1; } if (!precision) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } + if ((interval * 1000) < knet_h->threads_timer_res) { + log_warn(knet_h, KNET_SUB_LINK, + "host: %u link: %u interval: %zu too small (%s). interval lower than thread_timer_res (%u ms) has no effect", + host_id, link_id, interval, strerror(savederrno), (knet_h->threads_timer_res / 1000)); + } + + if ((timeout * 1000) < knet_h->threads_timer_res) { + err = -1; + savederrno = EINVAL; + log_err(knet_h, KNET_SUB_LINK, + "host: %u link: %u pong timeout: %zu too small (%s). timeout cannot be less than thread_timer_res (%u ms)", + host_id, link_id, timeout, strerror(savederrno), (knet_h->threads_timer_res / 1000)); + goto exit_unlock; + } + link->ping_interval = interval * 1000; /* microseconds */ link->pong_timeout = timeout * 1000; /* microseconds */ link->latency_max_samples = precision; log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u timeout update - interval: %llu timeout: %llu precision: %u", host_id, link_id, link->ping_interval, link->pong_timeout, precision); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t *interval, time_t *timeout, unsigned int *precision) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!interval) { errno = EINVAL; return -1; } if (!timeout) { errno = EINVAL; return -1; } if (!precision) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *interval = link->ping_interval / 1000; /* microseconds */ *timeout = link->pong_timeout / 1000; *precision = link->latency_max_samples; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_set_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t priority) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; uint8_t old_priority; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } old_priority = link->priority; if (link->priority == priority) { err = 0; goto exit_unlock; } link->priority = priority; if (_host_dstcache_update_sync(knet_h, host)) { savederrno = errno; log_debug(knet_h, KNET_SUB_LINK, "Unable to update link priority (host: %u link: %u priority: %u): %s", host_id, link_id, link->priority, strerror(savederrno)); link->priority = old_priority; err = -1; goto exit_unlock; } log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u priority set to: %u", host_id, link_id, link->priority); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *priority) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!priority) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *priority = link->priority; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_link_list(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t *link_ids, size_t *link_ids_entries) { int savederrno = 0, err = 0, i, count = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (!link_ids) { errno = EINVAL; return -1; } if (!link_ids_entries) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } for (i = 0; i < KNET_MAX_LINK; i++) { link = &host->link[i]; if (!link->configured) { continue; } link_ids[count] = i; count++; } *link_ids_entries = count; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_status(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct knet_link_status *status, size_t struct_size) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!status) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } savederrno = pthread_mutex_lock(&link->link_stats_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get stats mutex lock for host %u link %u: %s", host_id, link_id, strerror(savederrno)); err = -1; goto exit_unlock; } memmove(status, &link->status, struct_size); pthread_mutex_unlock(&link->link_stats_mutex); /* Calculate totals - no point in doing this on-the-fly */ status->stats.rx_total_packets = status->stats.rx_data_packets + status->stats.rx_ping_packets + status->stats.rx_pong_packets + status->stats.rx_pmtu_packets; status->stats.tx_total_packets = status->stats.tx_data_packets + status->stats.tx_ping_packets + status->stats.tx_pong_packets + status->stats.tx_pmtu_packets; status->stats.rx_total_bytes = status->stats.rx_data_bytes + status->stats.rx_ping_bytes + status->stats.rx_pong_bytes + status->stats.rx_pmtu_bytes; status->stats.tx_total_bytes = status->stats.tx_data_bytes + status->stats.tx_ping_bytes + status->stats.tx_pong_bytes + status->stats.tx_pmtu_bytes; status->stats.tx_total_errors = status->stats.tx_data_errors + status->stats.tx_ping_errors + status->stats.tx_pong_errors + status->stats.tx_pmtu_errors; status->stats.tx_total_retries = status->stats.tx_data_retries + status->stats.tx_ping_retries + status->stats.tx_pong_retries + status->stats.tx_pmtu_retries; /* Tell the caller our full size in case they have an old version */ status->size = sizeof(struct knet_link_status); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_enable_status_change_notify(knet_handle_t knet_h, void *link_status_change_notify_fn_private_data, void (*link_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t link_id, uint8_t connected, uint8_t remote, uint8_t external)) { int savederrno = 0; if (!_is_valid_handle(knet_h)) { return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->link_status_change_notify_fn_private_data = link_status_change_notify_fn_private_data; knet_h->link_status_change_notify_fn = link_status_change_notify_fn; if (knet_h->link_status_change_notify_fn) { log_debug(knet_h, KNET_SUB_LINK, "link_status_change_notify_fn enabled"); } else { log_debug(knet_h, KNET_SUB_LINK, "link_status_change_notify_fn disabled"); } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_link_insert_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, int index, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (!ss1) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (type != CHECK_TYPE_MASK) && (type != CHECK_TYPE_RANGE)) { errno = EINVAL; return -1; } if ((acceptreject != CHECK_ACCEPT) && (acceptreject != CHECK_REJECT)) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (!ss2)) { errno = EINVAL; return -1; } if ((type == CHECK_TYPE_RANGE) && (ss1->ss_family != ss2->ss_family)) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->dynamic != KNET_LINK_DYNIP) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } err = check_add(knet_h, link, index, ss1, ss2, type, acceptreject); savederrno = errno; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return err; } int knet_link_add_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { return knet_link_insert_acl(knet_h, host_id, link_id, -1, ss1, ss2, type, acceptreject); } int knet_link_rm_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (!ss1) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (type != CHECK_TYPE_MASK) && (type != CHECK_TYPE_RANGE)) { errno = EINVAL; return -1; } if ((acceptreject != CHECK_ACCEPT) && (acceptreject != CHECK_REJECT)) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (!ss2)) { errno = EINVAL; return -1; } if ((type == CHECK_TYPE_RANGE) && (ss1->ss_family != ss2->ss_family)) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->dynamic != KNET_LINK_DYNIP) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } err = check_rm(knet_h, link, ss1, ss2, type, acceptreject); savederrno = errno; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return err; } int knet_link_clear_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!_is_valid_handle(knet_h)) { return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->dynamic != KNET_LINK_DYNIP) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } check_rmall(knet_h, link); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return err; } diff --git a/libknet/tests/api_knet_handle_set_threads_timer_res.c b/libknet/tests/api_knet_handle_set_threads_timer_res.c index 09116e33..253c34ff 100644 --- a/libknet/tests/api_knet_handle_set_threads_timer_res.c +++ b/libknet/tests/api_knet_handle_set_threads_timer_res.c @@ -1,55 +1,74 @@ /* * Copyright (C) 2016-2023 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "libknet.h" #include "internals.h" #include "test-common.h" static void test(void) { knet_handle_t knet_h1, knet_h[2]; int res; int logfds[2]; + struct sockaddr_storage src, dst; + + if (make_local_sockaddr(&src, 0) < 0) { + printf("Unable to convert src to sockaddr: %s\n", strerror(errno)); + exit(FAIL); + } + + if (make_local_sockaddr(&dst, 1) < 0) { + printf("Unable to convert dst to sockaddr: %s\n", strerror(errno)); + exit(FAIL); + } printf("Test knet_handle_set_threads_timer_res incorrect knet_h\n"); if ((!knet_handle_set_threads_timer_res(NULL, 0)) || (errno != EINVAL)) { printf("knet_handle_set_threads_timer_res accepted invalid knet_h or returned incorrect error: %s\n", strerror(errno)); exit(FAIL); } setup_logpipes(logfds); knet_h1 = knet_handle_start(logfds, KNET_LOG_DEBUG, knet_h); printf("Test knet_handle_set_threads_timer_res with invalid timeres\n"); FAIL_ON_SUCCESS(knet_handle_set_threads_timer_res(knet_h1, 999), EINVAL); + printf("Configuring host and link"); + FAIL_ON_ERR(knet_host_add(knet_h1, 1)); + FAIL_ON_ERR(knet_link_set_config(knet_h1, 1, 0, KNET_TRANSPORT_UDP, &src, &dst, 0)); + FAIL_ON_ERR(knet_link_set_ping_timers(knet_h1, 1, 0, 1000, 300, 2048)); + + printf("Test knet_handle_set_threads_timer_res with too high timeres\n"); + FAIL_ON_SUCCESS(knet_handle_set_threads_timer_res(knet_h1, 300001), EINVAL); + printf("Test knet_handle_set_threads_timer_res with valid timeres\n"); - FAIL_ON_ERR(knet_handle_set_threads_timer_res(knet_h1, 2000)); - if (knet_h1->threads_timer_res != 2000) { + FAIL_ON_ERR(knet_handle_set_threads_timer_res(knet_h1, 20000)); + if (knet_h1->threads_timer_res != 20000) { printf("knet_handle_set_threads_timer_res did not set timeres to correct value: %s\n", strerror(errno)); CLEAN_EXIT(FAIL); } CLEAN_EXIT(CONTINUE); } int main(int argc, char *argv[]) { test(); return PASS; } diff --git a/libknet/tests/api_knet_link_set_ping_timers.c b/libknet/tests/api_knet_link_set_ping_timers.c index 89852cb1..d83e2588 100644 --- a/libknet/tests/api_knet_link_set_ping_timers.c +++ b/libknet/tests/api_knet_link_set_ping_timers.c @@ -1,88 +1,93 @@ /* * Copyright (C) 2016-2023 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "libknet.h" #include "internals.h" #include "link.h" #include "netutils.h" #include "test-common.h" static void test(void) { knet_handle_t knet_h1, knet_h[2]; int res; int logfds[2]; struct sockaddr_storage src, dst; if (make_local_sockaddr(&src, 0) < 0) { printf("Unable to convert src to sockaddr: %s\n", strerror(errno)); exit(FAIL); } if (make_local_sockaddr(&dst, 1) < 0) { printf("Unable to convert dst to sockaddr: %s\n", strerror(errno)); exit(FAIL); } printf("Test knet_link_set_ping_timers incorrect knet_h\n"); if ((!knet_link_set_ping_timers(NULL, 1, 0, 1000, 2000, 2048)) || (errno != EINVAL)) { printf("knet_link_set_ping_timers accepted invalid knet_h or returned incorrect error: %s\n", strerror(errno)); exit(FAIL); } setup_logpipes(logfds); knet_h1 = knet_handle_start(logfds, KNET_LOG_DEBUG, knet_h); printf("Test knet_link_set_ping_timers with unconfigured host_id\n"); FAIL_ON_SUCCESS(knet_link_set_ping_timers(knet_h1, 1, 0, 1000, 2000, 2048), EINVAL); printf("Test knet_link_set_ping_timers with incorrect linkid\n"); FAIL_ON_ERR(knet_host_add(knet_h1, 1)); FAIL_ON_SUCCESS(knet_link_set_ping_timers(knet_h1, 1, KNET_MAX_LINK, 1000, 2000, 2048), EINVAL); printf("Test knet_link_set_ping_timers with incorrect interval\n"); FAIL_ON_SUCCESS(knet_link_set_ping_timers(knet_h1, 1, 0, 0, 2000, 2048), EINVAL); printf("Test knet_link_set_ping_timers with 0 timeout\n"); FAIL_ON_SUCCESS(knet_link_set_ping_timers(knet_h1, 1, 0, 1000, 0, 2048), ENOSYS); printf("Test knet_link_set_ping_timers with incorrect interval\n"); FAIL_ON_SUCCESS(knet_link_set_ping_timers(knet_h1, 1, 0, 1000, 2000, 0), EINVAL); printf("Test knet_link_set_ping_timers with unconfigured link\n"); FAIL_ON_SUCCESS(knet_link_set_ping_timers(knet_h1, 1, 0, 1000, 2000, 2048), EINVAL); - printf("Test knet_link_set_ping_timers with correct values\n"); + printf("Configure link"); FAIL_ON_ERR(knet_link_set_config(knet_h1, 1, 0, KNET_TRANSPORT_UDP, &src, &dst, 0)); + + printf("Test knet_link_set_ping_timers with too small timeout\n"); + FAIL_ON_SUCCESS(knet_link_set_ping_timers(knet_h1, 1, 0, 1000, (knet_h1->threads_timer_res / 2000), 2048), EINVAL); + + printf("Test knet_link_set_ping_timers with correct values\n"); FAIL_ON_ERR(knet_link_set_ping_timers(knet_h1, 1, 0, 1000, 2000, 2048)); if ((knet_h1->host_index[1]->link[0].ping_interval != 1000000) || (knet_h1->host_index[1]->link[0].pong_timeout != 2000000) || (knet_h1->host_index[1]->link[0].latency_max_samples != 2048)) { printf("knet_link_set_ping_timers failed to set values\n"); CLEAN_EXIT(FAIL); } CLEAN_EXIT(CONTINUE); } int main(int argc, char *argv[]) { test(); return PASS; } diff --git a/libknet/threads_common.c b/libknet/threads_common.c index bde67767..7cd6d7f9 100644 --- a/libknet/threads_common.c +++ b/libknet/threads_common.c @@ -1,353 +1,383 @@ /* * Copyright (C) 2016-2023 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include "internals.h" #include "logging.h" #include "threads_common.h" int shutdown_in_progress(knet_handle_t knet_h) { int savederrno = 0; int ret; savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_COMMON, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } ret = knet_h->fini_in_progress; pthread_rwlock_unlock(&knet_h->global_rwlock); return ret; } static int _pmtud_reschedule(knet_handle_t knet_h) { if (knet_h->pmtud_running) { knet_h->pmtud_abort = 1; if (knet_h->pmtud_waiting) { pthread_cond_signal(&knet_h->pmtud_cond); } } return 0; } static int pmtud_reschedule(knet_handle_t knet_h) { int res; if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); return -1; } res = _pmtud_reschedule(knet_h); pthread_mutex_unlock(&knet_h->pmtud_mutex); return res; } int get_global_wrlock(knet_handle_t knet_h) { if (pmtud_reschedule(knet_h) < 0) { log_info(knet_h, KNET_SUB_PMTUD, "Unable to notify PMTUd to reschedule. Expect delays in executing API calls"); } return pthread_rwlock_wrlock(&knet_h->global_rwlock); } static struct pretty_names thread_names[KNET_THREAD_MAX] = { { "TX", KNET_THREAD_TX }, { "RX", KNET_THREAD_RX }, { "HB", KNET_THREAD_HB }, { "PMTUD", KNET_THREAD_PMTUD }, #ifdef HAVE_NETINET_SCTP_H { "SCTP_LISTEN", KNET_THREAD_SCTP_LISTEN }, { "SCTP_CONN", KNET_THREAD_SCTP_CONN }, #endif { "DST_LINK", KNET_THREAD_DST_LINK } }; static struct pretty_names thread_status[] = { { "unregistered", KNET_THREAD_UNREGISTERED }, { "registered", KNET_THREAD_REGISTERED }, { "started", KNET_THREAD_STARTED }, { "stopped", KNET_THREAD_STOPPED } }; static const char *get_thread_status_name(uint8_t status) { unsigned int i; for (i = 0; i < KNET_THREAD_STATUS_MAX; i++) { if (thread_status[i].val == status) { return thread_status[i].name; } } return "unknown"; } static const char *get_thread_name(uint8_t thread_id) { unsigned int i; for (i = 0; i < KNET_THREAD_MAX; i++) { if (thread_names[i].val == thread_id) { return thread_names[i].name; } } return "unknown"; } int get_thread_flush_queue(knet_handle_t knet_h, uint8_t thread_id) { uint8_t flush; if (pthread_mutex_lock(&knet_h->threads_status_mutex) != 0) { log_debug(knet_h, KNET_SUB_HANDLE, "Unable to get mutex lock"); return -1; } flush = knet_h->threads_flush_queue[thread_id]; pthread_mutex_unlock(&knet_h->threads_status_mutex); return flush; } int set_thread_flush_queue(knet_handle_t knet_h, uint8_t thread_id, uint8_t status) { if (pthread_mutex_lock(&knet_h->threads_status_mutex) != 0) { log_debug(knet_h, KNET_SUB_HANDLE, "Unable to get mutex lock"); return -1; } knet_h->threads_flush_queue[thread_id] = status; log_debug(knet_h, KNET_SUB_HANDLE, "Updated flush queue request for thread %s to %u", get_thread_name(thread_id), status); pthread_mutex_unlock(&knet_h->threads_status_mutex); return 0; } int wait_all_threads_flush_queue(knet_handle_t knet_h) { uint8_t i = 0, found = 0; while (!found) { usleep(knet_h->threads_timer_res); if (pthread_mutex_lock(&knet_h->threads_status_mutex) != 0) { continue; } found = 1; for (i = 0; i < KNET_THREAD_MAX; i++) { if (knet_h->threads_flush_queue[i] == KNET_THREAD_QUEUE_FLUSHED) { continue; } log_debug(knet_h, KNET_SUB_HANDLE, "Checking thread: %s queue: %u", get_thread_name(i), knet_h->threads_flush_queue[i]); if (knet_h->threads_flush_queue[i] != KNET_THREAD_QUEUE_FLUSHED) { found = 0; } } pthread_mutex_unlock(&knet_h->threads_status_mutex); } return 0; } int set_thread_status(knet_handle_t knet_h, uint8_t thread_id, uint8_t status) { if (pthread_mutex_lock(&knet_h->threads_status_mutex) != 0) { log_debug(knet_h, KNET_SUB_HANDLE, "Unable to get mutex lock"); return -1; } knet_h->threads_status[thread_id] = status; log_debug(knet_h, KNET_SUB_HANDLE, "Updated status for thread %s to %s", get_thread_name(thread_id), get_thread_status_name(status)); pthread_mutex_unlock(&knet_h->threads_status_mutex); return 0; } int wait_all_threads_status(knet_handle_t knet_h, uint8_t status) { uint8_t i = 0, found = 0; while (!found) { usleep(knet_h->threads_timer_res); if (pthread_mutex_lock(&knet_h->threads_status_mutex) != 0) { continue; } found = 1; for (i = 0; i < KNET_THREAD_MAX; i++) { if (knet_h->threads_status[i] == KNET_THREAD_UNREGISTERED) { continue; } log_debug(knet_h, KNET_SUB_HANDLE, "Checking thread: %s status: %s req: %s", get_thread_name(i), get_thread_status_name(knet_h->threads_status[i]), get_thread_status_name(status)); if (knet_h->threads_status[i] != status) { found = 0; } } pthread_mutex_unlock(&knet_h->threads_status_mutex); } return 0; } void force_pmtud_run(knet_handle_t knet_h, uint8_t subsystem, uint8_t reset_mtu, uint8_t force_restart) { if (reset_mtu) { log_debug(knet_h, subsystem, "PMTUd has been reset to default"); knet_h->data_mtu = calc_min_mtu(knet_h); if (knet_h->pmtud_notify_fn) { knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data, knet_h->data_mtu); } } /* * we can only try to take a lock here. This part of the code * can be invoked by any thread, including PMTUd that is already * holding a lock at that stage. * If PMTUd is holding the lock, most likely it is already running * and we don't need to notify it back. */ if (!pthread_mutex_trylock(&knet_h->pmtud_mutex)) { if (!knet_h->pmtud_running) { if (!knet_h->pmtud_forcerun) { log_debug(knet_h, subsystem, "Notifying PMTUd to rerun"); knet_h->pmtud_forcerun = 1; } } else { if (force_restart) { if (_pmtud_reschedule(knet_h) < 0) { log_info(knet_h, KNET_SUB_PMTUD, "Unable to notify PMTUd to reschedule. A joining node may struggle to connect properly"); } } } pthread_mutex_unlock(&knet_h->pmtud_mutex); } } int knet_handle_set_threads_timer_res(knet_handle_t knet_h, useconds_t timeres) { - int savederrno = 0; + int savederrno = 0, err = 0; + struct knet_host *host; + int link_idx; + int found = 0; if (!_is_valid_handle(knet_h)) { return -1; } /* * most threads use timeres / 1000 as timeout on epoll. * anything below 1000 would generate a result of 0, making * the threads spin at 100% cpu */ if ((timeres > 0) && (timeres < 1000)) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (timeres) { + if (timeres > knet_h->threads_timer_res) { + for (host = knet_h->host_head; host != NULL; host = host->next) { + for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { + if (!host->link[link_idx].configured) { + continue; + } + if (host->link[link_idx].ping_interval < timeres) { + log_warn(knet_h, KNET_SUB_HANDLE, + "Requested new threads timer resolution %u is higher than detected link ping interval on host: %u link: %u interval: %llu (ns).", + timeres, host->host_id, link_idx, host->link[link_idx].ping_interval); + } + if (host->link[link_idx].pong_timeout < timeres) { + log_err(knet_h, KNET_SUB_HANDLE, + "Requested new threads timer resolution %u is higher than detected link pong time on host: %u link: %u timeout: %llu (ns) and will cause network instability", + timeres, host->host_id, link_idx, host->link[link_idx].pong_timeout); + found = 1; + } + } + } + if (found) { + err = -1; + savederrno = EINVAL; + goto exit_unlock; + } + } knet_h->threads_timer_res = timeres; log_debug(knet_h, KNET_SUB_HANDLE, "Setting new threads timer resolution to %u usecs", knet_h->threads_timer_res); } else { knet_h->threads_timer_res = KNET_THREADS_TIMER_RES; log_debug(knet_h, KNET_SUB_HANDLE, "Setting new threads timer resolution to default %u usecs", knet_h->threads_timer_res); } +exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); - return 0; + errno = err ? savederrno : 0; + return err; } int knet_handle_get_threads_timer_res(knet_handle_t knet_h, useconds_t *timeres) { int savederrno = 0; if (!_is_valid_handle(knet_h)) { return -1; } if (!timeres) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } *timeres = knet_h->threads_timer_res; pthread_rwlock_unlock(&knet_h->global_rwlock); return 0; } uint32_t compute_chksum(const unsigned char *data, uint32_t data_len) { uLong crc; crc = crc32(0, NULL, 0); crc = crc32(crc, (Bytef*)data, data_len); return crc; } uint32_t compute_chksumv(const struct iovec *iov_in, int iovcnt_in) { uLong crc; int i; crc = crc32(0, NULL, 0); for (i = 0; i < iovcnt_in; i++) { crc = crc32(crc, (Bytef*)iov_in[i].iov_base, iov_in[i].iov_len); } return crc; } diff --git a/libknet/threads_rx.c b/libknet/threads_rx.c index 63e9d65f..662d90dd 100644 --- a/libknet/threads_rx.c +++ b/libknet/threads_rx.c @@ -1,1228 +1,1241 @@ /* * Copyright (C) 2012-2023 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include "compat.h" #include "compress.h" #include "crypto.h" #include "host.h" #include "links.h" #include "links_acl.h" #include "logging.h" #include "transports.h" #include "transport_common.h" #include "threads_common.h" #include "threads_heartbeat.h" #include "threads_pmtud.h" #include "threads_rx.h" #include "netutils.h" #include "onwire_v1.h" /* * RECV */ /* * return 1 if a > b * return -1 if b > a * return 0 if they are equal */ static inline int _timecmp(struct timespec a, struct timespec b) { if (a.tv_sec != b.tv_sec) { if (a.tv_sec > b.tv_sec) { return 1; } else { return -1; } } else { if (a.tv_nsec > b.tv_nsec) { return 1; } else if (a.tv_nsec < b.tv_nsec) { return -1; } else { return 0; } } } /* * calculate use % of defrag buffers per host * and if % is <= knet_h->defrag_bufs_shrink_threshold for the last second, then half the size */ static void _shrink_defrag_buffers(knet_handle_t knet_h) { struct knet_host *host; struct knet_host_defrag_buf *new_bufs = NULL; struct timespec now; unsigned long long time_diff; /* nanoseconds */ uint16_t i, x, in_use_bufs; uint32_t sum; /* * first run. */ if ((knet_h->defrag_bufs_last_run.tv_sec == 0) && (knet_h->defrag_bufs_last_run.tv_nsec == 0)) { clock_gettime(CLOCK_MONOTONIC, &knet_h->defrag_bufs_last_run); return; } clock_gettime(CLOCK_MONOTONIC, &now); timespec_diff(knet_h->defrag_bufs_last_run, now, &time_diff); if (time_diff < (((unsigned long long)knet_h->defrag_bufs_usage_samples_timespan * 1000000000) / knet_h->defrag_bufs_usage_samples)) { return; } /* * record the last run */ memmove(&knet_h->defrag_bufs_last_run, &now, sizeof(struct timespec)); /* * do the real work: */ for (host = knet_h->host_head; host != NULL; host = host->next) { /* * Update buffer usage stats. We do this for all nodes. */ in_use_bufs = 0; for (i = 0; i < host->allocated_defrag_bufs; i++) { if (host->defrag_bufs[i].in_use) { in_use_bufs++; } } /* * record only % */ host->in_use_defrag_buffers[host->in_use_defrag_buffers_index] = (in_use_bufs * 100 / host->allocated_defrag_bufs); host->in_use_defrag_buffers_index++; /* * make sure to stay within buffer */ if (host->in_use_defrag_buffers_index == knet_h->defrag_bufs_usage_samples) { host->in_use_defrag_buffers_index = 0; } /* * only allow shrinking if we have enough samples */ if (host->in_use_defrag_buffers_samples < knet_h->defrag_bufs_usage_samples) { host->in_use_defrag_buffers_samples++; continue; } /* * only allow shrinking if in use bufs are <= knet_h->defrag_bufs_shrink_threshold% */ if (knet_h->defrag_bufs_reclaim_policy == RECLAIM_POLICY_AVERAGE) { sum = 0; for (i = 0; i < knet_h->defrag_bufs_usage_samples; i++) { sum += host->in_use_defrag_buffers[i]; } sum = sum / knet_h->defrag_bufs_usage_samples; if (sum > knet_h->defrag_bufs_shrink_threshold) { continue; } } else { sum = 0; for (i = 0; i < knet_h->defrag_bufs_usage_samples; i++) { if (host->in_use_defrag_buffers[i] > knet_h->defrag_bufs_shrink_threshold) { sum = 1; } } if (sum) { continue; } } /* * only allow shrinking if allocated bufs > min_defrag_bufs */ if (host->allocated_defrag_bufs == knet_h->defrag_bufs_min) { continue; } /* * compat all the in_use buffers at the beginning. * we the checks above, we are 100% sure they fit */ x = 0; for (i = 0; i < host->allocated_defrag_bufs; i++) { if (host->defrag_bufs[i].in_use) { memmove(&host->defrag_bufs[x], &host->defrag_bufs[i], sizeof(struct knet_host_defrag_buf)); x++; } } /* * memory allocation is not critical. it just means the system is under * memory pressure and we will need to wait our turn to free memory... how odd :) */ new_bufs = realloc(host->defrag_bufs, sizeof(struct knet_host_defrag_buf) * (host->allocated_defrag_bufs / 2)); if (!new_bufs) { log_err(knet_h, KNET_SUB_RX, "Unable to decrease defrag buffers for host %u: %s", host->host_id, strerror(errno)); continue; } host->defrag_bufs = new_bufs; host->allocated_defrag_bufs = host->allocated_defrag_bufs / 2; /* * clear buffer use stats. Old ones are no good for new one */ _clear_defrag_bufs_stats(host); log_debug(knet_h, KNET_SUB_RX, "Defrag buffers for host %u decreased from %u to: %u", host->host_id, host->allocated_defrag_bufs * 2, host->allocated_defrag_bufs); } } /* * check if we can double the defrag buffers. * * return 0 if we cannot reallocate * return 1 if we have more buffers */ static int _realloc_defrag_buffers(knet_handle_t knet_h, struct knet_host *src_host) { struct knet_host_defrag_buf *new_bufs = NULL; int i; /* * max_defrag_bufs is a power of 2 * allocated_defrag_bufs doubles on each iteration. * Sooner or later (and hopefully never) allocated with be == to max. */ if (src_host->allocated_defrag_bufs < knet_h->defrag_bufs_max) { new_bufs = realloc(src_host->defrag_bufs, src_host->allocated_defrag_bufs * 2 * sizeof(struct knet_host_defrag_buf)); if (!new_bufs) { log_err(knet_h, KNET_SUB_RX, "Unable to increase defrag buffers for host %u: %s", src_host->host_id, strerror(errno)); return 0; } /* * keep the math simple here between arrays, pointers and what not. * Init each buffer individually. */ for (i = src_host->allocated_defrag_bufs; i < src_host->allocated_defrag_bufs * 2; i++) { memset(&new_bufs[i], 0, sizeof(struct knet_host_defrag_buf)); } src_host->allocated_defrag_bufs = src_host->allocated_defrag_bufs * 2; src_host->defrag_bufs = new_bufs; /* * clear buffer use stats. Old ones are no good for new one */ _clear_defrag_bufs_stats(src_host); log_debug(knet_h, KNET_SUB_RX, "Defrag buffers for host %u increased from %u to: %u", src_host->host_id, src_host->allocated_defrag_bufs / 2, src_host->allocated_defrag_bufs); return 1; } return 0; } /* * this functions needs to return an index * to a knet_host_defrag_buf. (-1 on errors) */ static int _find_pckt_defrag_buf(knet_handle_t knet_h, struct knet_host *src_host, seq_num_t seq_num) { int i, oldest; uint16_t cur_allocated_defrag_bufs = src_host->allocated_defrag_bufs; /* * check if there is a buffer already in use handling the same seq_num */ for (i = 0; i < src_host->allocated_defrag_bufs; i++) { if (src_host->defrag_bufs[i].in_use) { if (src_host->defrag_bufs[i].pckt_seq == seq_num) { return i; } } } /* * If there is no buffer that's handling the current seq_num * either it's new or it's been reclaimed already. * check if it's been reclaimed/seen before using the defrag circular * buffer. If the pckt has been seen before, the buffer expired (ETIME) * and there is no point to try to defrag it again. */ if (!_seq_num_lookup(knet_h, src_host, seq_num, 1, 0)) { errno = ETIME; return -1; } /* * register the pckt as seen */ _seq_num_set(src_host, seq_num, 1); /* * see if there is a free buffer */ for (i = 0; i < src_host->allocated_defrag_bufs; i++) { if (!src_host->defrag_bufs[i].in_use) { return i; } } /* * check if we can increase num of buffers */ if (_realloc_defrag_buffers(knet_h, src_host)) { return cur_allocated_defrag_bufs + 1; } /* * at this point, there are no free buffers, the pckt is new * and we need to reclaim a buffer, and we will take the one * with the oldest timestamp. It's as good as any. */ oldest = 0; for (i = 0; i < src_host->allocated_defrag_bufs; i++) { if (_timecmp(src_host->defrag_bufs[i].last_update, src_host->defrag_bufs[oldest].last_update) < 0) { oldest = i; } } src_host->defrag_bufs[oldest].in_use = 0; return oldest; } static int _pckt_defrag(knet_handle_t knet_h, struct knet_host *src_host, seq_num_t seq_num, unsigned char *data, ssize_t *len, uint8_t frags, uint8_t frag_seq) { struct knet_host_defrag_buf *defrag_buf; int defrag_buf_idx; defrag_buf_idx = _find_pckt_defrag_buf(knet_h, src_host, seq_num); if (defrag_buf_idx < 0) { return 1; } defrag_buf = &src_host->defrag_bufs[defrag_buf_idx]; /* * if the buf is not is use, then make sure it's clean */ if (!defrag_buf->in_use) { memset(defrag_buf, 0, sizeof(struct knet_host_defrag_buf)); defrag_buf->in_use = 1; defrag_buf->pckt_seq = seq_num; } /* * update timestamp on the buffer */ clock_gettime(CLOCK_MONOTONIC, &defrag_buf->last_update); /* * check if we already received this fragment */ if (defrag_buf->frag_map[frag_seq]) { /* * if we have received this fragment and we didn't clear the buffer * it means that we don't have all fragments yet */ return 1; } /* * we need to handle the last packet with gloves due to its different size */ if (frag_seq == frags) { defrag_buf->last_frag_size = *len; /* * in the event when the last packet arrives first, * we still don't know the offset vs the other fragments (based on MTU), * so we store the fragment at the end of the buffer where it's safe * and take a copy of the len so that we can restore its offset later. * remember we can't use the local MTU for this calculation because pMTU * can be asymettric between the same hosts. */ if (!defrag_buf->frag_size) { defrag_buf->last_first = 1; memmove(defrag_buf->buf + (KNET_MAX_PACKET_SIZE - *len), data, *len); } } else { defrag_buf->frag_size = *len; } if (defrag_buf->frag_size) { memmove(defrag_buf->buf + ((frag_seq - 1) * defrag_buf->frag_size), data, *len); } defrag_buf->frag_recv++; defrag_buf->frag_map[frag_seq] = 1; /* * check if we received all the fragments */ if (defrag_buf->frag_recv == frags) { /* * special case the last pckt */ if (defrag_buf->last_first) { memmove(defrag_buf->buf + ((frags - 1) * defrag_buf->frag_size), defrag_buf->buf + (KNET_MAX_PACKET_SIZE - defrag_buf->last_frag_size), defrag_buf->last_frag_size); } /* * recalculate packet lenght */ *len = ((frags - 1) * defrag_buf->frag_size) + defrag_buf->last_frag_size; /* * copy the pckt back in the user data */ memmove(data, defrag_buf->buf, *len); /* * free this buffer */ defrag_buf->in_use = 0; return 0; } return 1; } static int _handle_data_stats(knet_handle_t knet_h, struct knet_link *src_link, ssize_t len, uint64_t decrypt_time) { int stats_err; /* data stats at the top for consistency with TX */ src_link->status.stats.rx_data_packets++; src_link->status.stats.rx_data_bytes += len; if (decrypt_time) { stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); if (stats_err < 0) { log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); return -1; } /* Only update the crypto overhead for data packets. Mainly to be consistent with TX */ if (decrypt_time < knet_h->stats.rx_crypt_time_min) { knet_h->stats.rx_crypt_time_min = decrypt_time; } if (decrypt_time > knet_h->stats.rx_crypt_time_max) { knet_h->stats.rx_crypt_time_max = decrypt_time; } knet_h->stats.rx_crypt_time_ave = (knet_h->stats.rx_crypt_time_ave * knet_h->stats.rx_crypt_packets + decrypt_time) / (knet_h->stats.rx_crypt_packets+1); knet_h->stats.rx_crypt_packets++; pthread_mutex_unlock(&knet_h->handle_stats_mutex); } return 0; } static int _decompress_data(knet_handle_t knet_h, uint8_t decompress_type, unsigned char *data, ssize_t *len, ssize_t header_size) { int err = 0, stats_err = 0; if (decompress_type) { ssize_t decmp_outlen = KNET_DATABUFSIZE_COMPRESS; struct timespec start_time; struct timespec end_time; uint64_t decompress_time; clock_gettime(CLOCK_MONOTONIC, &start_time); err = decompress(knet_h, decompress_type, data, *len - header_size, knet_h->recv_from_links_buf_decompress, &decmp_outlen); clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, &decompress_time); stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); if (stats_err < 0) { log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); return -1; } if (!err) { /* Collect stats */ if (decompress_time < knet_h->stats.rx_compress_time_min) { knet_h->stats.rx_compress_time_min = decompress_time; } if (decompress_time > knet_h->stats.rx_compress_time_max) { knet_h->stats.rx_compress_time_max = decompress_time; } knet_h->stats.rx_compress_time_ave = (knet_h->stats.rx_compress_time_ave * knet_h->stats.rx_compressed_packets + decompress_time) / (knet_h->stats.rx_compressed_packets+1); knet_h->stats.rx_compressed_packets++; knet_h->stats.rx_compressed_original_bytes += decmp_outlen; knet_h->stats.rx_compressed_size_bytes += *len - KNET_HEADER_SIZE; memmove(data, knet_h->recv_from_links_buf_decompress, decmp_outlen); *len = decmp_outlen + header_size; } else { knet_h->stats.rx_failed_to_decompress++; pthread_mutex_unlock(&knet_h->handle_stats_mutex); log_err(knet_h, KNET_SUB_COMPRESS, "Unable to decompress packet (%d): %s", err, strerror(errno)); return -1; } pthread_mutex_unlock(&knet_h->handle_stats_mutex); } return 0; } static int _check_destination(knet_handle_t knet_h, struct knet_header *inbuf, unsigned char *data, ssize_t len, ssize_t header_size, int8_t *channel) { knet_node_id_t dst_host_ids[KNET_MAX_HOST]; size_t dst_host_ids_entries = 0; int bcast = 1; size_t host_idx; int found = 0; if (knet_h->dst_host_filter_fn) { bcast = knet_h->dst_host_filter_fn( knet_h->dst_host_filter_fn_private_data, data, len - header_size, KNET_NOTIFY_RX, knet_h->host_id, inbuf->kh_node, channel, dst_host_ids, &dst_host_ids_entries); if (bcast < 0) { log_debug(knet_h, KNET_SUB_RX, "Error from dst_host_filter_fn: %d", bcast); return -1; } if ((!bcast) && (!dst_host_ids_entries)) { log_debug(knet_h, KNET_SUB_RX, "Message is unicast but no dst_host_ids_entries"); return -1; } /* check if we are dst for this packet */ if (!bcast) { if (dst_host_ids_entries > KNET_MAX_HOST) { log_debug(knet_h, KNET_SUB_RX, "dst_host_filter_fn returned too many destinations"); return -1; } for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) { if (dst_host_ids[host_idx] == knet_h->host_id) { found = 1; break; } } if (!found) { log_debug(knet_h, KNET_SUB_RX, "Packet is not for us"); return -1; } } } return 0; } static int _deliver_data(knet_handle_t knet_h, unsigned char *data, ssize_t len, ssize_t header_size, int8_t channel) { struct iovec iov_out[1]; ssize_t outlen = 0; memset(iov_out, 0, sizeof(iov_out)); retry: iov_out[0].iov_base = (void *) data + outlen; iov_out[0].iov_len = len - (outlen + header_size); outlen = writev(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], iov_out, 1); if ((outlen > 0) && (outlen < (ssize_t)iov_out[0].iov_len)) { log_debug(knet_h, KNET_SUB_RX, "Unable to send all data to the application in one go. Expected: %zu Sent: %zd\n", iov_out[0].iov_len, outlen); goto retry; } if (outlen <= 0) { knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data, knet_h->sockfd[channel].sockfd[0], channel, KNET_NOTIFY_RX, outlen, errno); return -1; } if ((size_t)outlen != iov_out[0].iov_len) { return -1; } return 0; } +static int _fast_data_up(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link) +{ + if (src_link->received_pong) { + log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u received data during valid ping/pong activity. Force link up.", src_host->host_id, src_link->link_id); + _link_updown(knet_h, src_host->host_id, src_link->link_id, src_link->status.enabled, 1, 0); + return 1; + } + // host is not eligible for fast data up + return 0; +} + static void _process_data(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len, uint64_t decrypt_time) { int8_t channel; uint8_t decompress_type = 0; ssize_t header_size; seq_num_t seq_num; uint8_t frags, frag_seq; unsigned char *data; if (_handle_data_stats(knet_h, src_link, len, decrypt_time) < 0) { return; } /* * register host is sending data. Required to determine if we need * to reset circular buffers. (see onwire_v1.c) */ src_host->got_data = 1; if (knet_h->onwire_ver_remap) { get_data_header_info_v1(knet_h, inbuf, &header_size, &channel, &seq_num, &decompress_type, &frags, &frag_seq); data = get_data_v1(knet_h, inbuf); } else { switch (inbuf->kh_version) { case 1: get_data_header_info_v1(knet_h, inbuf, &header_size, &channel, &seq_num, &decompress_type, &frags, &frag_seq); data = get_data_v1(knet_h, inbuf); break; default: log_warn(knet_h, KNET_SUB_RX, "processing data onwire version %u not supported", inbuf->kh_version); return; break; } } if (!_seq_num_lookup(knet_h, src_host, seq_num, 0, 0)) { if (src_host->link_handler_policy != KNET_LINK_POLICY_ACTIVE) { log_debug(knet_h, KNET_SUB_RX, "Packet has already been delivered"); } return; } if (frags > 1) { /* * len as received from the socket also includes extra stuff * that the defrag code doesn't care about. So strip it * here and readd only for repadding once we are done * defragging * * the defrag code assumes that data packets have all the same size * except the last one that might be smaller. * */ len = len - header_size; if (_pckt_defrag(knet_h, src_host, seq_num, data, &len, frags, frag_seq)) { return; } len = len + header_size; } if (_decompress_data(knet_h, decompress_type, data, &len, header_size) < 0) { return; } if (!src_host->status.reachable) { - log_debug(knet_h, KNET_SUB_RX, "Source host %u not reachable yet. Discarding packet.", src_host->host_id); - return; + if (!_fast_data_up(knet_h, src_host, src_link)) { + log_debug(knet_h, KNET_SUB_RX, "Source host %u not reachable yet. Discarding packet.", src_host->host_id); + return; + } } if (knet_h->enabled != 1) /* data forward is disabled */ return; if (_check_destination(knet_h, inbuf, data, len, header_size, &channel) < 0) { return; } if (!knet_h->sockfd[channel].in_use) { log_debug(knet_h, KNET_SUB_RX, "received packet for channel %d but there is no local sock connected", channel); return; } #ifdef ONWIRE_V1_EXTRA_DEBUG if (inbuf->khp_data_v1_checksum != compute_chksum(data, len - header_size)) { log_err(knet_h, KNET_SUB_RX, "Received incorrect data checksum after reassembly from host: %u seq: %u", src_host->host_id, seq_num); /* * give a chance to the log threads to pick up the message */ sleep(1); abort(); } #endif if (_deliver_data(knet_h, data, len, header_size, channel) < 0) { return; } _seq_num_set(src_host, seq_num, 0); } static struct knet_header *_decrypt_packet(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *len, uint64_t *decrypt_time) { int try_decrypt = 0; int i = 0; struct timespec start_time; struct timespec end_time; ssize_t outlen; for (i = 1; i <= KNET_MAX_CRYPTO_INSTANCES; i++) { if (knet_h->crypto_instance[i]) { try_decrypt = 1; break; } } if ((!try_decrypt) && (knet_h->crypto_only == KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC)) { log_debug(knet_h, KNET_SUB_RX, "RX thread configured to accept only crypto packets, but no crypto configs are configured!"); return NULL; } if (try_decrypt) { clock_gettime(CLOCK_MONOTONIC, &start_time); if (crypto_authenticate_and_decrypt(knet_h, (unsigned char *)inbuf, *len, knet_h->recv_from_links_buf_decrypt, &outlen) < 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to decrypt/auth packet"); if (knet_h->crypto_only == KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC) { return NULL; } log_debug(knet_h, KNET_SUB_RX, "Attempting to process packet as clear data"); } else { clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, decrypt_time); *len = outlen; inbuf = (struct knet_header *)knet_h->recv_from_links_buf_decrypt; } } return inbuf; } static int _packet_checks(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t len) { #ifdef ONWIRE_V1_EXTRA_DEBUG uint32_t rx_packet_checksum, expected_packet_checksum; #endif if (len < (ssize_t)(KNET_HEADER_SIZE + 1)) { log_debug(knet_h, KNET_SUB_RX, "Packet is too short: %ld", (long)len); return -1; } #ifdef ONWIRE_V1_EXTRA_DEBUG inbuf->kh_node = htons(inbuf->kh_node); rx_packet_checksum = inbuf->kh_checksum; inbuf->kh_checksum = 0; expected_packet_checksum = compute_chksum((const unsigned char *)inbuf, len); if (rx_packet_checksum != expected_packet_checksum) { log_err(knet_h, KNET_SUB_RX, "Received packet with incorrect checksum. Received: %u Expected: %u", rx_packet_checksum, expected_packet_checksum); /* * give a chance to the log threads to pick up the message */ sleep(1); abort(); } inbuf->kh_node = ntohs(inbuf->kh_node); #endif /* * old versions of knet did not advertise max_ver and max_ver is set to 0. */ if (!inbuf->kh_max_ver) { inbuf->kh_max_ver = 1; } /* * if the node joining max version is lower than the min version * then we reject the node */ if (inbuf->kh_max_ver < knet_h->onwire_min_ver) { log_warn(knet_h, KNET_SUB_RX, "Received packet version %u from node %u, lower than currently minimal supported onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node); return -1; } /* * if the node joining with version higher than our max version * then we reject the node */ if (inbuf->kh_version > knet_h->onwire_max_ver) { log_warn(knet_h, KNET_SUB_RX, "Received packet version %u from node %u, higher than currently maximum supported onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node); return -1; } /* * if the node joining with version lower than the current in use version * then we reject the node * * NOTE: should we make this configurable and support downgrades? */ if ((!knet_h->onwire_force_ver) && (inbuf->kh_version < knet_h->onwire_ver) && (inbuf->kh_max_ver > inbuf->kh_version)) { log_warn(knet_h, KNET_SUB_RX, "Received packet version %u from node %u, lower than currently in use onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node); return -1; } return 0; } static void _handle_dynip(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, int sockfd, const struct knet_mmsghdr *msg) { if (src_link->dynamic == KNET_LINK_DYNIP) { if (cmpaddr(&src_link->dst_addr, msg->msg_hdr.msg_name) != 0) { log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u appears to have changed ip address", src_host->host_id, src_link->link_id); memmove(&src_link->dst_addr, msg->msg_hdr.msg_name, sizeof(struct sockaddr_storage)); if (knet_addrtostr(&src_link->dst_addr, sockaddr_len(&src_link->dst_addr), src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN, src_link->status.dst_port, KNET_MAX_PORT_LEN) != 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to resolve ???"); snprintf(src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN - 1, "Unknown!!!"); snprintf(src_link->status.dst_port, KNET_MAX_PORT_LEN - 1, "??"); } else { log_info(knet_h, KNET_SUB_RX, "host: %u link: %u new connection established from: %s:%s", src_host->host_id, src_link->link_id, src_link->status.dst_ipaddr, src_link->status.dst_port); } } /* * transport has already accepted the connection here * otherwise we would not be receiving packets */ transport_link_dyn_connect(knet_h, sockfd, src_link); } } /* * processing incoming packets vs access lists */ static int _check_rx_acl(knet_handle_t knet_h, struct knet_link *src_link, const struct knet_mmsghdr *msg) { if (knet_h->use_access_lists) { if (!check_validate(knet_h, src_link, msg->msg_hdr.msg_name)) { char src_ipaddr[KNET_MAX_HOST_LEN]; char src_port[KNET_MAX_PORT_LEN]; memset(src_ipaddr, 0, KNET_MAX_HOST_LEN); memset(src_port, 0, KNET_MAX_PORT_LEN); if (knet_addrtostr(msg->msg_hdr.msg_name, sockaddr_len(msg->msg_hdr.msg_name), src_ipaddr, KNET_MAX_HOST_LEN, src_port, KNET_MAX_PORT_LEN) < 0) { log_warn(knet_h, KNET_SUB_RX, "Packet rejected: unable to resolve host/port"); } else { log_warn(knet_h, KNET_SUB_RX, "Packet rejected from %s:%s", src_ipaddr, src_port); } return 0; } } return 1; } static void _parse_recv_from_links(knet_handle_t knet_h, int sockfd, const struct knet_mmsghdr *msg) { int savederrno = 0, stats_err = 0; struct knet_host *src_host; struct knet_link *src_link; uint64_t decrypt_time = 0; struct knet_header *inbuf = msg->msg_hdr.msg_iov->iov_base; ssize_t len = msg->msg_len; int i, found_link = 0; inbuf = _decrypt_packet(knet_h, inbuf, &len, &decrypt_time); if (!inbuf) { char src_ipaddr[KNET_MAX_HOST_LEN]; char src_port[KNET_MAX_PORT_LEN]; memset(src_ipaddr, 0, KNET_MAX_HOST_LEN); memset(src_port, 0, KNET_MAX_PORT_LEN); if (knet_addrtostr(msg->msg_hdr.msg_name, sockaddr_len(msg->msg_hdr.msg_name), src_ipaddr, KNET_MAX_HOST_LEN, src_port, KNET_MAX_PORT_LEN) < 0) { log_err(knet_h, KNET_SUB_RX, "Unable to decrypt packet from unknown host/port (size %zu)!", len); } else { log_err(knet_h, KNET_SUB_RX, "Unable to decrypt packet from %s:%s (size %zu)!", src_ipaddr, src_port, len); } return; } inbuf->kh_node = ntohs(inbuf->kh_node); if (_packet_checks(knet_h, inbuf, len) < 0) { if (knet_h->rx_odd_packets < KNET_RX_ODD_PACKETS_THRESHOLD) { knet_h->rx_odd_packets++; } else { log_warn(knet_h, KNET_SUB_RX, "This node has received more than %u packets that have failed basic sanity checks", KNET_RX_ODD_PACKETS_THRESHOLD); log_warn(knet_h, KNET_SUB_RX, "It is highly recommended to check if all nodes are using the same crypto configuration"); knet_h->rx_odd_packets = 0; } return; } /* * determine source host */ src_host = knet_h->host_index[inbuf->kh_node]; if (src_host == NULL) { /* host not found */ log_debug(knet_h, KNET_SUB_RX, "Unable to find source host for this packet"); return; } /* * deteremine source link */ if (inbuf->kh_type == KNET_HEADER_TYPE_PING) { _handle_onwire_version(knet_h, src_host, inbuf); if (knet_h->onwire_ver_remap) { src_link = get_link_from_pong_v1(knet_h, src_host, inbuf); } else { switch (inbuf->kh_version) { case 1: src_link = get_link_from_pong_v1(knet_h, src_host, inbuf); break; default: log_warn(knet_h, KNET_SUB_RX, "Parsing ping onwire version %u not supported", inbuf->kh_version); return; break; } } if (!_check_rx_acl(knet_h, src_link, msg)) { return; } _handle_dynip(knet_h, src_host, src_link, sockfd, msg); } else { /* all other packets */ for (i = 0; i < KNET_MAX_LINK; i++) { src_link = &src_host->link[i]; if (cmpaddr(&src_link->dst_addr, msg->msg_hdr.msg_name) == 0) { found_link = 1; break; } } if (found_link) { /* * this check is currently redundant.. Keep it here for now */ if (!_check_rx_acl(knet_h, src_link, msg)) { return; } } else { log_debug(knet_h, KNET_SUB_RX, "Unable to determine source link for data packet. Discarding packet."); return; } } stats_err = pthread_mutex_lock(&src_link->link_stats_mutex); if (stats_err) { log_err(knet_h, KNET_SUB_RX, "Unable to get stats mutex lock for host %u link %u: %s", src_host->host_id, src_link->link_id, strerror(savederrno)); return; } switch (inbuf->kh_type) { case KNET_HEADER_TYPE_DATA: _process_data(knet_h, src_host, src_link, inbuf, len, decrypt_time); break; case KNET_HEADER_TYPE_PING: process_ping(knet_h, src_host, src_link, inbuf, len); break; case KNET_HEADER_TYPE_PONG: process_pong(knet_h, src_host, src_link, inbuf, len); break; case KNET_HEADER_TYPE_PMTUD: src_link->status.stats.rx_pmtu_packets++; src_link->status.stats.rx_pmtu_bytes += len; /* Unlock so we don't deadlock with tx_mutex */ pthread_mutex_unlock(&src_link->link_stats_mutex); process_pmtud(knet_h, src_link, inbuf); return; /* Don't need to unlock link_stats_mutex */ break; case KNET_HEADER_TYPE_PMTUD_REPLY: src_link->status.stats.rx_pmtu_packets++; src_link->status.stats.rx_pmtu_bytes += len; /* pmtud_mutex can't be acquired while we hold a link_stats_mutex (ordering) */ pthread_mutex_unlock(&src_link->link_stats_mutex); process_pmtud_reply(knet_h, src_link, inbuf); return; break; default: pthread_mutex_unlock(&src_link->link_stats_mutex); return; break; } pthread_mutex_unlock(&src_link->link_stats_mutex); } static void _handle_recv_from_links(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg) { int err, savederrno; int i, msg_recv, transport; if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to get global read lock"); return; } if (_is_valid_fd(knet_h, sockfd) < 1) { /* * this is normal if a fd got an event and before we grab the read lock * and the link is removed by another thread */ goto exit_unlock; } transport = knet_h->knet_transport_fd_tracker[sockfd].transport; /* * reset msg_namelen to buffer size because after recvmmsg * each msg_namelen will contain sizeof sockaddr_in or sockaddr_in6 */ for (i = 0; i < PCKT_RX_BUFS; i++) { msg[i].msg_hdr.msg_namelen = knet_h->knet_transport_fd_tracker[sockfd].sockaddr_len; } msg_recv = _recvmmsg(sockfd, &msg[0], PCKT_RX_BUFS, MSG_DONTWAIT | MSG_NOSIGNAL); savederrno = errno; /* * WARNING: man page for recvmmsg is wrong. Kernel implementation here: * recvmmsg can return: * -1 on error * 0 if the previous run of recvmmsg recorded an error on the socket * N number of messages (see exception below). * * If there is an error from recvmsg after receiving a frame or more, the recvmmsg * loop is interrupted, error recorded in the socket (getsockopt(SO_ERROR) and * it will be visibile in the next run. * * Need to be careful how we handle errors at this stage. * * error messages need to be handled on a per transport/protocol base * at this point we have different layers of error handling * - msg_recv < 0 -> error from this run * msg_recv = 0 -> error from previous run and error on socket needs to be cleared * - per-transport message data * example: msg[i].msg_hdr.msg_flags & MSG_NOTIFICATION or msg_len for SCTP == EOF, * but for UDP it is perfectly legal to receive a 0 bytes message.. go figure * - NOTE: on SCTP MSG_NOTIFICATION we get msg_recv == PCKT_FRAG_MAX messages and no * errno set. That means the error api needs to be able to abort the loop below. */ if (msg_recv <= 0) { transport_rx_sock_error(knet_h, transport, sockfd, msg_recv, savederrno); goto exit_unlock; } for (i = 0; i < msg_recv; i++) { err = transport_rx_is_data(knet_h, transport, sockfd, &msg[i]); /* * TODO: make this section silent once we are confident * all protocols packet handlers are good */ switch(err) { case KNET_TRANSPORT_RX_ISDATA_ERROR: /* on error */ log_debug(knet_h, KNET_SUB_RX, "Transport reported error parsing packet"); goto exit_unlock; break; case KNET_TRANSPORT_RX_NOT_DATA_CONTINUE: /* packet is not data and we should continue the packet process loop */ log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, continue"); break; case KNET_TRANSPORT_RX_NOT_DATA_STOP: /* packet is not data and we should STOP the packet process loop */ log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, stop"); goto exit_unlock; break; case KNET_TRANSPORT_RX_IS_DATA: /* packet is data and should be parsed as such */ _parse_recv_from_links(knet_h, sockfd, &msg[i]); break; case KNET_TRANSPORT_RX_OOB_DATA_CONTINUE: log_debug(knet_h, KNET_SUB_RX, "Transport is processing sock OOB data, continue"); break; case KNET_TRANSPORT_RX_OOB_DATA_STOP: log_debug(knet_h, KNET_SUB_RX, "Transport has completed processing sock OOB data, stop"); goto exit_unlock; break; } } exit_unlock: _shrink_defrag_buffers(knet_h); pthread_rwlock_unlock(&knet_h->global_rwlock); } void *_handle_recv_from_links_thread(void *data) { int i, nev; knet_handle_t knet_h = (knet_handle_t) data; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; struct sockaddr_storage address[PCKT_RX_BUFS]; struct knet_mmsghdr msg[PCKT_RX_BUFS]; struct iovec iov_in[PCKT_RX_BUFS]; set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STARTED); memset(&msg, 0, sizeof(msg)); memset(&events, 0, sizeof(events)); for (i = 0; i < PCKT_RX_BUFS; i++) { iov_in[i].iov_base = (void *)knet_h->recv_from_links_buf[i]; iov_in[i].iov_len = KNET_DATABUFSIZE; memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr)); msg[i].msg_hdr.msg_name = &address[i]; msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); /* Real value filled in before actual use */ msg[i].msg_hdr.msg_iov = &iov_in[i]; msg[i].msg_hdr.msg_iovlen = 1; } while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(knet_h->recv_from_links_epollfd, events, KNET_EPOLL_MAX_EVENTS, knet_h->threads_timer_res / 1000); /* * the RX threads only need to notify that there has been at least * one successful run after queue flush has been requested. * See setfwd in handle.c */ if (get_thread_flush_queue(knet_h, KNET_THREAD_RX) == KNET_THREAD_QUEUE_FLUSH) { set_thread_flush_queue(knet_h, KNET_THREAD_RX, KNET_THREAD_QUEUE_FLUSHED); } /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { continue; } for (i = 0; i < nev; i++) { _handle_recv_from_links(knet_h, events[i].data.fd, msg); } } set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STOPPED); return NULL; } ssize_t knet_recv(knet_handle_t knet_h, char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0; ssize_t err = 0; struct iovec iov_in; if (!_is_valid_handle(knet_h)) { return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out_unlock; } memset(&iov_in, 0, sizeof(iov_in)); iov_in.iov_base = (void *)buff; iov_in.iov_len = buff_len; err = readv(knet_h->sockfd[channel].sockfd[0], &iov_in, 1); savederrno = errno; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; }