From e60c2e4624086b1073aad6f22f79d25d7c8f5ce8 Mon Sep 17 00:00:00 2001 From: Robert Lubos Date: Tue, 14 Nov 2023 14:31:37 +0100 Subject: [PATCH 1/5] net: pkt: Add function for allocating buffers w/o preconditions Add new function to allocate additional buffers for net_pkt, w/o any additional preconditions/checks. Just allocate what was requested. Signed-off-by: Robert Lubos --- include/zephyr/net/net_pkt.h | 25 +++++++++++++++ subsys/net/ip/net_pkt.c | 61 ++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/include/zephyr/net/net_pkt.h b/include/zephyr/net/net_pkt.h index 4f67249f15ba5..c5878213a0293 100644 --- a/include/zephyr/net/net_pkt.h +++ b/include/zephyr/net/net_pkt.h @@ -1707,6 +1707,13 @@ int net_pkt_alloc_buffer_debug(struct net_pkt *pkt, net_pkt_alloc_buffer_debug(_pkt, _size, _proto, _timeout, \ __func__, __LINE__) +int net_pkt_alloc_buffer_raw_debug(struct net_pkt *pkt, size_t size, + k_timeout_t timeout, + const char *caller, int line); +#define net_pkt_alloc_buffer_raw(_pkt, _size, _timeout) \ + net_pkt_alloc_buffer_raw_debug(_pkt, _size, _timeout, \ + __func__, __LINE__) + struct net_pkt *net_pkt_alloc_with_buffer_debug(struct net_if *iface, size_t size, sa_family_t family, @@ -1821,6 +1828,24 @@ int net_pkt_alloc_buffer(struct net_pkt *pkt, k_timeout_t timeout); #endif +/** + * @brief Allocate buffer for a net_pkt, of specified size, w/o any additional + * preconditions + * + * @details: The actual buffer size may be larger than requested one if fixed + * size buffers are in use. + * + * @param pkt The network packet requiring buffer to be allocated. + * @param size The size of buffer being requested. + * @param timeout Maximum time to wait for an allocation. + * + * @return 0 on success, negative errno code otherwise. + */ +#if !defined(NET_PKT_DEBUG_ENABLED) +int net_pkt_alloc_buffer_raw(struct net_pkt *pkt, size_t size, + k_timeout_t timeout); +#endif + /** * @brief Allocate a network packet and buffer at once * diff --git a/subsys/net/ip/net_pkt.c b/subsys/net/ip/net_pkt.c index ea60060abe578..38fc288d8f5c3 100644 --- a/subsys/net/ip/net_pkt.c +++ b/subsys/net/ip/net_pkt.c @@ -1197,6 +1197,67 @@ int net_pkt_alloc_buffer(struct net_pkt *pkt, return 0; } + +#if NET_LOG_LEVEL >= LOG_LEVEL_DBG +int net_pkt_alloc_buffer_raw_debug(struct net_pkt *pkt, size_t size, + k_timeout_t timeout, const char *caller, + int line) +#else +int net_pkt_alloc_buffer_raw(struct net_pkt *pkt, size_t size, + k_timeout_t timeout) +#endif +{ + struct net_buf_pool *pool = NULL; + struct net_buf *buf; + + if (size == 0) { + return 0; + } + + if (k_is_in_isr()) { + timeout = K_NO_WAIT; + } + + NET_DBG("Data allocation size %zu", size); + + if (pkt->context) { + pool = get_data_pool(pkt->context); + } + + if (!pool) { + pool = pkt->slab == &tx_pkts ? &tx_bufs : &rx_bufs; + } + +#if NET_LOG_LEVEL >= LOG_LEVEL_DBG + buf = pkt_alloc_buffer(pool, size, timeout, caller, line); +#else + buf = pkt_alloc_buffer(pool, size, timeout); +#endif + + if (!buf) { +#if NET_LOG_LEVEL >= LOG_LEVEL_DBG + NET_ERR("Data buffer (%zd) allocation failed (%s:%d)", + size, caller, line); +#else + NET_ERR("Data buffer (%zd) allocation failed.", size); +#endif + return -ENOMEM; + } + + net_pkt_append_buffer(pkt, buf); + +#if IS_ENABLED(CONFIG_NET_BUF_FIXED_DATA_SIZE) + /* net_buf allocators shrink the buffer size to the requested size. + * We don't want this behavior here, so restore the real size of the + * last fragment. + */ + buf = net_buf_frag_last(buf); + buf->size = CONFIG_NET_BUF_DATA_SIZE; +#endif + + return 0; +} + #if NET_LOG_LEVEL >= LOG_LEVEL_DBG static struct net_pkt *pkt_alloc(struct k_mem_slab *slab, k_timeout_t timeout, const char *caller, int line) From 531e83dfd408f68812209a0a12c69fa6dca8dcfd Mon Sep 17 00:00:00 2001 From: Robert Lubos Date: Tue, 14 Nov 2023 12:40:49 +0100 Subject: [PATCH 2/5] net: tcp: Rework data queueing API Rework how data is queued for the TCP connections: * net_context no longer allocates net_pkt for TCP connections. This was not only inefficient (net_context has no knowledge of the TX window size), but also error-prone in certain configuration (for example when IP fragmentation was enabled, net_context may attempt to allocate enormous packet, instead of let the data be fragmented for the TCP stream. * Instead, implement already defined `net_tcp_queue()` API, which takes raw buffer and length. This allows to take TX window into account and also better manage the allocated net_buf's (like for example avoid allocation if there's still room in the buffer). In result, the TCP stack will not only no longer exceed the TX window, but also prevent empty gaps in allocated net_buf's, which should lead to less out-of-mem issues with the stack. * As net_pkt-based `net_tcp_queue_data()` is no longer in use, it was removed. Signed-off-by: Robert Lubos --- subsys/net/ip/net_context.c | 27 +++--- subsys/net/ip/tcp.c | 166 +++++++++++++++++++++-------------- subsys/net/ip/tcp.h | 15 +--- subsys/net/ip/tcp_internal.h | 20 +++-- 4 files changed, 132 insertions(+), 96 deletions(-) diff --git a/subsys/net/ip/net_context.c b/subsys/net/ip/net_context.c index ef9fc640f0816..d9469948286b6 100644 --- a/subsys/net/ip/net_context.c +++ b/subsys/net/ip/net_context.c @@ -1713,7 +1713,7 @@ static int context_sendto(struct net_context *context, { const struct msghdr *msghdr = NULL; struct net_if *iface; - struct net_pkt *pkt; + struct net_pkt *pkt = NULL; size_t tmp_len; int ret; @@ -1935,6 +1935,15 @@ static int context_sendto(struct net_context *context, return -ENETDOWN; } + context->send_cb = cb; + context->user_data = user_data; + + if (IS_ENABLED(CONFIG_NET_TCP) && + net_context_get_proto(context) == IPPROTO_TCP && + !net_if_is_ip_offloaded(net_context_get_iface(context))) { + goto skip_alloc; + } + pkt = context_alloc_pkt(context, len, PKT_WAIT_TIME); if (!pkt) { NET_ERR("Failed to allocate net_pkt"); @@ -1953,9 +1962,6 @@ static int context_sendto(struct net_context *context, len = tmp_len; } - context->send_cb = cb; - context->user_data = user_data; - if (IS_ENABLED(CONFIG_NET_CONTEXT_PRIORITY)) { uint8_t priority; @@ -1977,6 +1983,7 @@ static int context_sendto(struct net_context *context, } } +skip_alloc: if (IS_ENABLED(CONFIG_NET_OFFLOAD) && net_if_is_ip_offloaded(net_context_get_iface(context))) { ret = context_write_data(pkt, buf, len, msghdr); @@ -2008,16 +2015,12 @@ static int context_sendto(struct net_context *context, } else if (IS_ENABLED(CONFIG_NET_TCP) && net_context_get_proto(context) == IPPROTO_TCP) { - ret = context_write_data(pkt, buf, len, msghdr); + ret = net_tcp_queue(context, buf, len, msghdr); if (ret < 0) { goto fail; } - net_pkt_cursor_init(pkt); - ret = net_tcp_queue_data(context, pkt); - if (ret < 0) { - goto fail; - } + len = ret; ret = net_tcp_send_data(context, cb, user_data); } else if (IS_ENABLED(CONFIG_NET_SOCKETS_PACKET) && @@ -2073,7 +2076,9 @@ static int context_sendto(struct net_context *context, return len; fail: - net_pkt_unref(pkt); + if (pkt != NULL) { + net_pkt_unref(pkt); + } return ret; } diff --git a/subsys/net/ip/tcp.c b/subsys/net/ip/tcp.c index 82faa77769478..2d0f753ca9115 100644 --- a/subsys/net/ip/tcp.c +++ b/subsys/net/ip/tcp.c @@ -1347,6 +1347,49 @@ static int tcp_pkt_peek(struct net_pkt *to, struct net_pkt *from, size_t pos, return net_pkt_copy(to, from, len); } +static int tcp_pkt_append(struct net_pkt *pkt, const uint8_t *data, size_t len) +{ + size_t alloc_len = len; + struct net_buf *buf = NULL; + int ret = 0; + + if (pkt->buffer) { + buf = net_buf_frag_last(pkt->buffer); + + if (len > net_buf_tailroom(buf)) { + alloc_len -= net_buf_tailroom(buf); + } else { + alloc_len = 0; + } + } + + if (alloc_len > 0) { + ret = net_pkt_alloc_buffer_raw(pkt, alloc_len, + TCP_PKT_ALLOC_TIMEOUT); + if (ret < 0) { + return -ENOBUFS; + } + } + + if (buf == NULL) { + buf = pkt->buffer; + } + + while (buf != NULL && len > 0) { + size_t write_len = MIN(len, net_buf_tailroom(buf)); + + net_buf_add_mem(buf, data, write_len); + + data += write_len; + len -= write_len; + buf = buf->frags; + } + + NET_ASSERT(len == 0, "Not all bytes written"); + + return ret; +} + static bool tcp_window_full(struct tcp *conn) { bool window_full = (conn->send_data_total >= conn->send_win); @@ -3229,13 +3272,12 @@ int net_tcp_update_recv_wnd(struct net_context *context, int32_t delta) return ret; } -/* net_context queues the outgoing data for the TCP connection */ -int net_tcp_queue_data(struct net_context *context, struct net_pkt *pkt) +int net_tcp_queue(struct net_context *context, const void *data, size_t len, + const struct msghdr *msg) { struct tcp *conn = context->tcp; - struct net_buf *orig_buf = NULL; + size_t queued_len = 0; int ret = 0; - size_t len; if (!conn || conn->state != TCP_ESTABLISHED) { return -ENOTCONN; @@ -3252,72 +3294,69 @@ int net_tcp_queue_data(struct net_context *context, struct net_pkt *pkt) goto out; } - len = net_pkt_get_len(pkt); + if (msg) { + len = 0; - if (conn->send_data->buffer) { - orig_buf = net_buf_frag_last(conn->send_data->buffer); + for (int i = 0; i < msg->msg_iovlen; i++) { + len += msg->msg_iov[i].iov_len; + } } - net_pkt_append_buffer(conn->send_data, pkt->buffer); - conn->send_data_total += len; - NET_DBG("conn: %p Queued %zu bytes (total %zu)", conn, len, - conn->send_data_total); - pkt->buffer = NULL; + /* Queue no more than TX window permits. It's guaranteed at this point + * that conn->send_data_total is less than conn->send_win, as it was + * verified in tcp_window_full() check above. As the connection mutex + * is held, their values shall not change since. + */ + len = MIN(conn->send_win - conn->send_data_total, len); + + if (msg) { + for (int i = 0; i < msg->msg_iovlen; i++) { + int iovlen = MIN(msg->msg_iov[i].iov_len, len); + + ret = tcp_pkt_append(conn->send_data, + msg->msg_iov[i].iov_base, + iovlen); + if (ret < 0) { + if (queued_len == 0) { + goto out; + } else { + break; + } + } + queued_len += iovlen; + len -= iovlen; + + if (len == 0) { + break; + } + } + } else { + ret = tcp_pkt_append(conn->send_data, data, len); + if (ret < 0) { + goto out; + } + + queued_len = len; + } + + conn->send_data_total += queued_len; + + /* Successfully queued data for transmission. Even if there's a transmit + * failure now (out-of-buf case), it can be ignored for now, retransmit + * timer will take care of queued data retransmission. + */ ret = tcp_send_queued_data(conn); if (ret < 0 && ret != -ENOBUFS) { tcp_conn_close(conn, ret); goto out; } - if ((ret == -ENOBUFS) && - (conn->send_data_total < (conn->unacked_len + len))) { - /* Some of the data has been sent, we cannot remove the - * whole chunk, the remainder portion is already - * in the send_data and will be transmitted upon a - * received ack or the next send call - * - * Set the return code back to 0 to pretend we just - * transmitted the chunk - */ - ret = 0; + if (tcp_window_full(conn)) { + (void)k_sem_take(&conn->tx_sem, K_NO_WAIT); } - if (ret == -ENOBUFS) { - /* Restore the original data so that we do not resend the pkt - * data multiple times. - */ - conn->send_data_total -= len; - - if (orig_buf) { - pkt->buffer = orig_buf->frags; - orig_buf->frags = NULL; - } else { - pkt->buffer = conn->send_data->buffer; - conn->send_data->buffer = NULL; - } - - /* If we have out-of-bufs case, and the send_data buffer has - * become empty, till the retransmit timer, as there is no - * data to retransmit. - * The socket layer will catch this and resend data if needed. - * Only perform this when it is just the newly added packet, - * otherwise it can disrupt any pending transmission - */ - if (conn->send_data_total == 0) { - NET_DBG("No bufs, cancelling retransmit timer"); - k_work_cancel_delayable(&conn->send_data_timer); - } - } else { - if (tcp_window_full(conn)) { - (void)k_sem_take(&conn->tx_sem, K_NO_WAIT); - } - - /* We should not free the pkt if there was an error. It will be - * freed in net_context.c:context_sendto() - */ - tcp_pkt_unref(pkt); - } + ret = queued_len; out: k_mutex_unlock(&conn->lock); @@ -3674,7 +3713,9 @@ static size_t tp_tcp_recv_cb(struct tcp *conn, struct net_pkt *pkt) net_pkt_pull(up, net_pkt_get_len(up) - len); - net_tcp_queue_data(conn->context, up); + for (struct net_buf *buf = pkt->buffer; buf != NULL; buf = buf->frags) { + net_tcp_queue(conn->context, buf->data, buf->len); + } return len; } @@ -3817,12 +3858,7 @@ enum net_verdict tp_input(struct net_conn *net_conn, responded = true; NET_DBG("tcp_send(\"%s\")", tp->data); { - struct net_pkt *data_pkt; - - data_pkt = tcp_pkt_alloc(conn, len); - net_pkt_write(data_pkt, buf, len); - net_pkt_cursor_init(data_pkt); - net_tcp_queue_data(conn->context, data_pkt); + net_tcp_queue(conn->context, buf, len); } } break; diff --git a/subsys/net/ip/tcp.h b/subsys/net/ip/tcp.h index 77d5c5634a6cc..975f2a97624b8 100644 --- a/subsys/net/ip/tcp.h +++ b/subsys/net/ip/tcp.h @@ -31,6 +31,7 @@ extern "C" { #endif +#include #include /** @@ -72,18 +73,7 @@ int net_tcp_listen(struct net_context *context); */ int net_tcp_accept(struct net_context *context, net_tcp_accept_cb_t cb, void *user_data); -/** - * @brief Enqueue data for transmission - * - * @param context Network context - * @param buf Pointer to the data - * @param len Number of bytes - * @param msghdr Data for a vector array operation - * - * @return 0 if ok, < 0 if error - */ -int net_tcp_queue(struct net_context *context, const void *buf, size_t len, - const struct msghdr *msghdr); + /* TODO: split into 2 functions, conn -> context, queue -> send? */ /* The following functions are provided solely for the compatibility @@ -112,7 +102,6 @@ void net_tcp_init(void); #define net_tcp_init(...) #endif int net_tcp_update_recv_wnd(struct net_context *context, int32_t delta); -int net_tcp_queue_data(struct net_context *context, struct net_pkt *pkt); int net_tcp_finalize(struct net_pkt *pkt, bool force_chksum); #if defined(CONFIG_NET_TEST_PROTOCOL) diff --git a/subsys/net/ip/tcp_internal.h b/subsys/net/ip/tcp_internal.h index 49edae13f687d..1dd3f23553282 100644 --- a/subsys/net/ip/tcp_internal.h +++ b/subsys/net/ip/tcp_internal.h @@ -283,21 +283,27 @@ struct net_tcp_hdr *net_tcp_input(struct net_pkt *pkt, #endif /** - * @brief Enqueue a single packet for transmission + * @brief Enqueue data for transmission * - * @param context TCP context - * @param pkt Packet + * @param context Network context + * @param data Pointer to the data + * @param len Number of bytes + * @param msg Data for a vector array operation * * @return 0 if ok, < 0 if error */ #if defined(CONFIG_NET_NATIVE_TCP) -int net_tcp_queue_data(struct net_context *context, struct net_pkt *pkt); +int net_tcp_queue(struct net_context *context, const void *data, size_t len, + const struct msghdr *msg); #else -static inline int net_tcp_queue_data(struct net_context *context, - struct net_pkt *pkt) +static inline int net_tcp_queue(struct net_context *context, const void *data, + size_t len, const struct msghdr *msg) { ARG_UNUSED(context); - ARG_UNUSED(pkt); + ARG_UNUSED(data); + ARG_UNUSED(len); + ARG_UNUSED(msg); + return -EPROTONOSUPPORT; } #endif From 81ab2e3e4f8d2cfe0273e8b083a044d1d79ec8f9 Mon Sep 17 00:00:00 2001 From: Robert Lubos Date: Wed, 15 Nov 2023 16:32:22 +0100 Subject: [PATCH 3/5] net: tcp: Feed TX semaphore on connection close Otherwise, if the application was for example blocked on poll() pending POLLOUT, it won't be notified. Signed-off-by: Robert Lubos --- subsys/net/ip/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subsys/net/ip/tcp.c b/subsys/net/ip/tcp.c index 2d0f753ca9115..6ffd688a20f2c 100644 --- a/subsys/net/ip/tcp.c +++ b/subsys/net/ip/tcp.c @@ -638,6 +638,8 @@ static int tcp_conn_close(struct tcp *conn, int status) status, conn->recv_user_data); } + k_sem_give(&conn->tx_sem); + return tcp_conn_unref(conn); } From c04398ae16cb74681244e0b4267af4df29e4cf8a Mon Sep 17 00:00:00 2001 From: Robert Lubos Date: Wed, 15 Nov 2023 16:33:24 +0100 Subject: [PATCH 4/5] net: sockets: tls: Set errno on TX waiting error In case underlying socket reported error while waiting for TX, the errno value was not set accordingly. This commit fixes this. Signed-off-by: Robert Lubos --- subsys/net/lib/sockets/sockets_tls.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/subsys/net/lib/sockets/sockets_tls.c b/subsys/net/lib/sockets/sockets_tls.c index 14b1de26af33a..c83d2c18b80e4 100644 --- a/subsys/net/lib/sockets/sockets_tls.c +++ b/subsys/net/lib/sockets/sockets_tls.c @@ -2230,10 +2230,9 @@ static ssize_t send_tls(struct tls_context *ctx, const void *buf, timeout_ms = timeout_to_ms(&timeout); ret = wait_for_reason(ctx->sock, timeout_ms, ret); if (ret != 0) { - /* Retry. */ + errno = -ret; break; } - } else { (void)tls_mbedtls_reset(ctx); errno = EIO; From 7cdd019de64d69dad59380118288dcc672f7b2cb Mon Sep 17 00:00:00 2001 From: Robert Lubos Date: Wed, 15 Nov 2023 17:49:00 +0100 Subject: [PATCH 5/5] net: zperf: Fix TCP packet counting Make sure we send the entire packet buffer before bumping the packet counter, send() does not guarantee that all of the requested data will be sent at once with STREAM socket. Signed-off-by: Robert Lubos --- subsys/net/lib/zperf/zperf_tcp_uploader.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/subsys/net/lib/zperf/zperf_tcp_uploader.c b/subsys/net/lib/zperf/zperf_tcp_uploader.c index 3e72f81e54bcb..5fcc0530c5c63 100644 --- a/subsys/net/lib/zperf/zperf_tcp_uploader.c +++ b/subsys/net/lib/zperf/zperf_tcp_uploader.c @@ -20,6 +20,22 @@ static char sample_packet[PACKET_SIZE_MAX]; static struct zperf_async_upload_context tcp_async_upload_ctx; +static ssize_t sendall(int sock, const void *buf, size_t len) +{ + while (len) { + ssize_t out_len = zsock_send(sock, buf, len, 0); + + if (out_len < 0) { + return out_len; + } + + buf = (const char *)buf + out_len; + len -= out_len; + } + + return 0; +} + static int tcp_upload(int sock, unsigned int duration_in_ms, unsigned int packet_size, @@ -50,7 +66,7 @@ static int tcp_upload(int sock, do { /* Send the packet */ - ret = zsock_send(sock, sample_packet, packet_size, 0); + ret = sendall(sock, sample_packet, packet_size); if (ret < 0) { if (nb_errors == 0 && ret != -ENOMEM) { NET_ERR("Failed to send the packet (%d)", errno);