Skip to content

Commit

Permalink
ipv4, ipv6: Use splice_eof() to flush
Browse files Browse the repository at this point in the history
[ Upstream commit 1d7e453 ]

Allow splice to undo the effects of MSG_MORE after prematurely ending a
splice/sendfile due to getting an EOF condition (->splice_read() returned
0) after splice had called sendmsg() with MSG_MORE set when the user didn't
set MSG_MORE.

For UDP, a pending packet will not be emitted if the socket is closed
before it is flushed; with this change, it be flushed by ->splice_eof().

For TCP, it's not clear that MSG_MORE is actually effective.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Kuniyuki Iwashima <kuniyu@amazon.com>
cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
cc: David Ahern <dsahern@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Stable-dep-of: a000212 ("udp: move udp->no_check6_tx to udp->udp_flags")
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
dhowells authored and gregkh committed Jan 10, 2024
1 parent 4713b7c commit 2489502
Show file tree
Hide file tree
Showing 10 changed files with 71 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/net/inet_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern);
int inet_send_prepare(struct sock *sk);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
void inet_splice_eof(struct socket *sock);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
Expand Down
1 change: 1 addition & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
size_t size, struct ubuf_info *uarg);
void tcp_splice_eof(struct socket *sock);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
Expand Down
1 change: 1 addition & 0 deletions include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
int udp_err(struct sk_buff *, u32);
int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
void udp_splice_eof(struct socket *sock);
int udp_push_pending_frames(struct sock *sk);
void udp_flush_pending_frames(struct sock *sk);
int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
Expand Down
18 changes: 18 additions & 0 deletions net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,21 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
EXPORT_SYMBOL(inet_sendmsg);

void inet_splice_eof(struct socket *sock)
{
const struct proto *prot;
struct sock *sk = sock->sk;

if (unlikely(inet_send_prepare(sk)))
return;

/* IPV6_ADDRFORM can change sk->sk_prot under us. */
prot = READ_ONCE(sk->sk_prot);
if (prot->splice_eof)
prot->splice_eof(sock);
}
EXPORT_SYMBOL_GPL(inet_splice_eof);

ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
Expand Down Expand Up @@ -1057,6 +1072,7 @@ const struct proto_ops inet_stream_ops = {
#ifdef CONFIG_MMU
.mmap = tcp_mmap,
#endif
.splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
Expand Down Expand Up @@ -1091,6 +1107,7 @@ const struct proto_ops inet_dgram_ops = {
.read_skb = udp_read_skb,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
Expand Down Expand Up @@ -1122,6 +1139,7 @@ static const struct proto_ops inet_sockraw_ops = {
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet_compat_ioctl,
Expand Down
16 changes: 16 additions & 0 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1492,6 +1492,22 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
EXPORT_SYMBOL(tcp_sendmsg);

void tcp_splice_eof(struct socket *sock)
{
struct sock *sk = sock->sk;
struct tcp_sock *tp = tcp_sk(sk);
int mss_now, size_goal;

if (!tcp_write_queue_tail(sk))
return;

lock_sock(sk);
mss_now = tcp_send_mss(sk, &size_goal, 0);
tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
release_sock(sk);
}
EXPORT_SYMBOL_GPL(tcp_splice_eof);

/*
* Handle reading urgent data. BSD has very simple semantics for
* this, no blocking and very strange errors 8)
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -3067,6 +3067,7 @@ struct proto tcp_prot = {
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
.splice_eof = tcp_splice_eof,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
Expand Down
16 changes: 16 additions & 0 deletions net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1332,6 +1332,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
EXPORT_SYMBOL(udp_sendmsg);

void udp_splice_eof(struct socket *sock)
{
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);

if (!up->pending || READ_ONCE(up->corkflag))
return;

lock_sock(sk);
if (up->pending && !READ_ONCE(up->corkflag))
udp_push_pending_frames(sk);
release_sock(sk);
}
EXPORT_SYMBOL_GPL(udp_splice_eof);

int udp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
Expand Down Expand Up @@ -2907,6 +2922,7 @@ struct proto udp_prot = {
.getsockopt = udp_getsockopt,
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.splice_eof = udp_splice_eof,
.sendpage = udp_sendpage,
.release_cb = ip4_datagram_release_cb,
.hash = udp_lib_hash,
Expand Down
1 change: 1 addition & 0 deletions net/ipv6/af_inet6.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,7 @@ const struct proto_ops inet6_stream_ops = {
#ifdef CONFIG_MMU
.mmap = tcp_mmap,
#endif
.splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
.sendmsg_locked = tcp_sendmsg_locked,
.sendpage_locked = tcp_sendpage_locked,
Expand Down
1 change: 1 addition & 0 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -2158,6 +2158,7 @@ struct proto tcpv6_prot = {
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
.splice_eof = tcp_splice_eof,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
Expand Down
15 changes: 15 additions & 0 deletions net/ipv6/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1657,6 +1657,20 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}

static void udpv6_splice_eof(struct socket *sock)
{
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);

if (!up->pending || READ_ONCE(up->corkflag))
return;

lock_sock(sk);
if (up->pending && !READ_ONCE(up->corkflag))
udp_v6_push_pending_frames(sk);
release_sock(sk);
}

void udpv6_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
Expand Down Expand Up @@ -1768,6 +1782,7 @@ struct proto udpv6_prot = {
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
.splice_eof = udpv6_splice_eof,
.release_cb = ip6_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
Expand Down

0 comments on commit 2489502

Please sign in to comment.