SUNRPC: Handle TCP socket sends with kernel_sendpage() again
[ Upstream commit 4a85a6a ]

Daire Byrne reports a ~50% aggregate throughput regression on his
Linux NFS server after commit da1661b ("SUNRPC: Teach server to
use xprt_sock_sendmsg for socket sends"), which replaced
kernel_sendpage() calls in NFSD's socket send path with calls to
sock_sendmsg() using iov_iter.

Investigation showed that tcp_sendmsg() was not using zero-copy to
send the xdr_buf's bvec pages, but instead was relying on memcpy.
This means copying every byte of a large NFS READ payload.
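For illustration only, here is a minimal sketch (not part of this commit)
of roughly how a bvec-based send hands an xdr_buf's page payload to
sock_sendmsg(). The helper name example_send_bvec is hypothetical, and
xdr->page_base is assumed to be zero for brevity; tcp_sendmsg() then
copies from each bio_vec into newly allocated socket buffers rather than
pinning the pages.

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>
#include <linux/sunrpc/xdr.h>

/* Hypothetical helper: send xdr's page array via sock_sendmsg(),
 * roughly as the xprt_sock_sendmsg()-based path does.
 * Assumes xdr->page_base == 0 to keep the sketch short. */
static int example_send_bvec(struct socket *sock, struct xdr_buf *xdr)
{
	struct msghdr msg = { .msg_flags = MSG_MORE };
	int ret;

	ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
	if (ret < 0)
		return ret;
	iov_iter_bvec(&msg.msg_iter, WRITE, xdr->bvec,
		      DIV_ROUND_UP(xdr->page_len, PAGE_SIZE),
		      xdr->page_len);
	/* tcp_sendmsg() copies every byte out of the bvec pages here. */
	return sock_sendmsg(sock, &msg);
}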

It looks like TLS sockets do indeed support a ->sendpage method,
so it's really not necessary to use xprt_sock_sendmsg() to support
TLS fully on the server. A mechanical reversion of da1661b is
not possible at this point, but we can re-implement the server's
TCP socket sendmsg path using kernel_sendpage().
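By contrast, a minimal sketch (again, not part of this commit) of handing
one stable page to the socket with kernel_sendpage(); the helper name
example_send_page is hypothetical. The TCP layer keeps a reference on the
page and builds the skb around it instead of copying the payload, while
MSG_MORE | MSG_SENDPAGE_NOTLAST signal that more data follows. The full
server-side path added below walks the xdr_buf head, page array, and tail
this way.

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/types.h>

/* Hypothetical helper: zero-copy send of one page whose contents the
 * caller guarantees remain unchanged until transmission completes. */
static int example_send_page(struct socket *sock, struct page *page,
			     unsigned int offset, unsigned int len,
			     bool more_follows)
{
	int flags = more_follows ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0;

	/* TCP links @page into the skb instead of copying @len bytes. */
	return kernel_sendpage(sock, page, offset, len, flags);
}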

Reported-by: Daire Byrne <daire@dneg.com>
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209439
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
chucklever authored and gregkh committed Jan 27, 2021
1 parent ae3e2f3 commit 00ee972
Showing 1 changed file with 85 additions and 1 deletion.
net/sunrpc/svcsock.c

@@ -1062,6 +1062,90 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	return 0;	/* record not complete */
 }
 
+static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
+			      int flags)
+{
+	return kernel_sendpage(sock, virt_to_page(vec->iov_base),
+			       offset_in_page(vec->iov_base),
+			       vec->iov_len, flags);
+}
+
+/*
+ * kernel_sendpage() is used exclusively to reduce the number of
+ * copy operations in this path. Therefore the caller must ensure
+ * that the pages backing @xdr are unchanging.
+ *
+ * In addition, the logic assumes that .bv_len is never larger
+ * than PAGE_SIZE.
+ */
+static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg,
+			   struct xdr_buf *xdr, rpc_fraghdr marker,
+			   unsigned int *sentp)
+{
+	const struct kvec *head = xdr->head;
+	const struct kvec *tail = xdr->tail;
+	struct kvec rm = {
+		.iov_base	= &marker,
+		.iov_len	= sizeof(marker),
+	};
+	int flags, ret;
+
+	*sentp = 0;
+	xdr_alloc_bvec(xdr, GFP_KERNEL);
+
+	msg->msg_flags = MSG_MORE;
+	ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len);
+	if (ret < 0)
+		return ret;
+	*sentp += ret;
+	if (ret != rm.iov_len)
+		return -EAGAIN;
+
+	flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0;
+	ret = svc_tcp_send_kvec(sock, head, flags);
+	if (ret < 0)
+		return ret;
+	*sentp += ret;
+	if (ret != head->iov_len)
+		goto out;
+
+	if (xdr->page_len) {
+		unsigned int offset, len, remaining;
+		struct bio_vec *bvec;
+
+		bvec = xdr->bvec;
+		offset = xdr->page_base;
+		remaining = xdr->page_len;
+		flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
+		while (remaining > 0) {
+			if (remaining <= PAGE_SIZE && tail->iov_len == 0)
+				flags = 0;
+			len = min(remaining, bvec->bv_len);
+			ret = kernel_sendpage(sock, bvec->bv_page,
+					      bvec->bv_offset + offset,
+					      len, flags);
+			if (ret < 0)
+				return ret;
+			*sentp += ret;
+			if (ret != len)
+				goto out;
+			remaining -= len;
+			offset = 0;
+			bvec++;
+		}
+	}
+
+	if (tail->iov_len) {
+		ret = svc_tcp_send_kvec(sock, tail, 0);
+		if (ret < 0)
+			return ret;
+		*sentp += ret;
+	}
+
+out:
+	return 0;
+}
+
 /**
  * svc_tcp_sendto - Send out a reply on a TCP socket
  * @rqstp: completed svc_rqst

@@ -1089,7 +1173,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
 	mutex_lock(&xprt->xpt_mutex);
 	if (svc_xprt_is_dead(xprt))
 		goto out_notconn;
-	err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
+	err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent);
 	xdr_free_bvec(xdr);
 	trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
 	if (err < 0 || sent != (xdr->len + sizeof(marker)))
