Skip to content
This repository has been archived by the owner on Aug 21, 2018. It is now read-only.

Commit

Permalink
MFC r286227, r286443:
Browse files Browse the repository at this point in the history
r286227:

Decompose the TCP INP_INFO lock to improve the scalability of short-lived TCP connections:

- The existing TCP INP_INFO lock continues to protect the global inpcb list
  stability during full list traversal (e.g. tcp_pcblist()).

- A new INP_LIST lock protects actual modifications of the inpcb list (inp
  allocation and free) and the global inpcb counters.

This allows the TCP INP_INFO_RLOCK lock to be used in critical paths (e.g.
tcp_input()), and INP_INFO_WLOCK only in occasional operations that walk all
connections.

PR:			183659
Differential Revision:	https://reviews.freebsd.org/D2599
Reviewed by:		jhb, adrian
Tested by:		adrian, nitroboost-gmail.com
Sponsored by:		Verisign, Inc.

r286443:

Fix a kernel assertion issue introduced in r286227:
avoid overly strict INP_INFO_RLOCK_ASSERT checks, because
tcp_notify() is called from in6_pcbnotify().

Reported by:	Larry Rosenman <ler@lerctr.org>
Submitted by:	markj, jch
  • Loading branch information
jcharbon committed Jul 13, 2016
1 parent a74567e commit 787ff2d
Show file tree
Hide file tree
Showing 15 changed files with 341 additions and 250 deletions.
30 changes: 15 additions & 15 deletions sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
Expand Up @@ -638,7 +638,7 @@ t3_send_fin(struct toedev *tod, struct tcpcb *tp)
unsigned int tid = toep->tp_tid;
#endif

INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);

CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
Expand Down Expand Up @@ -924,12 +924,12 @@ do_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)

rc = act_open_rpl_status_to_errno(s);
if (rc != EAGAIN)
INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
toe_connect_failed(tod, inp, rc);
toepcb_release(toep); /* unlocks inp */
if (rc != EAGAIN)
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

m_freem(m);
return (0);
Expand Down Expand Up @@ -1060,7 +1060,7 @@ send_reset(struct toepcb *toep)
struct adapter *sc = tod->tod_softc;
struct mbuf *m;

INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);

CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
Expand Down Expand Up @@ -1171,12 +1171,12 @@ do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
SOCKBUF_UNLOCK(so_rcv);
INP_WUNLOCK(inp);

INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

m_freem(m);
return (0);
Expand Down Expand Up @@ -1221,7 +1221,7 @@ do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
struct tcpcb *tp;
struct socket *so;

INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);

Expand Down Expand Up @@ -1249,7 +1249,7 @@ do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
case TCPS_FIN_WAIT_2:
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

INP_WLOCK(inp);
toepcb_release(toep); /* no more CPLs expected */
Expand All @@ -1263,7 +1263,7 @@ do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)

done:
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

m_freem(m);
return (0);
Expand All @@ -1284,7 +1284,7 @@ do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
struct tcpcb *tp;
struct socket *so;

INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);

Expand All @@ -1302,7 +1302,7 @@ do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
tcp_twstart(tp);
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

INP_WLOCK(inp);
toepcb_release(toep); /* no more CPLs expected */
Expand All @@ -1327,7 +1327,7 @@ do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)

done:
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

m_freem(m);
return (0);
Expand Down Expand Up @@ -1488,7 +1488,7 @@ do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
return (do_abort_req_synqe(qs, r, m));

inp = toep->tp_inp;
INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
INP_WLOCK(inp);

tp = intotcpcb(inp);
Expand All @@ -1502,7 +1502,7 @@ do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
toep->tp_flags |= TP_ABORT_REQ_RCVD;
toep->tp_flags |= TP_ABORT_SHUTDOWN;
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
}
Expand All @@ -1522,7 +1522,7 @@ do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
INP_WLOCK(inp); /* re-acquire */
toepcb_release(toep); /* no more CPLs expected */
}
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

send_abort_rpl(tod, tid, qset);
m_freem(m);
Expand Down
28 changes: 15 additions & 13 deletions sys/dev/cxgb/ulp/tom/cxgb_listen.c
Expand Up @@ -553,11 +553,11 @@ do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */
}

INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);

/* Don't offload if the 4-tuple is already in use */
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
REJECT_PASS_ACCEPT();
}

Expand All @@ -570,7 +570,7 @@ do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
* resources tied to this listen context.
*/
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
REJECT_PASS_ACCEPT();
}
so = inp->inp_socket;
Expand Down Expand Up @@ -698,7 +698,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
struct toepcb *toep;
struct socket *so;
struct listen_ctx *lctx = synqe->lctx;
struct inpcb *inp = lctx->inp;
struct inpcb *inp = lctx->inp, *new_inp;
struct tcpopt to;
struct tcphdr th;
struct in_conninfo inc;
Expand All @@ -712,7 +712,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
KASSERT(qs->idx == synqe->qset,
("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));

INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
INP_WLOCK(inp);

if (__predict_false(inp->inp_flags & INP_DROPPED)) {
Expand All @@ -726,7 +726,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
("%s: listen socket dropped but tid %u not aborted.",
__func__, tid));
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
}
Expand All @@ -742,7 +742,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
reset:
t3_send_reset_synqe(tod, synqe);
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
}
Expand All @@ -760,21 +760,23 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
goto reset;
}

if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
struct inpcb *new_inp = sotoinpcb(so);
/* New connection inpcb is already locked by syncache_expand(). */
new_inp = sotoinpcb(so);
INP_WLOCK_ASSERT(new_inp);

INP_WLOCK(new_inp);
if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
t3_offload_socket(tod, synqe, so);
INP_WUNLOCK(new_inp);
}

INP_WUNLOCK(new_inp);

/* Remove the synq entry and release its reference on the lctx */
TAILQ_REMOVE(&lctx->synq, synqe, link);
inp = release_lctx(td, lctx);
if (inp)
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
release_synqe(synqe);

m_freem(m);
Expand Down Expand Up @@ -1140,7 +1142,7 @@ t3_offload_socket(struct toedev *tod, void *arg, struct socket *so)
struct cpl_pass_establish *cpl = synqe->cpl;
struct toepcb *toep = synqe->toep;

INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
INP_WLOCK_ASSERT(inp);

offload_socket(so, toep);
Expand Down
4 changes: 2 additions & 2 deletions sys/dev/cxgbe/tom/t4_connect.c
Expand Up @@ -189,12 +189,12 @@ act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
toep->tid = -1;

if (status != EAGAIN)
INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
toe_connect_failed(tod, inp, status);
final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN)
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
}

static int
Expand Down
20 changes: 10 additions & 10 deletions sys/dev/cxgbe/tom/t4_cpl_io.c
Expand Up @@ -1085,7 +1085,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)

KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);

Expand Down Expand Up @@ -1139,7 +1139,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
case TCPS_FIN_WAIT_2:
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

INP_WLOCK(inp);
final_cpl_received(toep);
Expand All @@ -1151,7 +1151,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
}
done:
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}

Expand All @@ -1178,7 +1178,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);

Expand All @@ -1196,7 +1196,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
tcp_twstart(tp);
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

INP_WLOCK(inp);
final_cpl_received(toep); /* no more CPLs expected */
Expand All @@ -1220,7 +1220,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
}
done:
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}

Expand Down Expand Up @@ -1379,7 +1379,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
}

inp = toep->inp;
INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
INP_WLOCK(inp);

tp = intotcpcb(inp);
Expand Down Expand Up @@ -1413,7 +1413,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)

final_cpl_received(toep);
done:
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0);
}
Expand Down Expand Up @@ -1527,12 +1527,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);

INP_INFO_WLOCK(&V_tcbinfo);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_INFO_RUNLOCK(&V_tcbinfo);

return (0);
}
Expand Down

0 comments on commit 787ff2d

Please sign in to comment.