Skip to content
This repository was archived by the owner on Aug 21, 2018. It is now read-only.

Commit 787ff2d

Browse files
committed
MFC r286227, r286443:
r286227: Decompose the TCP INP_INFO lock to improve scalability for short-lived TCP connections:
- The existing TCP INP_INFO lock continues to protect the stability of the global inpcb list during full list traversal (e.g. tcp_pcblist()).
- A new INP_LIST lock protects actual modifications of the inpcb list (inp allocation and free) and the global inpcb counters.
This allows the TCP INP_INFO_RLOCK lock to be used in critical paths (e.g. tcp_input()), with INP_INFO_WLOCK needed only in occasional operations that walk all connections.
PR: 183659
Differential Revision: https://reviews.freebsd.org/D2599
Reviewed by: jhb, adrian
Tested by: adrian, nitroboost-gmail.com
Sponsored by: Verisign, Inc.
r286443: Fix a kernel assertion issue introduced with r286227: avoid overly strict INP_INFO_RLOCK_ASSERT checks, because tcp_notify() can be called from in6_pcbnotify().
Reported by: Larry Rosenman <ler@lerctr.org>
Submitted by: markj, jch
1 parent a74567e commit 787ff2d

15 files changed

+341
-250
lines changed

sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ t3_send_fin(struct toedev *tod, struct tcpcb *tp)
638638
unsigned int tid = toep->tp_tid;
639639
#endif
640640

641-
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
641+
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
642642
INP_WLOCK_ASSERT(inp);
643643

644644
CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
@@ -924,12 +924,12 @@ do_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
924924

925925
rc = act_open_rpl_status_to_errno(s);
926926
if (rc != EAGAIN)
927-
INP_INFO_WLOCK(&V_tcbinfo);
927+
INP_INFO_RLOCK(&V_tcbinfo);
928928
INP_WLOCK(inp);
929929
toe_connect_failed(tod, inp, rc);
930930
toepcb_release(toep); /* unlocks inp */
931931
if (rc != EAGAIN)
932-
INP_INFO_WUNLOCK(&V_tcbinfo);
932+
INP_INFO_RUNLOCK(&V_tcbinfo);
933933

934934
m_freem(m);
935935
return (0);
@@ -1060,7 +1060,7 @@ send_reset(struct toepcb *toep)
10601060
struct adapter *sc = tod->tod_softc;
10611061
struct mbuf *m;
10621062

1063-
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1063+
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
10641064
INP_WLOCK_ASSERT(inp);
10651065

10661066
CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
@@ -1171,12 +1171,12 @@ do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
11711171
SOCKBUF_UNLOCK(so_rcv);
11721172
INP_WUNLOCK(inp);
11731173

1174-
INP_INFO_WLOCK(&V_tcbinfo);
1174+
INP_INFO_RLOCK(&V_tcbinfo);
11751175
INP_WLOCK(inp);
11761176
tp = tcp_drop(tp, ECONNRESET);
11771177
if (tp)
11781178
INP_WUNLOCK(inp);
1179-
INP_INFO_WUNLOCK(&V_tcbinfo);
1179+
INP_INFO_RUNLOCK(&V_tcbinfo);
11801180

11811181
m_freem(m);
11821182
return (0);
@@ -1221,7 +1221,7 @@ do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
12211221
struct tcpcb *tp;
12221222
struct socket *so;
12231223

1224-
INP_INFO_WLOCK(&V_tcbinfo);
1224+
INP_INFO_RLOCK(&V_tcbinfo);
12251225
INP_WLOCK(inp);
12261226
tp = intotcpcb(inp);
12271227

@@ -1249,7 +1249,7 @@ do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
12491249
case TCPS_FIN_WAIT_2:
12501250
tcp_twstart(tp);
12511251
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1252-
INP_INFO_WUNLOCK(&V_tcbinfo);
1252+
INP_INFO_RUNLOCK(&V_tcbinfo);
12531253

12541254
INP_WLOCK(inp);
12551255
toepcb_release(toep); /* no more CPLs expected */
@@ -1263,7 +1263,7 @@ do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
12631263

12641264
done:
12651265
INP_WUNLOCK(inp);
1266-
INP_INFO_WUNLOCK(&V_tcbinfo);
1266+
INP_INFO_RUNLOCK(&V_tcbinfo);
12671267

12681268
m_freem(m);
12691269
return (0);
@@ -1284,7 +1284,7 @@ do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
12841284
struct tcpcb *tp;
12851285
struct socket *so;
12861286

1287-
INP_INFO_WLOCK(&V_tcbinfo);
1287+
INP_INFO_RLOCK(&V_tcbinfo);
12881288
INP_WLOCK(inp);
12891289
tp = intotcpcb(inp);
12901290

@@ -1302,7 +1302,7 @@ do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
13021302
tcp_twstart(tp);
13031303
release:
13041304
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1305-
INP_INFO_WUNLOCK(&V_tcbinfo);
1305+
INP_INFO_RUNLOCK(&V_tcbinfo);
13061306

13071307
INP_WLOCK(inp);
13081308
toepcb_release(toep); /* no more CPLs expected */
@@ -1327,7 +1327,7 @@ do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
13271327

13281328
done:
13291329
INP_WUNLOCK(inp);
1330-
INP_INFO_WUNLOCK(&V_tcbinfo);
1330+
INP_INFO_RUNLOCK(&V_tcbinfo);
13311331

13321332
m_freem(m);
13331333
return (0);
@@ -1488,7 +1488,7 @@ do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
14881488
return (do_abort_req_synqe(qs, r, m));
14891489

14901490
inp = toep->tp_inp;
1491-
INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
1491+
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
14921492
INP_WLOCK(inp);
14931493

14941494
tp = intotcpcb(inp);
@@ -1502,7 +1502,7 @@ do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
15021502
toep->tp_flags |= TP_ABORT_REQ_RCVD;
15031503
toep->tp_flags |= TP_ABORT_SHUTDOWN;
15041504
INP_WUNLOCK(inp);
1505-
INP_INFO_WUNLOCK(&V_tcbinfo);
1505+
INP_INFO_RUNLOCK(&V_tcbinfo);
15061506
m_freem(m);
15071507
return (0);
15081508
}
@@ -1522,7 +1522,7 @@ do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
15221522
INP_WLOCK(inp); /* re-acquire */
15231523
toepcb_release(toep); /* no more CPLs expected */
15241524
}
1525-
INP_INFO_WUNLOCK(&V_tcbinfo);
1525+
INP_INFO_RUNLOCK(&V_tcbinfo);
15261526

15271527
send_abort_rpl(tod, tid, qset);
15281528
m_freem(m);

sys/dev/cxgb/ulp/tom/cxgb_listen.c

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -553,11 +553,11 @@ do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
553553
REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */
554554
}
555555

556-
INP_INFO_WLOCK(&V_tcbinfo);
556+
INP_INFO_RLOCK(&V_tcbinfo);
557557

558558
/* Don't offload if the 4-tuple is already in use */
559559
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
560-
INP_INFO_WUNLOCK(&V_tcbinfo);
560+
INP_INFO_RUNLOCK(&V_tcbinfo);
561561
REJECT_PASS_ACCEPT();
562562
}
563563

@@ -570,7 +570,7 @@ do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
570570
* resources tied to this listen context.
571571
*/
572572
INP_WUNLOCK(inp);
573-
INP_INFO_WUNLOCK(&V_tcbinfo);
573+
INP_INFO_RUNLOCK(&V_tcbinfo);
574574
REJECT_PASS_ACCEPT();
575575
}
576576
so = inp->inp_socket;
@@ -698,7 +698,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
698698
struct toepcb *toep;
699699
struct socket *so;
700700
struct listen_ctx *lctx = synqe->lctx;
701-
struct inpcb *inp = lctx->inp;
701+
struct inpcb *inp = lctx->inp, *new_inp;
702702
struct tcpopt to;
703703
struct tcphdr th;
704704
struct in_conninfo inc;
@@ -712,7 +712,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
712712
KASSERT(qs->idx == synqe->qset,
713713
("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
714714

715-
INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
715+
INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
716716
INP_WLOCK(inp);
717717

718718
if (__predict_false(inp->inp_flags & INP_DROPPED)) {
@@ -726,7 +726,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
726726
("%s: listen socket dropped but tid %u not aborted.",
727727
__func__, tid));
728728
INP_WUNLOCK(inp);
729-
INP_INFO_WUNLOCK(&V_tcbinfo);
729+
INP_INFO_RUNLOCK(&V_tcbinfo);
730730
m_freem(m);
731731
return (0);
732732
}
@@ -742,7 +742,7 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
742742
reset:
743743
t3_send_reset_synqe(tod, synqe);
744744
INP_WUNLOCK(inp);
745-
INP_INFO_WUNLOCK(&V_tcbinfo);
745+
INP_INFO_RUNLOCK(&V_tcbinfo);
746746
m_freem(m);
747747
return (0);
748748
}
@@ -760,21 +760,23 @@ do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
760760
goto reset;
761761
}
762762

763-
if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
764-
struct inpcb *new_inp = sotoinpcb(so);
763+
/* New connection inpcb is already locked by syncache_expand(). */
764+
new_inp = sotoinpcb(so);
765+
INP_WLOCK_ASSERT(new_inp);
765766

766-
INP_WLOCK(new_inp);
767+
if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
767768
tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
768769
t3_offload_socket(tod, synqe, so);
769-
INP_WUNLOCK(new_inp);
770770
}
771771

772+
INP_WUNLOCK(new_inp);
773+
772774
/* Remove the synq entry and release its reference on the lctx */
773775
TAILQ_REMOVE(&lctx->synq, synqe, link);
774776
inp = release_lctx(td, lctx);
775777
if (inp)
776778
INP_WUNLOCK(inp);
777-
INP_INFO_WUNLOCK(&V_tcbinfo);
779+
INP_INFO_RUNLOCK(&V_tcbinfo);
778780
release_synqe(synqe);
779781

780782
m_freem(m);
@@ -1140,7 +1142,7 @@ t3_offload_socket(struct toedev *tod, void *arg, struct socket *so)
11401142
struct cpl_pass_establish *cpl = synqe->cpl;
11411143
struct toepcb *toep = synqe->toep;
11421144

1143-
INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
1145+
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
11441146
INP_WLOCK_ASSERT(inp);
11451147

11461148
offload_socket(so, toep);

sys/dev/cxgbe/tom/t4_connect.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,12 @@ act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
189189
toep->tid = -1;
190190

191191
if (status != EAGAIN)
192-
INP_INFO_WLOCK(&V_tcbinfo);
192+
INP_INFO_RLOCK(&V_tcbinfo);
193193
INP_WLOCK(inp);
194194
toe_connect_failed(tod, inp, status);
195195
final_cpl_received(toep); /* unlocks inp */
196196
if (status != EAGAIN)
197-
INP_INFO_WUNLOCK(&V_tcbinfo);
197+
INP_INFO_RUNLOCK(&V_tcbinfo);
198198
}
199199

200200
static int

sys/dev/cxgbe/tom/t4_cpl_io.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
10851085

10861086
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
10871087

1088-
INP_INFO_WLOCK(&V_tcbinfo);
1088+
INP_INFO_RLOCK(&V_tcbinfo);
10891089
INP_WLOCK(inp);
10901090
tp = intotcpcb(inp);
10911091

@@ -1139,7 +1139,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
11391139
case TCPS_FIN_WAIT_2:
11401140
tcp_twstart(tp);
11411141
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1142-
INP_INFO_WUNLOCK(&V_tcbinfo);
1142+
INP_INFO_RUNLOCK(&V_tcbinfo);
11431143

11441144
INP_WLOCK(inp);
11451145
final_cpl_received(toep);
@@ -1151,7 +1151,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
11511151
}
11521152
done:
11531153
INP_WUNLOCK(inp);
1154-
INP_INFO_WUNLOCK(&V_tcbinfo);
1154+
INP_INFO_RUNLOCK(&V_tcbinfo);
11551155
return (0);
11561156
}
11571157

@@ -1178,7 +1178,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
11781178
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
11791179
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
11801180

1181-
INP_INFO_WLOCK(&V_tcbinfo);
1181+
INP_INFO_RLOCK(&V_tcbinfo);
11821182
INP_WLOCK(inp);
11831183
tp = intotcpcb(inp);
11841184

@@ -1196,7 +1196,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
11961196
tcp_twstart(tp);
11971197
release:
11981198
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1199-
INP_INFO_WUNLOCK(&V_tcbinfo);
1199+
INP_INFO_RUNLOCK(&V_tcbinfo);
12001200

12011201
INP_WLOCK(inp);
12021202
final_cpl_received(toep); /* no more CPLs expected */
@@ -1220,7 +1220,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
12201220
}
12211221
done:
12221222
INP_WUNLOCK(inp);
1223-
INP_INFO_WUNLOCK(&V_tcbinfo);
1223+
INP_INFO_RUNLOCK(&V_tcbinfo);
12241224
return (0);
12251225
}
12261226

@@ -1379,7 +1379,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
13791379
}
13801380

13811381
inp = toep->inp;
1382-
INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
1382+
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
13831383
INP_WLOCK(inp);
13841384

13851385
tp = intotcpcb(inp);
@@ -1413,7 +1413,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
14131413

14141414
final_cpl_received(toep);
14151415
done:
1416-
INP_INFO_WUNLOCK(&V_tcbinfo);
1416+
INP_INFO_RUNLOCK(&V_tcbinfo);
14171417
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
14181418
return (0);
14191419
}
@@ -1527,12 +1527,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
15271527
SOCKBUF_UNLOCK(sb);
15281528
INP_WUNLOCK(inp);
15291529

1530-
INP_INFO_WLOCK(&V_tcbinfo);
1530+
INP_INFO_RLOCK(&V_tcbinfo);
15311531
INP_WLOCK(inp);
15321532
tp = tcp_drop(tp, ECONNRESET);
15331533
if (tp)
15341534
INP_WUNLOCK(inp);
1535-
INP_INFO_WUNLOCK(&V_tcbinfo);
1535+
INP_INFO_RUNLOCK(&V_tcbinfo);
15361536

15371537
return (0);
15381538
}

0 commit comments

Comments
 (0)