Skip to content

Commit

Permalink
tcp: tracking packets with CE marks in BW rate sample
Browse files Browse the repository at this point in the history
In order to track CE marks per rate sample (one round trip), TCP needs a
per-skb header field to record the tp->delivered_ce count when the skb
was sent. To make space, we replace the "last_in_flight" field, which is
used exclusively for NV congestion control. The stat needed by NV can
alternatively be approximated from the existing tcp_sock stats delivered
and mss_cache.

This patch counts the number of delivered packets that carry CE marks in
the rate sample, using an approach similar to the existing delivery
accounting.

Cc: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Luke Hsiao <lukehsiao@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
yuchungcheng authored and davem330 committed Sep 24, 2021
1 parent ae98f40 commit 40bc606
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 11 deletions.
9 changes: 6 additions & 3 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -874,10 +874,11 @@ struct tcp_skb_cb {
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
/* There is space for up to 24 bytes */
__u32 in_flight:30,/* Bytes in flight at transmit */
is_app_limited:1, /* cwnd not fully used? */
unused:1;
__u32 is_app_limited:1, /* cwnd not fully used? */
delivered_ce:20,
unused:11;
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
Expand Down Expand Up @@ -1029,7 +1030,9 @@ struct ack_sample {
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
Expand Down
11 changes: 5 additions & 6 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -3221,7 +3221,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
u32 pkts_acked = 0;
u32 last_in_flight = 0;
bool rtt_update;
int flag = 0;

Expand Down Expand Up @@ -3257,7 +3256,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (!first_ackt)
first_ackt = last_ackt;

last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
if (before(start_seq, reord))
reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
Expand Down Expand Up @@ -3323,8 +3321,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);

if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
last_in_flight && !prior_sacked && fully_acked &&
if (pkts_acked == 1 && fully_acked && !prior_sacked &&
(tp->snd_una - prior_snd_una) < tp->mss_cache &&
sack->rate->prior_delivered + 1 == tp->delivered &&
!(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
/* Conservatively mark a delayed ACK. It's typically
Expand Down Expand Up @@ -3381,9 +3379,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,

if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
.rtt_us = sack->rate->rtt_us,
.in_flight = last_in_flight };
.rtt_us = sack->rate->rtt_us };

sample.in_flight = tp->mss_cache *
(tp->delivered - sack->rate->prior_delivered);
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
}

Expand Down
2 changes: 0 additions & 2 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (clone_it) {
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
oskb = skb;

tcp_skb_tsorted_save(oskb) {
Expand Down
6 changes: 6 additions & 0 deletions net/ipv4/tcp_rate.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
}

Expand All @@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,

if (!rs->prior_delivered ||
after(scb->tx.delivered, rs->prior_delivered)) {
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
Expand Down Expand Up @@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
}
rs->delivered = tp->delivered - rs->prior_delivered;

rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
/* delivered_ce occupies less than 32 bits in the skb control block */
rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;

/* Model sending data and receiving ACKs as separate pipeline phases
* for a window. Usually the ACK phase is longer, but with ACK
* compression the send phase can be longer. To be safe we use the
Expand Down

0 comments on commit 40bc606

Please sign in to comment.