Skip to content

Commit

Permalink
net-tcp_bbr: v2: Remove BBRv2 tcp congestion control patchset
Browse files Browse the repository at this point in the history
This reverts commit 2edceed...6cb3f4f

Signed-off-by: Alexandre Frade <kernel@xanmod.org>
  • Loading branch information
xanmod committed Aug 14, 2023
1 parent cc0a5da commit fc7f1e4
Show file tree
Hide file tree
Showing 21 changed files with 35 additions and 3,121 deletions.
58 changes: 0 additions & 58 deletions Documentation/networking/ip-sysctl.rst
Expand Up @@ -1085,64 +1085,6 @@ tcp_child_ehash_entries - INTEGER

Default: 0

tcp_plb_enabled - BOOLEAN
If set, TCP PLB (Protective Load Balancing) is enabled. PLB is
described in the following paper:
https://doi.org/10.1145/3544216.3544226. Based on PLB parameters,
upon sensing sustained congestion, TCP triggers a change in
flow label field for outgoing IPv6 packets. A change in flow label
field potentially changes the path of outgoing packets for switches
that use ECMP/WCMP for routing.

Default: 0

tcp_plb_cong_thresh - INTEGER
Fraction of packets marked with congestion over a round (RTT) to
tag that round as congested. This is referred to as K in the PLB paper:
https://doi.org/10.1145/3544216.3544226.

The 0-1 fraction range is mapped to 0-256 range to avoid floating
point operations. For example, 128 means that if at least 50% of
the packets in a round were marked as congested then the round
will be tagged as congested.

Possible Values: 0 - 256

Default: 128

tcp_plb_idle_rehash_rounds - INTEGER
Number of consecutive congested rounds (RTT) seen after which
a rehash can be performed, given there are no packets in flight.
This is referred to as M in PLB paper:
https://doi.org/10.1145/3544216.3544226.

Possible Values: 0 - 31

Default: 3

tcp_plb_rehash_rounds - INTEGER
Number of consecutive congested rounds (RTT) seen after which
a forced rehash can be performed. Be careful when setting this
parameter, as a small value increases the risk of retransmissions.
This is referred to as N in PLB paper:
https://doi.org/10.1145/3544216.3544226.

Possible Values: 0 - 31

Default: 12

tcp_plb_suspend_rto_sec - INTEGER
Time, in seconds, to suspend PLB in event of an RTO. In order to avoid
having PLB repath onto a connectivity "black hole", after an RTO a TCP
connection suspends PLB repathing for a random duration between 1x and
2x of this parameter. Randomness is added to avoid concurrent rehashing
of multiple TCP connections. This should be set corresponding to the
amount of time it takes to repair a failed link.

Possible Values: 0 - 255

Default: 60

UDP variables
=============

Expand Down
6 changes: 1 addition & 5 deletions include/linux/tcp.h
Expand Up @@ -255,8 +255,7 @@ struct tcp_sock {
u8 compressed_ack;
u8 dup_ack_counter:2,
tlp_retrans:1, /* TLP is a retransmission */
fast_ack_mode:2, /* which fast ack mode ? */
unused:3;
unused:5;
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
u8 chrono_type:2, /* current chronograph type */
Expand Down Expand Up @@ -450,9 +449,6 @@ struct tcp_sock {
*/
struct request_sock __rcu *fastopen_rsk;
struct saved_syn *saved_syn;

/* Rerouting information */
u16 ecn_rehash; /* PLB triggered rehash attempts */
};

enum tsq_enum {
Expand Down
3 changes: 1 addition & 2 deletions include/net/inet_connection_sock.h
Expand Up @@ -135,8 +135,7 @@ struct inet_connection_sock {
u32 icsk_probes_tstamp;
u32 icsk_user_timeout;

/* XXX inflated by temporary internal debugging info */
u64 icsk_ca_priv[224 / sizeof(u64)];
u64 icsk_ca_priv[104 / sizeof(u64)];
#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv)
};

Expand Down
5 changes: 0 additions & 5 deletions include/net/netns/ipv4.h
Expand Up @@ -183,11 +183,6 @@ struct netns_ipv4 {
unsigned long tfo_active_disable_stamp;
u32 tcp_challenge_timestamp;
u32 tcp_challenge_count;
u8 sysctl_tcp_plb_enabled;
int sysctl_tcp_plb_cong_thresh;
u8 sysctl_tcp_plb_idle_rehash_rounds;
u8 sysctl_tcp_plb_rehash_rounds;
u8 sysctl_tcp_plb_suspend_rto_sec;

int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
Expand Down
58 changes: 5 additions & 53 deletions include/net/tcp.h
Expand Up @@ -375,7 +375,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
#define TCP_ECN_ECT_PERMANENT 16

enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
Expand Down Expand Up @@ -825,11 +824,6 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
return max_t(s64, t1 - t0, 0);
}

/* Difference between two 32-bit microsecond timestamps, clamped to >= 0.
 * The subtraction is done in 32 bits, so wrap-around of the timestamp
 * counter is handled by the signed reinterpretation of the delta.
 */
static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
{
	s32 delta = t1 - t0;

	return delta > 0 ? delta : 0;
}

static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
return tcp_ns_to_ts(skb->skb_mstamp_ns);
Expand Down Expand Up @@ -905,14 +899,9 @@ struct tcp_skb_cb {
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
u32 first_tx_mstamp;
u64 first_tx_mstamp;
/* when we reached the "delivered" count */
u32 delivered_mstamp;
#define TCPCB_IN_FLIGHT_BITS 20
#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
u32 in_flight:20, /* packets in flight at transmit */
unused2:12;
u32 lost; /* packets lost so far upon tx of skb */
u64 delivered_mstamp;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
Expand Down Expand Up @@ -1038,11 +1027,7 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
#define TCP_CONG_WANTS_CE_EVENTS 0x4
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
TCP_CONG_NEEDS_ECN | \
TCP_CONG_WANTS_CE_EVENTS)
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)

union tcp_cc_info;

Expand All @@ -1062,11 +1047,8 @@ struct ack_sample {
*/
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_lost; /* tp->lost at "prior_mstamp" */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
u32 tx_in_flight; /* packets in flight at starting timestamp */
s32 lost; /* number of packets lost over interval */
s32 delivered; /* number of packets delivered over interval */
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
Expand All @@ -1080,7 +1062,6 @@ struct rate_sample {
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
bool is_ece; /* did this ACK have ECN marked? */
};

struct tcp_congestion_ops {
Expand All @@ -1104,11 +1085,8 @@ struct tcp_congestion_ops {
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);

/* pick target number of segments per TSO/GSO skb (optional): */
u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);

/* react to a specific lost skb (optional) */
void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
/* override sysctl_tcp_min_tso_segs */
u32 (*min_tso_segs)(struct sock *sk);

/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
Expand Down Expand Up @@ -1171,14 +1149,6 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
}
#endif

static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);

return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
TCP_CONG_WANTS_CE_EVENTS);
}

static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
Expand All @@ -1198,7 +1168,6 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);

/* From tcp_rate.c */
void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs);
Expand Down Expand Up @@ -2191,23 +2160,6 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);

/* tcp_plb.c */

#define TCP_PLB_SCALE 8 /* scaling factor for fractions in PLB (e.g. ce_ratio) */

/* State for PLB (Protective Load Balancing) for a single TCP connection. */
struct tcp_plb_state {
	u8 consec_cong_rounds:5, /* consecutive congested rounds */
	enabled:1, /* Check if PLB is enabled */
	unused:2;
	u32 pause_until; /* jiffies32 when PLB can resume repathing */
};

/* Fold the congestion ratio seen over the last round (RTT) into the
 * per-connection PLB state (see the tcp_plb_cong_thresh sysctl).
 */
void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
	const int cong_ratio);
/* Trigger a repath (IPv6 flow-label rehash) once enough consecutive
 * congested rounds have accumulated and PLB is not paused.
 */
void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb);
/* After an RTO, suspend PLB repathing for a randomized interval to avoid
 * repathing onto a connectivity black hole (see tcp_plb_suspend_rto_sec).
 */
void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);

/* At how many usecs into the future should the RTO fire? */
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
Expand Down
33 changes: 0 additions & 33 deletions include/uapi/linux/inet_diag.h
Expand Up @@ -231,42 +231,9 @@ struct tcp_bbr_info {
__u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
};

/* Phase as reported in netlink/ss stats.
 *
 * Mirrors the BBRv2 state machine (startup, drain, probe-rtt and the four
 * probe-bw sub-phases). Part of the uapi, so existing values must keep
 * their numbering.
 */
enum tcp_bbr2_phase {
	BBR2_PHASE_INVALID = 0,
	BBR2_PHASE_STARTUP = 1,
	BBR2_PHASE_DRAIN = 2,
	BBR2_PHASE_PROBE_RTT = 3,
	BBR2_PHASE_PROBE_BW_UP = 4,
	BBR2_PHASE_PROBE_BW_DOWN = 5,
	BBR2_PHASE_PROBE_BW_CRUISE = 6,
	BBR2_PHASE_PROBE_BW_REFILL = 7
};

/* BBRv2 state exported to user space via inet_diag (e.g. "ss -i").
 * Layout is uapi: fields may only be appended, and bbr_version must stay
 * at its current offset (see comment below).
 */
struct tcp_bbr2_info {
	/* u64 bw: bandwidth (app throughput) estimate in Byte per sec: */
	__u32 bbr_bw_lsb; /* lower 32 bits of bw */
	__u32 bbr_bw_msb; /* upper 32 bits of bw */
	__u32 bbr_min_rtt; /* min-filtered RTT in uSec */
	__u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */
	__u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
	__u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */
	__u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */
	__u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */
	__u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */
	__u8 bbr_mode; /* current bbr_mode in state machine */
	__u8 bbr_phase; /* current state machine phase */
	__u8 unused1; /* alignment padding; not used yet */
	__u8 bbr_version; /* MUST be at this offset in struct */
	__u32 bbr_inflight_lo; /* lower/short-term data volume bound */
	__u32 bbr_inflight_hi; /* higher/long-term data volume bound */
	__u32 bbr_extra_acked; /* max excess packets ACKed in epoch */
};

/* Per-algorithm congestion-control info block returned through the
 * inet_diag interface; which member is valid depends on the socket's
 * congestion-control module.
 */
union tcp_cc_info {
	struct tcpvegas_info vegas;
	struct tcp_dctcp_info dctcp;
	struct tcp_bbr_info bbr;
	struct tcp_bbr2_info bbr2;
};
#endif /* _UAPI_INET_DIAG_H_ */
1 change: 0 additions & 1 deletion include/uapi/linux/snmp.h
Expand Up @@ -292,7 +292,6 @@ enum
LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */
LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */
LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */
LINUX_MIB_TCPECNREHASH, /* TCPECNRehash */
__LINUX_MIB_MAX
};

Expand Down
22 changes: 0 additions & 22 deletions net/ipv4/Kconfig
Expand Up @@ -678,24 +678,6 @@ config TCP_CONG_BBR
AQM schemes that do not provide a delay signal. It requires the fq
("Fair Queue") pacing packet scheduler.

config TCP_CONG_BBR2
tristate "BBR2 TCP"
default n
help

BBR2 TCP congestion control is a model-based congestion control
algorithm that aims to maximize network utilization, keep queues and
retransmit rates low, and to be able to coexist with Reno/CUBIC in
common scenarios. It builds an explicit model of the network path. It
tolerates a targeted degree of random packet loss and delay that are
unrelated to congestion. It can operate over LAN, WAN, cellular, wifi,
or cable modem links, and can use DCTCP-L4S-style ECN signals. It can
coexist with flows that use loss-based congestion control, and can
operate with shallow buffers, deep buffers, bufferbloat, policers, or
AQM schemes that do not provide a delay signal. It requires pacing,
using either TCP internal pacing or the fq ("Fair Queue") pacing packet
scheduler.

choice
prompt "Default TCP congestion control"
default DEFAULT_CUBIC
Expand Down Expand Up @@ -733,9 +715,6 @@ choice
config DEFAULT_BBR
bool "BBR" if TCP_CONG_BBR=y

config DEFAULT_BBR2
bool "BBR2" if TCP_CONG_BBR2=y

config DEFAULT_RENO
bool "Reno"
endchoice
Expand All @@ -760,7 +739,6 @@ config DEFAULT_TCP_CONG
default "dctcp" if DEFAULT_DCTCP
default "cdg" if DEFAULT_CDG
default "bbr" if DEFAULT_BBR
default "bbr2" if DEFAULT_BBR2
default "cubic"

config TCP_MD5SIG
Expand Down
3 changes: 1 addition & 2 deletions net/ipv4/Makefile
Expand Up @@ -10,7 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
tcp_rate.o tcp_recovery.o tcp_ulp.o \
tcp_offload.o tcp_plb.o datagram.o raw.o udp.o udplite.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
Expand Down Expand Up @@ -46,7 +46,6 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
obj-$(CONFIG_TCP_CONG_BBR2) += tcp_bbr2.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
Expand Down
1 change: 0 additions & 1 deletion net/ipv4/proc.c
Expand Up @@ -297,7 +297,6 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
SNMP_MIB_ITEM("TCPECNRehash", LINUX_MIB_TCPECNREHASH),
SNMP_MIB_SENTINEL
};

Expand Down

0 comments on commit fc7f1e4

Please sign in to comment.