diff options
| author | David S. Miller <davem@nuts.davemloft.net> | 2004-09-06 19:20:50 -0700 |
|---|---|---|
| committer | Patrick McHardy <kaber@trash.net> | 2004-09-06 19:20:50 -0700 |
| commit | 14a1f44569619b2dfda526dc0f73b9bf0df74171 (patch) | |
| tree | f1200dfaf23a0013babab9fd458051b14d723ad1 /include | |
| parent | 10bc956350e6821a1a9757065962f1924649b12d (diff) | |
[TCP]: Make TSO play nice with congestion window.
Previously TSO would not abide by the congestion
window properly. Essentially, each TSO packet would
be treated just like 1 normal packet, even though a TSO
packet generates more than 1 normal packet. This
violates congestion window rules entirely.
So now we record the TSO factor, a count of how many
real packets a TSO packet will generate, and include
this in all the packet counting routines.
This initial version has a bug in that skb_entail() is
not the correct time to figure out the TSO factor for
the SKB, and tp->mss_tso_factor is not necessarily the
right value for a given SKB. Will fix this up next.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/tcp.h | 17 | ||||
| -rw-r--r-- | include/net/tcp.h | 115 |
2 files changed, 109 insertions, 23 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9c42ac0b0322..ebf15b6a8162 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,6 +201,10 @@ struct tcp_sack_block { __u32 end_seq; }; +typedef struct tcp_pcount { + __u32 val; +} tcp_pcount_t; + struct tcp_opt { int tcp_header_len; /* Bytes of tcp header to send */ @@ -250,6 +254,7 @@ struct tcp_opt { __u32 max_window; /* Maximal window ever seen from peer */ __u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 mss_tso_factor; /* Real packets per TSO packet */ __u16 mss_cache_std; /* Like mss_cache, but without TSO */ __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ @@ -274,9 +279,9 @@ struct tcp_opt { __u32 rtt_seq; /* sequence number to update rttvar */ __u32 rto; /* retransmit timeout */ - __u32 packets_out; /* Packets which are "in flight" */ - __u32 left_out; /* Packets which leaved network */ - __u32 retrans_out; /* Retransmitted packets out */ + tcp_pcount_t packets_out; /* Packets which are "in flight" */ + tcp_pcount_t left_out; /* Packets which leaved network */ + tcp_pcount_t retrans_out; /* Retransmitted packets out */ /* @@ -337,9 +342,9 @@ struct tcp_opt { __u8 syn_retries; /* num of allowed syn retries */ __u8 ecn_flags; /* ECN status bits. 
*/ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ - __u32 lost_out; /* Lost packets */ - __u32 sacked_out; /* SACK'd packets */ - __u32 fackets_out; /* FACK'd packets */ + tcp_pcount_t lost_out; /* Lost packets */ + tcp_pcount_t sacked_out;/* SACK'd packets */ + tcp_pcount_t fackets_out;/* FACK'd packets */ __u32 high_seq; /* snd_nxt at onset of congestion */ __u32 retrans_stamp; /* Timestamp of the last retransmit, diff --git a/include/net/tcp.h b/include/net/tcp.h index a5be63c232e3..efda37b84207 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1047,13 +1047,18 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long * is not a big flaw. */ -static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large) +static inline unsigned int tcp_current_mss(struct sock *sk, int large, int *factor) { struct tcp_opt *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); - int mss_now = large && (sk->sk_route_caps & NETIF_F_TSO) && - !tp->urg_mode ? - tp->mss_cache : tp->mss_cache_std; + int do_large, mss_now; + + do_large = (large && + (sk->sk_route_caps & NETIF_F_TSO) && + !tp->urg_mode); + mss_now = do_large ? tp->mss_cache : tp->mss_cache_std; + if (factor) + *factor = do_large ? tp->mss_tso_factor : 1; if (dst) { u32 mtu = dst_pmtu(dst); @@ -1181,12 +1186,76 @@ struct tcp_skb_cb { __u16 urg_ptr; /* Valid w/URG flags is set. */ __u32 ack_seq; /* Sequence number ACK'd */ + __u32 tso_factor; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) #include <net/tcp_ecn.h> +/* Due to TSO, an SKB can be composed of multiple actual + * packets. To keep these tracked properly, we use this. 
+ */ +static inline int tcp_skb_pcount(struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->tso_factor; +} + +static inline void tcp_inc_pcount(tcp_pcount_t *count, struct sk_buff *skb) +{ + count->val += tcp_skb_pcount(skb); +} + +static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt) +{ + count->val += amt; +} + +static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt) +{ + count->val -= amt; +} + +static inline void tcp_dec_pcount(tcp_pcount_t *count, struct sk_buff *skb) +{ + count->val -= tcp_skb_pcount(skb); +} + +static inline void tcp_dec_pcount_approx(tcp_pcount_t *count, + struct sk_buff *skb) +{ + if (count->val) { + count->val -= tcp_skb_pcount(skb); + if ((int)count->val < 0) + count->val = 0; + } +} + +static inline __u32 tcp_get_pcount(tcp_pcount_t *count) +{ + return count->val; +} + +static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val) +{ + count->val = val; +} + +static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_opt *tp, + struct sk_buff *skb) +{ + int orig = tcp_get_pcount(&tp->packets_out); + + tcp_inc_pcount(&tp->packets_out, skb); + if (!orig) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); +} + +static inline void tcp_packets_out_dec(struct tcp_opt *tp, struct sk_buff *skb) +{ + tcp_dec_pcount(&tp->packets_out, skb); +} + /* This determines how many packets are "in the network" to the best * of our knowledge. 
In many cases it is conservative, but where * detailed information is available from the receiver (via SACK @@ -1203,7 +1272,9 @@ struct tcp_skb_cb { */ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) { - return tp->packets_out - tp->left_out + tp->retrans_out; + return (tcp_get_pcount(&tp->packets_out) - + tcp_get_pcount(&tp->left_out) + + tcp_get_pcount(&tp->retrans_out)); } /* Recalculate snd_ssthresh, we want to set it to: @@ -1304,9 +1375,15 @@ static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp) static inline void tcp_sync_left_out(struct tcp_opt *tp) { - if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out) - tp->sacked_out = tp->packets_out - tp->lost_out; - tp->left_out = tp->sacked_out + tp->lost_out; + if (tp->sack_ok && + (tcp_get_pcount(&tp->sacked_out) >= + tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out))) + tcp_set_pcount(&tp->sacked_out, + (tcp_get_pcount(&tp->packets_out) - + tcp_get_pcount(&tp->lost_out))); + tcp_set_pcount(&tp->left_out, + (tcp_get_pcount(&tp->sacked_out) + + tcp_get_pcount(&tp->lost_out))); } extern void tcp_cwnd_application_limited(struct sock *sk); @@ -1315,14 +1392,16 @@ extern void tcp_cwnd_application_limited(struct sock *sk); static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp) { - if (tp->packets_out >= tp->snd_cwnd) { + __u32 packets_out = tcp_get_pcount(&tp->packets_out); + + if (packets_out >= tp->snd_cwnd) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; } else { /* Network starves. 
*/ - if (tp->packets_out > tp->snd_cwnd_used) - tp->snd_cwnd_used = tp->packets_out; + if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used) + tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out); if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) tcp_cwnd_application_limited(sk); @@ -1388,7 +1467,7 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && ((nonagle&TCP_NAGLE_CORK) || (!nonagle && - tp->packets_out && + tcp_get_pcount(&tp->packets_out) && tcp_minshall_check(tp)))); } @@ -1398,6 +1477,8 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, unsigned cur_mss, int nonagle) { + int pkts = TCP_SKB_CB(skb)->tso_factor; + /* RFC 1122 - section 4.2.3.4 * * We must queue if @@ -1424,14 +1505,14 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, */ return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && - ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || + (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); } static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) { - if (!tp->packets_out && !tp->pending) + if (!tcp_get_pcount(&tp->packets_out) && !tp->pending) tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); } @@ -1464,7 +1545,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk, static __inline__ void tcp_push_pending_frames(struct sock *sk, struct tcp_opt *tp) { - __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); + __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1, NULL), tp->nonagle); } static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) @@ -1472,7 +1553,7 @@ static __inline__ int 
tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) struct sk_buff *skb = sk->sk_send_head; return (skb && - tcp_snd_test(tp, skb, tcp_current_mss(sk, 1), + tcp_snd_test(tp, skb, tcp_current_mss(sk, 1, NULL), tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle)); } @@ -1964,7 +2045,7 @@ static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp) { return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) / - (__u32) (tp->mss_cache), + (__u32) (tp->mss_cache_std), 2U); } |
