From 831eba9c117286066d1afa308c38038cafdac9e4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Sep 2004 10:29:47 -0700 Subject: [PKT_SCHED]: Fixed missed return in tcf_hash_init(). Noticed by Andrew Morton. Signed-off-by: David S. Miller --- include/net/pkt_act.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h index 09b899d87629..be5d651e4fe3 100644 --- a/include/net/pkt_act.h +++ b/include/net/pkt_act.h @@ -274,11 +274,11 @@ tcf_hash_create(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int static inline struct tcf_st * tcf_hash_init(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind) { - struct tcf_st *p; - p = tcf_hash_check (parm,a,ovr,bind); - if (NULL == p) { - return tcf_hash_create(parm, est, a, size, ovr, bind); - } + struct tcf_st *p = tcf_hash_check (parm,a,ovr,bind); + + if (!p) + p = tcf_hash_create(parm, est, a, size, ovr, bind); + return p; } #endif -- cgit v1.2.3 From 954bebf6b0d387241bf9a15a68791c13f1af32e3 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 2 Sep 2004 10:34:46 -0700 Subject: [CREDITS]: Update netfilter entries. Add missing entries for netfilter core team members, and update Rusty's personal URL. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- CREDITS | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 08f699ba8b2b..2ba97bc2d7cf 100644 --- a/CREDITS +++ b/CREDITS @@ -1573,10 +1573,30 @@ D: Backport/Forwardport merge monkey. D: Various Janitor work. 
S: United Kingdom +N: Martin Josfsson +E: gandalf@wlug.westbo.se +P: 1024D/F6B6D3B1 7610 7CED 5C34 4AA6 DBA2 8BE1 5A6D AF95 F6B6 D3B1 +D: netfilter: SAME target +D: netfilter: helper target +D: netfilter: various other hacks +S: Ronneby +S: Sweden + N: Ani Joshi E: ajoshi@shell.unixbox.com D: fbdev hacking +N: Jozsef Kadlecsik +E: kadlec@blackhole.kfki.hu +P: 1024D/470DB964 4CB3 1A05 713E 9BF7 FAC5 5809 DD8C B7B1 470D B964 +D: netfilter: TCP window tracking code +D: netfilter: raw table +D: netfilter: iprange match +D: netfilter: new logging interfaces +D: netfilter: various other hacks +S: Tata +S: Hungary + N: Bernhard Kaindl E: bkaindl@netway.at E: edv@bartelt.via.at @@ -2137,6 +2157,16 @@ D: OV511 driver S: (address available on request) S: USA +N: Patrick McHardy +E: kaber@trash.net +P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80 +D: netfilter: endless number of bugfixes +D: netfilter: CLASSIFY target +D: netfilter: addrtype match +D: tc: HFSC scheduler +S: Freiburg +S: Germany + N: Mike McLagan E: mike.mclagan@linux.org W: http://www.invlogic.com/~mmclagan @@ -2806,7 +2836,7 @@ S: Germany N: Paul `Rusty' Russell E: rusty@rustcorp.com.au -W: http://www.samba.org/netfilter +W: http://ozlabs.org/~rusty D: Ruggedly handsome. D: netfilter, ipchains with Michael Neuling. 
S: 52 Moore St @@ -3399,6 +3429,18 @@ S: UC Berkeley S: Berkeley, CA 94720-1776 S: USA +N: Harald Welte +E: laforge@netfilter.org +P: 1024D/30F48BFF DBDE 6912 8831 9A53 879B 9190 5DA5 C655 30F4 8BFF +W: http://gnumonks.org/users/laforge +D: netfilter: new nat helper infrastructure +D: netfilter: ULOG, ECN, DSCP target +D: netfilter: TTL match +D: netfilter: IPv6 mangle table +D: netfilter: various other hacks +S: Berlin +S: Germany + N: Bill Wendling E: wendling@ganymede.isdn.uiuc.edu W: http://www.ncsa.uiuc.edu/~wendling/ -- cgit v1.2.3 From 10bc956350e6821a1a9757065962f1924649b12d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 2 Sep 2004 19:01:02 -0700 Subject: [NET]: Add reference counting to neigh_parms. I've added a refcnt on neigh_parms as well as a dead flag. The latter is checked under the tbl_lock before adding a neigh entry to the hash table. The non-trivial bit of the patch is the first chunk of net/core/neighbour.c. I removed that line because not doing so would mean that I have to drop the reference to the parms right there. That would've lead to race conditions since many places dereference neigh->parms without holding locks. It's also unnecessary to reset n->parms since we're no longer in a hurry to see it go due to the new ref counting. You'll also notice that I've put all dereferences of dev->*_ptr under the rcu_read_lock(). Without this we may get a neigh_parms that's already been released. Incidentally a lot of these places were racy even before the RCU change. For example, in the IPv6 case neigh->parms may be set to a value that's just been released. Finally in order to make sure that all stale entries are purged as quickly as possible I've added neigh_ifdown/arp_ifdown calls after every neigh_parms_release call. In many cases we now have multiple calls to neigh_ifdown in the shutdown path. I didn't remove the earlier calls because there may be hidden dependencies for them to be there. 
Once the respective maintainers have looked at them we can probably remove most of them. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/s390/net/qeth_main.c | 23 ++++++++++++++++------- include/net/neighbour.h | 20 ++++++++++++++++++++ net/atm/clip.c | 21 ++++++++++++++++++--- net/core/neighbour.c | 24 +++++++++++++++++++----- net/decnet/dn_dev.c | 1 + net/decnet/dn_neigh.c | 18 ++++++++++++++---- net/ipv4/arp.c | 23 ++++++++++++++++------- net/ipv4/devinet.c | 6 +++++- net/ipv6/addrconf.c | 1 + net/ipv6/ndisc.c | 18 ++++++++++++++---- 10 files changed, 124 insertions(+), 31 deletions(-) diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index f6740ca511bd..d5285d105c65 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -6710,19 +6710,28 @@ static int qeth_arp_constructor(struct neighbour *neigh) { struct net_device *dev = neigh->dev; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev; + struct neigh_parms *parms; - if (in_dev == NULL) - return -EINVAL; if (!qeth_verify_dev(dev)) { - in_dev_put(in_dev); return qeth_old_arp_constructor(neigh); } + rcu_read_lock(); + in_dev = __in_dev_get(dev); + if (in_dev == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + + parms = in_dev->arp_parms; + if (parms) { + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + } + rcu_read_unlock(); + neigh->type = inet_addr_type(*(u32 *) neigh->primary_key); - if (in_dev->arp_parms) - neigh->parms = in_dev->arp_parms; - in_dev_put(in_dev); neigh->nud_state = NUD_NOARP; neigh->ops = arp_direct_ops; neigh->output = neigh->ops->queue_xmit; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3df412c9f386..8c97932a98e7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -67,6 +67,8 @@ struct neigh_parms void *sysctl_table; + int dead; + atomic_t refcnt; struct rcu_head rcu_head; int base_reachable_time; @@ -199,6 +201,7 @@ extern struct 
neighbour *neigh_event_ns(struct neigh_table *tbl, extern struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); +extern void neigh_parms_destroy(struct neigh_parms *parms); extern unsigned long neigh_rand_reach_time(unsigned long base); extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, @@ -220,6 +223,23 @@ extern int neigh_sysctl_register(struct net_device *dev, proc_handler *proc_handler); extern void neigh_sysctl_unregister(struct neigh_parms *p); +static inline void __neigh_parms_put(struct neigh_parms *parms) +{ + atomic_dec(&parms->refcnt); +} + +static inline void neigh_parms_put(struct neigh_parms *parms) +{ + if (atomic_dec_and_test(&parms->refcnt)) + neigh_parms_destroy(parms); +} + +static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) +{ + atomic_inc(&parms->refcnt); + return parms; +} + /* * Neighbour references */ diff --git a/net/atm/clip.c b/net/atm/clip.c index 5de7c1fd73b5..f7756e1f93ce 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* for struct rtable and routing */ #include /* icmp_send */ #include /* for HZ */ @@ -311,13 +312,27 @@ static int clip_constructor(struct neighbour *neigh) { struct atmarp_entry *entry = NEIGH2ENTRY(neigh); struct net_device *dev = neigh->dev; - struct in_device *in_dev = dev->ip_ptr; + struct in_device *in_dev; + struct neigh_parms *parms; DPRINTK("clip_constructor (neigh %p, entry %p)\n",neigh,entry); - if (!in_dev) return -EINVAL; neigh->type = inet_addr_type(entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; - if (in_dev->arp_parms) neigh->parms = in_dev->arp_parms; + + rcu_read_lock(); + in_dev = __in_dev_get(dev); + if (!in_dev) { + rcu_read_unlock(); + return -EINVAL; + } + + parms = in_dev->arp_parms; + if (parms) { + __neigh_parms_put(neigh->parms); + neigh->parms = 
neigh_parms_clone(parms); + } + rcu_read_unlock(); + neigh->ops = &clip_neigh_ops; neigh->output = neigh->nud_state & NUD_VALID ? neigh->ops->connected_output : neigh->ops->output; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f5deae1541c4..c9a747e89e5d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -227,7 +227,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - n->parms = &tbl->parms; skb_queue_purge(&n->arp_queue); n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -273,7 +272,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; - n->parms = &tbl->parms; + n->parms = neigh_parms_clone(&tbl->parms); init_timer(&n->timer); n->timer.function = neigh_timer_handler; n->timer.data = (unsigned long)n; @@ -340,12 +339,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, hash_val = tbl->hash(pkey, dev); write_lock_bh(&tbl->lock); + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); + goto out_tbl_unlock; + } + for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { neigh_hold(n1); - write_unlock_bh(&tbl->lock); rc = n1; - goto out_neigh_release; + goto out_tbl_unlock; } } @@ -358,6 +361,8 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, rc = n; out: return rc; +out_tbl_unlock: + write_unlock_bh(&tbl->lock); out_neigh_release: neigh_release(n); goto out; @@ -494,6 +499,7 @@ void neigh_destroy(struct neighbour *neigh) skb_queue_purge(&neigh->arp_queue); dev_put(neigh->dev); + neigh_parms_put(neigh->parms); NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); @@ -1120,6 +1126,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, if (p) { memcpy(p, &tbl->parms, sizeof(*p)); p->tbl = tbl; + atomic_set(&p->refcnt, 1); 
INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); @@ -1141,7 +1148,7 @@ static void neigh_rcu_free_parms(struct rcu_head *head) struct neigh_parms *parms = container_of(head, struct neigh_parms, rcu_head); - kfree(parms); + neigh_parms_put(parms); } void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) @@ -1154,6 +1161,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) for (p = &tbl->parms.next; *p; p = &(*p)->next) { if (*p == parms) { *p = parms->next; + parms->dead = 1; write_unlock_bh(&tbl->lock); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); return; @@ -1163,11 +1171,17 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) NEIGH_PRINTK1("neigh_parms_release: not found\n"); } +void neigh_parms_destroy(struct neigh_parms *parms) +{ + kfree(parms); +} + void neigh_table_init(struct neigh_table *tbl) { unsigned long now = jiffies; + atomic_set(&tbl->parms.refcnt, 1); INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 5a05efb83092..733b1cf6c440 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1215,6 +1215,7 @@ static void dn_dev_delete(struct net_device *dev) dev->dn_ptr = NULL; neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); + neigh_ifdown(&dn_neigh_table, dev); if (dn_db->router) neigh_release(dn_db->router); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index ab64b850c12b..e874232ec54b 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -134,13 +135,22 @@ static int dn_neigh_construct(struct neighbour *neigh) { struct net_device *dev = neigh->dev; struct dn_neigh *dn = (struct dn_neigh *)neigh; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db; + 
struct neigh_parms *parms; - if (dn_db == NULL) + rcu_read_lock(); + dn_db = dev->dn_ptr; + if (dn_db == NULL) { + rcu_read_unlock(); return -EINVAL; + } - if (dn_db->neigh_parms) - neigh->parms = dn_db->neigh_parms; + parms = dn_db->neigh_parms; + if (parms) { + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + } + rcu_read_unlock(); if (dn_db->use_long) neigh->ops = &dn_long_ops; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 562702d99ba2..f4e6a4a368ec 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -96,6 +96,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -237,16 +238,24 @@ static int arp_constructor(struct neighbour *neigh) { u32 addr = *(u32*)neigh->primary_key; struct net_device *dev = neigh->dev; - struct in_device *in_dev = in_dev_get(dev); - - if (in_dev == NULL) - return -EINVAL; + struct in_device *in_dev; + struct neigh_parms *parms; neigh->type = inet_addr_type(addr); - if (in_dev->arp_parms) - neigh->parms = in_dev->arp_parms; - in_dev_put(in_dev); + rcu_read_lock(); + in_dev = __in_dev_get(dev); + if (in_dev == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + + parms = in_dev->arp_parms; + if (parms) { + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + } + rcu_read_unlock(); if (dev->hard_header == NULL) { neigh->nud_state = NUD_NOARP; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index fc9930460864..19eb795a1140 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -184,6 +184,7 @@ static void in_dev_rcu_put(struct rcu_head *head) static void inetdev_destroy(struct in_device *in_dev) { struct in_ifaddr *ifa; + struct net_device *dev; ASSERT_RTNL(); @@ -200,12 +201,15 @@ static void inetdev_destroy(struct in_device *in_dev) devinet_sysctl_unregister(&in_dev->cnf); #endif - in_dev->dev->ip_ptr = NULL; + dev = in_dev->dev; + dev->ip_ptr = NULL; #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(in_dev->arp_parms); #endif 
neigh_parms_release(&arp_tbl, in_dev->arp_parms); + arp_ifdown(dev); + call_rcu(&in_dev->rcu_head, in_dev_rcu_put); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7150375908a8..d2091c5ce489 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2072,6 +2072,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) neigh_sysctl_unregister(idev->nd_parms); #endif neigh_parms_release(&nd_tbl, idev->nd_parms); + neigh_ifdown(&nd_tbl, dev); in6_dev_put(idev); } return 0; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b278e5a04ca8..6d23ea909aca 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -58,6 +58,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -284,14 +285,23 @@ static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key; struct net_device *dev = neigh->dev; - struct inet6_dev *in6_dev = in6_dev_get(dev); + struct inet6_dev *in6_dev; + struct neigh_parms *parms; int is_multicast = ipv6_addr_is_multicast(addr); - if (in6_dev == NULL) + rcu_read_lock(); + in6_dev = in6_dev_get(dev); + if (in6_dev == NULL) { + rcu_read_unlock(); return -EINVAL; + } - if (in6_dev->nd_parms) - neigh->parms = in6_dev->nd_parms; + parms = in6_dev->nd_parms; + if (parms) { + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + } + rcu_read_unlock(); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (dev->hard_header == NULL) { -- cgit v1.2.3 From 14a1f44569619b2dfda526dc0f73b9bf0df74171 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Sep 2004 19:20:50 -0700 Subject: [TCP]: Make TSO play nice with congestion window. Previously TSO would not abide by the congestion window properly. Essentially, each TSO packet would be trated just like 1 normal packet, even though a TSO packet generates more than 1 normal packet. This violates congestion window rules entirely. 
So now we record the TSO factor, a count of how many real packets a TSO packet will generate, and include this in all the packet counting routines. This initial version has a bug in that skb_entail() is not the correct time to figure out the TSO factor for the SKB, and tp->mss_tso_factor is not necessarily the right value for a given SKB. Will fix this up next. Signed-off-by: David S. Miller --- include/linux/tcp.h | 17 ++-- include/net/tcp.h | 115 +++++++++++++++++---- net/ipv4/tcp.c | 23 +++-- net/ipv4/tcp_diag.c | 12 +-- net/ipv4/tcp_input.c | 256 +++++++++++++++++++++++++---------------------- net/ipv4/tcp_ipv4.c | 3 +- net/ipv4/tcp_minisocks.c | 10 +- net/ipv4/tcp_output.c | 114 +++++++++++++-------- net/ipv4/tcp_timer.c | 8 +- net/ipv6/tcp_ipv6.c | 3 +- 10 files changed, 350 insertions(+), 211 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9c42ac0b0322..ebf15b6a8162 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,6 +201,10 @@ struct tcp_sack_block { __u32 end_seq; }; +typedef struct tcp_pcount { + __u32 val; +} tcp_pcount_t; + struct tcp_opt { int tcp_header_len; /* Bytes of tcp header to send */ @@ -250,6 +254,7 @@ struct tcp_opt { __u32 max_window; /* Maximal window ever seen from peer */ __u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 mss_tso_factor; /* Real packets per TSO packet */ __u16 mss_cache_std; /* Like mss_cache, but without TSO */ __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ @@ -274,9 +279,9 @@ struct tcp_opt { __u32 rtt_seq; /* sequence number to update rttvar */ __u32 rto; /* retransmit timeout */ - __u32 packets_out; /* Packets which are "in flight" */ - __u32 left_out; /* Packets which leaved network */ - __u32 retrans_out; /* Retransmitted packets out */ + tcp_pcount_t packets_out; /* Packets which are "in flight" */ + 
tcp_pcount_t left_out; /* Packets which leaved network */ + tcp_pcount_t retrans_out; /* Retransmitted packets out */ /* @@ -337,9 +342,9 @@ struct tcp_opt { __u8 syn_retries; /* num of allowed syn retries */ __u8 ecn_flags; /* ECN status bits. */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ - __u32 lost_out; /* Lost packets */ - __u32 sacked_out; /* SACK'd packets */ - __u32 fackets_out; /* FACK'd packets */ + tcp_pcount_t lost_out; /* Lost packets */ + tcp_pcount_t sacked_out;/* SACK'd packets */ + tcp_pcount_t fackets_out;/* FACK'd packets */ __u32 high_seq; /* snd_nxt at onset of congestion */ __u32 retrans_stamp; /* Timestamp of the last retransmit, diff --git a/include/net/tcp.h b/include/net/tcp.h index a5be63c232e3..efda37b84207 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1047,13 +1047,18 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long * is not a big flaw. */ -static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large) +static inline unsigned int tcp_current_mss(struct sock *sk, int large, int *factor) { struct tcp_opt *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); - int mss_now = large && (sk->sk_route_caps & NETIF_F_TSO) && - !tp->urg_mode ? - tp->mss_cache : tp->mss_cache_std; + int do_large, mss_now; + + do_large = (large && + (sk->sk_route_caps & NETIF_F_TSO) && + !tp->urg_mode); + mss_now = do_large ? tp->mss_cache : tp->mss_cache_std; + if (factor) + *factor = do_large ? tp->mss_tso_factor : 1; if (dst) { u32 mtu = dst_pmtu(dst); @@ -1181,12 +1186,76 @@ struct tcp_skb_cb { __u16 urg_ptr; /* Valid w/URG flags is set. */ __u32 ack_seq; /* Sequence number ACK'd */ + __u32 tso_factor; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) #include +/* Due to TSO, an SKB can be composed of multiple actual + * packets. To keep these tracked properly, we use this. 
+ */ +static inline int tcp_skb_pcount(struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->tso_factor; +} + +static inline void tcp_inc_pcount(tcp_pcount_t *count, struct sk_buff *skb) +{ + count->val += tcp_skb_pcount(skb); +} + +static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt) +{ + count->val += amt; +} + +static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt) +{ + count->val -= amt; +} + +static inline void tcp_dec_pcount(tcp_pcount_t *count, struct sk_buff *skb) +{ + count->val -= tcp_skb_pcount(skb); +} + +static inline void tcp_dec_pcount_approx(tcp_pcount_t *count, + struct sk_buff *skb) +{ + if (count->val) { + count->val -= tcp_skb_pcount(skb); + if ((int)count->val < 0) + count->val = 0; + } +} + +static inline __u32 tcp_get_pcount(tcp_pcount_t *count) +{ + return count->val; +} + +static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val) +{ + count->val = val; +} + +static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_opt *tp, + struct sk_buff *skb) +{ + int orig = tcp_get_pcount(&tp->packets_out); + + tcp_inc_pcount(&tp->packets_out, skb); + if (!orig) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); +} + +static inline void tcp_packets_out_dec(struct tcp_opt *tp, struct sk_buff *skb) +{ + tcp_dec_pcount(&tp->packets_out, skb); +} + /* This determines how many packets are "in the network" to the best * of our knowledge. 
In many cases it is conservative, but where * detailed information is available from the receiver (via SACK @@ -1203,7 +1272,9 @@ struct tcp_skb_cb { */ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) { - return tp->packets_out - tp->left_out + tp->retrans_out; + return (tcp_get_pcount(&tp->packets_out) - + tcp_get_pcount(&tp->left_out) + + tcp_get_pcount(&tp->retrans_out)); } /* Recalculate snd_ssthresh, we want to set it to: @@ -1304,9 +1375,15 @@ static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp) static inline void tcp_sync_left_out(struct tcp_opt *tp) { - if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out) - tp->sacked_out = tp->packets_out - tp->lost_out; - tp->left_out = tp->sacked_out + tp->lost_out; + if (tp->sack_ok && + (tcp_get_pcount(&tp->sacked_out) >= + tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out))) + tcp_set_pcount(&tp->sacked_out, + (tcp_get_pcount(&tp->packets_out) - + tcp_get_pcount(&tp->lost_out))); + tcp_set_pcount(&tp->left_out, + (tcp_get_pcount(&tp->sacked_out) + + tcp_get_pcount(&tp->lost_out))); } extern void tcp_cwnd_application_limited(struct sock *sk); @@ -1315,14 +1392,16 @@ extern void tcp_cwnd_application_limited(struct sock *sk); static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp) { - if (tp->packets_out >= tp->snd_cwnd) { + __u32 packets_out = tcp_get_pcount(&tp->packets_out); + + if (packets_out >= tp->snd_cwnd) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; } else { /* Network starves. 
*/ - if (tp->packets_out > tp->snd_cwnd_used) - tp->snd_cwnd_used = tp->packets_out; + if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used) + tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out); if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) tcp_cwnd_application_limited(sk); @@ -1388,7 +1467,7 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && ((nonagle&TCP_NAGLE_CORK) || (!nonagle && - tp->packets_out && + tcp_get_pcount(&tp->packets_out) && tcp_minshall_check(tp)))); } @@ -1398,6 +1477,8 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, unsigned cur_mss, int nonagle) { + int pkts = TCP_SKB_CB(skb)->tso_factor; + /* RFC 1122 - section 4.2.3.4 * * We must queue if @@ -1424,14 +1505,14 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, */ return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && - ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || + (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); } static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) { - if (!tp->packets_out && !tp->pending) + if (!tcp_get_pcount(&tp->packets_out) && !tp->pending) tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); } @@ -1464,7 +1545,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk, static __inline__ void tcp_push_pending_frames(struct sock *sk, struct tcp_opt *tp) { - __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); + __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1, NULL), tp->nonagle); } static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) @@ -1472,7 +1553,7 @@ static __inline__ int 
tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) struct sk_buff *skb = sk->sk_send_head; return (skb && - tcp_snd_test(tp, skb, tcp_current_mss(sk, 1), + tcp_snd_test(tp, skb, tcp_current_mss(sk, 1, NULL), tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle)); } @@ -1964,7 +2045,7 @@ static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp) { return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) / - (__u32) (tp->mss_cache), + (__u32) (tp->mss_cache_std), 2U); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f34bdec2f31e..9df826c8e22b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -590,13 +590,14 @@ static inline int forced_push(struct tcp_opt *tp) } static inline void skb_entail(struct sock *sk, struct tcp_opt *tp, - struct sk_buff *skb) + struct sk_buff *skb, int tso_factor) { skb->csum = 0; TCP_SKB_CB(skb)->seq = tp->write_seq; TCP_SKB_CB(skb)->end_seq = tp->write_seq; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tso_factor = tso_factor; __skb_queue_tail(&sk->sk_write_queue, skb); sk_charge_skb(sk, skb); if (!sk->sk_send_head) @@ -632,7 +633,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse size_t psize, int flags) { struct tcp_opt *tp = tcp_sk(sk); - int mss_now; + int mss_now, mss_factor_now; int err; ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -644,7 +645,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), &mss_factor_now); copied = 0; err = -EPIPE; @@ -668,7 +669,7 @@ new_segment: if (!skb) goto wait_for_memory; - skb_entail(sk, tp, skb); + skb_entail(sk, tp, skb, mss_factor_now); copy = mss_now; } @@ -719,7 +720,8 @@ wait_for_memory: if ((err = 
sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), + &mss_factor_now); } out: @@ -780,7 +782,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; int iovlen, flags; - int mss_now; + int mss_now, mss_factor_now; int err, copied; long timeo; @@ -798,7 +800,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* This should be in poll */ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), &mss_factor_now); /* Ok commence sending. */ iovlen = msg->msg_iovlen; @@ -843,7 +845,7 @@ new_segment: NETIF_F_HW_CSUM)) skb->ip_summed = CHECKSUM_HW; - skb_entail(sk, tp, skb); + skb_entail(sk, tp, skb, mss_factor_now); copy = mss_now; } @@ -962,7 +964,8 @@ wait_for_memory: if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), + &mss_factor_now); } } @@ -1818,7 +1821,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->backoff = 0; tp->snd_cwnd = 2; tp->probes_out = 0; - tp->packets_out = 0; + tcp_set_pcount(&tp->packets_out, 0); tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; tcp_set_ca_state(tp, TCP_CA_Open); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5506944b7e7c..e0f8a7664f7e 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -70,14 +70,14 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rto = (1000000*tp->rto)/HZ; info->tcpi_ato = (1000000*tp->ack.ato)/HZ; - info->tcpi_snd_mss = tp->mss_cache; + info->tcpi_snd_mss = tp->mss_cache_std; info->tcpi_rcv_mss = tp->ack.rcv_mss; - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; - info->tcpi_lost = tp->lost_out; - 
info->tcpi_retrans = tp->retrans_out; - info->tcpi_fackets = tp->fackets_out; + info->tcpi_unacked = tcp_get_pcount(&tp->packets_out); + info->tcpi_sacked = tcp_get_pcount(&tp->sacked_out); + info->tcpi_lost = tcp_get_pcount(&tp->lost_out); + info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out); + info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out); info->tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ; info->tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85643472b84d..f4ec16169906 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -897,7 +897,9 @@ static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts) #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", tp->sack_ok, tp->ca_state, - tp->reordering, tp->fackets_out, tp->sacked_out, + tp->reordering, + tcp_get_pcount(&tp->fackets_out), + tcp_get_pcount(&tp->sacked_out), tp->undo_marker ? tp->undo_retrans : 0); #endif /* Disable FACK yet. 
*/ @@ -960,7 +962,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; - int reord = tp->packets_out; + int reord = tcp_get_pcount(&tp->packets_out); int prior_fackets; u32 lost_retrans = 0; int flag = 0; @@ -972,11 +974,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ sk->sk_route_caps &= ~NETIF_F_TSO; sk->sk_no_largesend = 1; tp->mss_cache = tp->mss_cache_std; + tp->mss_tso_factor = 1; } - if (!tp->sacked_out) - tp->fackets_out = 0; - prior_fackets = tp->fackets_out; + if (!tcp_get_pcount(&tp->sacked_out)) + tcp_set_pcount(&tp->fackets_out, 0); + prior_fackets = tcp_get_pcount(&tp->fackets_out); for (i=0; isacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tp->lost_out--; - tp->retrans_out--; + tcp_dec_pcount(&tp->lost_out, skb); + tcp_dec_pcount(&tp->retrans_out, skb); } } else { /* New sack for not retransmitted frame, @@ -1087,16 +1090,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tp->lost_out--; + tcp_dec_pcount(&tp->lost_out, skb); } } TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; flag |= FLAG_DATA_SACKED; - tp->sacked_out++; + tcp_inc_pcount(&tp->sacked_out, skb); - if (fack_count > tp->fackets_out) - tp->fackets_out = fack_count; + if (fack_count > tcp_get_pcount(&tp->fackets_out)) + tcp_set_pcount(&tp->fackets_out, fack_count); } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); @@ -1110,7 +1113,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (dup_sack && (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); } } } @@ -1134,12 
+1137,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ (IsFack(tp) || !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq + tp->reordering * - tp->mss_cache))) { + tp->mss_cache_std))) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; flag |= FLAG_DATA_SACKED; NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); @@ -1148,15 +1151,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } } - tp->left_out = tp->sacked_out + tp->lost_out; + tcp_set_pcount(&tp->left_out, + (tcp_get_pcount(&tp->sacked_out) + + tcp_get_pcount(&tp->lost_out))); - if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, (tp->fackets_out + 1) - reord, 0); + if ((reord < tcp_get_pcount(&tp->fackets_out)) && + tp->ca_state != TCP_CA_Loss) + tcp_update_reordering(tp, + ((tcp_get_pcount(&tp->fackets_out) + 1) - + reord), 0); #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0); BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); #endif return flag; @@ -1186,7 +1194,7 @@ void tcp_enter_frto(struct sock *sk) * If something was really lost, it is eventually caught up * in tcp_enter_frto_loss. 
*/ - tp->retrans_out = 0; + tcp_set_pcount(&tp->retrans_out, 0); tp->undo_marker = tp->snd_una; tp->undo_retrans = 0; @@ -1209,26 +1217,26 @@ static void tcp_enter_frto_loss(struct sock *sk) struct sk_buff *skb; int cnt = 0; - tp->sacked_out = 0; - tp->lost_out = 0; - tp->fackets_out = 0; + tcp_set_pcount(&tp->sacked_out, 0); + tcp_set_pcount(&tp->lost_out, 0); + tcp_set_pcount(&tp->fackets_out, 0); sk_stream_for_retrans_queue(skb, sk) { - cnt++; + cnt += TCP_SKB_CB(skb)->tso_factor;; TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { /* Do not mark those segments lost that were * forward transmitted after RTO */ - if(!after(TCP_SKB_CB(skb)->end_seq, + if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } } else { - tp->sacked_out++; - tp->fackets_out = cnt; + tcp_inc_pcount(&tp->sacked_out, skb); + tcp_set_pcount(&tp->fackets_out, cnt); } } tcp_sync_left_out(tp); @@ -1250,12 +1258,12 @@ static void tcp_enter_frto_loss(struct sock *sk) void tcp_clear_retrans(struct tcp_opt *tp) { - tp->left_out = 0; - tp->retrans_out = 0; + tcp_set_pcount(&tp->left_out, 0); + tcp_set_pcount(&tp->retrans_out, 0); - tp->fackets_out = 0; - tp->sacked_out = 0; - tp->lost_out = 0; + tcp_set_pcount(&tp->fackets_out, 0); + tcp_set_pcount(&tp->sacked_out, 0); + tcp_set_pcount(&tp->lost_out, 0); tp->undo_marker = 0; tp->undo_retrans = 0; @@ -1289,17 +1297,17 @@ void tcp_enter_loss(struct sock *sk, int how) tp->undo_marker = tp->snd_una; sk_stream_for_retrans_queue(skb, sk) { - cnt++; + cnt += TCP_SKB_CB(skb)->tso_factor; if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } 
else { - tp->sacked_out++; - tp->fackets_out = cnt; + tcp_inc_pcount(&tp->sacked_out, skb); + tcp_set_pcount(&tp->fackets_out, cnt); } } tcp_sync_left_out(tp); @@ -1336,7 +1344,8 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp) static inline int tcp_fackets_out(struct tcp_opt *tp) { - return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; + return IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 : + tcp_get_pcount(&tp->fackets_out); } static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) @@ -1346,7 +1355,7 @@ static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp) { - return tp->packets_out && + return tcp_get_pcount(&tp->packets_out) && tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); } @@ -1446,8 +1455,10 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp) static int tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp) { + __u32 packets_out; + /* Trick#1: The loss is proven. */ - if (tp->lost_out) + if (tcp_get_pcount(&tp->lost_out)) return 1; /* Not-A-Trick#2 : Classic rule... */ @@ -1463,8 +1474,9 @@ tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp) /* Trick#4: It is still not OK... But will it be useful to delay * recovery more? */ - if (tp->packets_out <= tp->reordering && - tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) && + packets_out = tcp_get_pcount(&tp->packets_out); + if (packets_out <= tp->reordering && + tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && !tcp_may_send_now(sk, tp)) { /* We have nothing to send. This connection is limited * either by receiver window or by application. 
@@ -1483,12 +1495,16 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend) { u32 holes; - holes = max(tp->lost_out, 1U); - holes = min(holes, tp->packets_out); + holes = max(tcp_get_pcount(&tp->lost_out), 1U); + holes = min(holes, tcp_get_pcount(&tp->packets_out)); - if (tp->sacked_out + holes > tp->packets_out) { - tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(tp, tp->packets_out+addend, 0); + if ((tcp_get_pcount(&tp->sacked_out) + holes) > + tcp_get_pcount(&tp->packets_out)) { + tcp_set_pcount(&tp->sacked_out, + (tcp_get_pcount(&tp->packets_out) - holes)); + tcp_update_reordering(tp, + tcp_get_pcount(&tp->packets_out)+addend, + 0); } } @@ -1496,7 +1512,7 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend) static void tcp_add_reno_sack(struct tcp_opt *tp) { - ++tp->sacked_out; + tcp_inc_pcount_explicit(&tp->sacked_out, 1); tcp_check_reno_reordering(tp, 0); tcp_sync_left_out(tp); } @@ -1507,10 +1523,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked { if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tp->sacked_out) - tp->sacked_out = 0; + if (acked-1 >= tcp_get_pcount(&tp->sacked_out)) + tcp_set_pcount(&tp->sacked_out, 0); else - tp->sacked_out -= acked-1; + tcp_dec_pcount_explicit(&tp->sacked_out, acked-1); } tcp_check_reno_reordering(tp, acked); tcp_sync_left_out(tp); @@ -1518,8 +1534,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked static inline void tcp_reset_reno_sack(struct tcp_opt *tp) { - tp->sacked_out = 0; - tp->left_out = tp->lost_out; + tcp_set_pcount(&tp->sacked_out, 0); + tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out)); } /* Mark head of queue up as lost. 
*/ @@ -1529,14 +1545,15 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se struct sk_buff *skb; int cnt = packets; - BUG_TRAP(cnt <= tp->packets_out); + BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out)); sk_stream_for_retrans_queue(skb, sk) { - if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + cnt -= TCP_SKB_CB(skb)->tso_factor; + if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } } tcp_sync_left_out(tp); @@ -1547,7 +1564,7 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp) { if (IsFack(tp)) { - int lost = tp->fackets_out - tp->reordering; + int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering; if (lost <= 0) lost = 1; tcp_mark_head_lost(sk, tp, lost, tp->high_seq); @@ -1567,7 +1584,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp) if (tcp_skb_timedout(tp, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } } tcp_sync_left_out(tp); @@ -1632,8 +1649,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg) printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), - tp->snd_cwnd, tp->left_out, - tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); + tp->snd_cwnd, tcp_get_pcount(&tp->left_out), + tp->snd_ssthresh, tp->prior_ssthresh, + tcp_get_pcount(&tp->packets_out)); } #else #define DBGUNDO(x...) do { } while (0) @@ -1703,13 +1721,13 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp) static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked) { /* Partial ACK arrived. Force Hoe's retransmit. 
*/ - int failed = IsReno(tp) || tp->fackets_out>tp->reordering; + int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering; if (tcp_may_undo(tp)) { /* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */ - if (tp->retrans_out == 0) + if (tcp_get_pcount(&tp->retrans_out) == 0) tp->retrans_stamp = 0; tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); @@ -1736,8 +1754,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp) TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } DBGUNDO(sk, tp, "partial loss"); - tp->lost_out = 0; - tp->left_out = tp->sacked_out; + tcp_set_pcount(&tp->lost_out, 0); + tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out)); tcp_undo_cwr(tp, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); tp->retransmits = 0; @@ -1760,9 +1778,9 @@ static __inline__ void tcp_complete_cwr(struct tcp_opt *tp) static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) { - tp->left_out = tp->sacked_out; + tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out)); - if (tp->retrans_out == 0) + if (tcp_get_pcount(&tp->retrans_out) == 0) tp->retrans_stamp = 0; if (flag&FLAG_ECE) @@ -1771,8 +1789,8 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) if (tp->ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; - if (tp->left_out || - tp->retrans_out || + if (tcp_get_pcount(&tp->left_out) || + tcp_get_pcount(&tp->retrans_out) || tp->undo_marker) state = TCP_CA_Disorder; @@ -1806,11 +1824,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Some technical things: * 1. Reno does not count dupacks (sacked_out) automatically. */ - if (!tp->packets_out) - tp->sacked_out = 0; + if (!tcp_get_pcount(&tp->packets_out)) + tcp_set_pcount(&tp->sacked_out, 0); /* 2. SACK counts snd_fack in packets inaccurately. 
*/ - if (tp->sacked_out == 0) - tp->fackets_out = 0; + if (tcp_get_pcount(&tp->sacked_out) == 0) + tcp_set_pcount(&tp->fackets_out, 0); /* Now state machine starts. * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ @@ -1818,15 +1836,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ - if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) + if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp)) return; /* C. Process data loss notification, provided it is valid. */ if ((flag&FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && tp->ca_state != TCP_CA_Open && - tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); + tcp_get_pcount(&tp->fackets_out) > tp->reordering) { + tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } @@ -1837,7 +1855,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * when high_seq is ACKed. 
*/ if (tp->ca_state == TCP_CA_Open) { if (!sysctl_tcp_frto) - BUG_TRAP(tp->retrans_out == 0); + BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (tp->ca_state) { @@ -1884,7 +1902,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, if (IsReno(tp) && is_dupack) tcp_add_reno_sack(tp); } else { - int acked = prior_packets - tp->packets_out; + int acked = prior_packets - + tcp_get_pcount(&tp->packets_out); if (IsReno(tp)) tcp_remove_reno_sacks(sk, tp, acked); is_dupack = tcp_try_undo_partial(sk, tp, acked); @@ -1927,7 +1946,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out; + tp->undo_retrans = tcp_get_pcount(&tp->retrans_out); if (tp->ca_state < TCP_CA_CWR) { if (!(flag&FLAG_ECE)) @@ -2156,7 +2175,7 @@ static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt) * is the cwnd during the previous RTT. 
*/ old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) / - tp->mss_cache; + tp->mss_cache_std; old_snd_cwnd = tp->vegas.beg_snd_cwnd; /* Save the extent of the current window so we can use this @@ -2327,7 +2346,7 @@ static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt) static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) { - if (tp->packets_out==0) { + if (!tcp_get_pcount(&tp->packets_out)) { tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); } else { tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); @@ -2343,7 +2362,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) int acked = 0; __s32 seq_rtt = -1; - while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { + while ((skb = skb_peek(&sk->sk_write_queue)) && + skb != sk->sk_send_head) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); __u8 sacked = scb->sacked; @@ -2361,7 +2381,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. 
*/ - if(!(scb->flags & TCPCB_FLAG_SYN)) { + if (!(scb->flags & TCPCB_FLAG_SYN)) { acked |= FLAG_DATA_ACKED; } else { acked |= FLAG_SYN_ACKED; @@ -2369,27 +2389,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } if (sacked) { - if(sacked & TCPCB_RETRANS) { + if (sacked & TCPCB_RETRANS) { if(sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); acked |= FLAG_RETRANS_DATA_ACKED; seq_rtt = -1; } else if (seq_rtt < 0) seq_rtt = now - scb->when; - if(sacked & TCPCB_SACKED_ACKED) - tp->sacked_out--; - if(sacked & TCPCB_LOST) - tp->lost_out--; - if(sacked & TCPCB_URG) { + if (sacked & TCPCB_SACKED_ACKED) + tcp_dec_pcount(&tp->sacked_out, skb); + if (sacked & TCPCB_LOST) + tcp_dec_pcount(&tp->lost_out, skb); + if (sacked & TCPCB_URG) { if (tp->urg_mode && !before(scb->end_seq, tp->snd_up)) tp->urg_mode = 0; } } else if (seq_rtt < 0) seq_rtt = now - scb->when; - if (tp->fackets_out) - tp->fackets_out--; - tp->packets_out--; + tcp_dec_pcount_approx(&tp->fackets_out, skb); + tcp_packets_out_dec(tp, skb); __skb_unlink(skb, skb->list); sk_stream_free_skb(sk, skb); } @@ -2400,24 +2419,27 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - if (!tp->packets_out && tp->sack_ok) { - if (tp->lost_out) { - printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, - tp->ca_state); - tp->lost_out = 0; + BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0); + if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) { + if (tcp_get_pcount(&tp->lost_out)) { + printk(KERN_DEBUG "Leak l=%u %d\n", + tcp_get_pcount(&tp->lost_out), + tp->ca_state); + tcp_set_pcount(&tp->lost_out, 0); } - if (tp->sacked_out) { - printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out, - tp->ca_state); - 
tp->sacked_out = 0; + if (tcp_get_pcount(&tp->sacked_out)) { + printk(KERN_DEBUG "Leak s=%u %d\n", + tcp_get_pcount(&tp->sacked_out), + tp->ca_state); + tcp_set_pcount(&tp->sacked_out, 0); } - if (tp->retrans_out) { - printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out, - tp->ca_state); - tp->retrans_out = 0; + if (tcp_get_pcount(&tp->retrans_out)) { + printk(KERN_DEBUG "Leak r=%u %d\n", + tcp_get_pcount(&tp->retrans_out), + tp->ca_state); + tcp_set_pcount(&tp->retrans_out, 0); } } #endif @@ -2712,19 +2734,19 @@ static void westwood_dupack_update(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); - tp->westwood.accounted += tp->mss_cache; - tp->westwood.cumul_ack = tp->mss_cache; + tp->westwood.accounted += tp->mss_cache_std; + tp->westwood.cumul_ack = tp->mss_cache_std; } static inline int westwood_may_change_cumul(struct tcp_opt *tp) { - return ((tp->westwood.cumul_ack) > tp->mss_cache); + return ((tp->westwood.cumul_ack) > tp->mss_cache_std); } static inline void westwood_partial_update(struct tcp_opt *tp) { tp->westwood.accounted -= tp->westwood.cumul_ack; - tp->westwood.cumul_ack = tp->mss_cache; + tp->westwood.cumul_ack = tp->mss_cache_std; } static inline void westwood_complete_update(struct tcp_opt *tp) @@ -2835,7 +2857,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) */ sk->sk_err_soft = 0; tp->rcv_tstamp = tcp_time_stamp; - prior_packets = tp->packets_out; + prior_packets = tcp_get_pcount(&tp->packets_out); if (!prior_packets) goto no_queue; @@ -3857,11 +3879,11 @@ static void tcp_new_space(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); - if (tp->packets_out < tp->snd_cwnd && + if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd && !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && !tcp_memory_pressure && atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { - int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) + + int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache_std) + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), demanded = 
max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2cfd74fbf566..0fb326e84f28 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2075,7 +2075,8 @@ static int tcp_v4_init_sock(struct sock *sk) */ tp->snd_ssthresh = 0x7fffffff; /* Infinity */ tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache_std = tp->mss_cache = 536; + tp->mss_tso_factor = 1; tp->reordering = sysctl_tcp_reordering; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 79c1884c2b8b..ab04144245e5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -752,11 +752,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->mdev = TCP_TIMEOUT_INIT; newtp->rto = TCP_TIMEOUT_INIT; - newtp->packets_out = 0; - newtp->left_out = 0; - newtp->retrans_out = 0; - newtp->sacked_out = 0; - newtp->fackets_out = 0; + tcp_set_pcount(&newtp->packets_out, 0); + tcp_set_pcount(&newtp->left_out, 0); + tcp_set_pcount(&newtp->retrans_out, 0); + tcp_set_pcount(&newtp->sacked_out, 0); + tcp_set_pcount(&newtp->fackets_out, 0); newtp->snd_ssthresh = 0x7fffffff; /* So many TCP implementations out there (incorrectly) count the diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bd3d0133f724..0a70d082028c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -52,8 +52,7 @@ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) sk->sk_send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - if (tp->packets_out++ == 0) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + tcp_packets_out_inc(sk, tp, skb); } /* SND.NXT, if window was not shrunk. 
@@ -123,7 +122,8 @@ static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *s { u32 now = tcp_time_stamp; - if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) + if (!tcp_get_pcount(&tp->packets_out) && + (s32)(now - tp->lsndtime) > tp->rto) tcp_cwnd_restart(tp, __sk_dst_get(sk)); tp->lsndtime = now; @@ -259,7 +259,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) */ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) { - if(skb != NULL) { + if (skb != NULL) { struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -268,6 +268,8 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) int sysctl_flags; int err; + BUG_ON(!TCP_SKB_CB(skb)->tso_factor); + #define SYSCTL_FLAG_TSTAMPS 0x1 #define SYSCTL_FLAG_WSCALE 0x2 #define SYSCTL_FLAG_SACK 0x4 @@ -414,8 +416,7 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss) if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) { sk->sk_send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - if (tp->packets_out++ == 0) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + tcp_packets_out_inc(sk, tp, skb); return; } } @@ -453,10 +454,13 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); + TCP_SKB_CB(buff)->sacked = + (TCP_SKB_CB(skb)->sacked & + (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); + TCP_SKB_CB(buff)->tso_factor = tp->mss_tso_factor; if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { - tp->lost_out++; - tp->left_out++; + tcp_inc_pcount(&tp->lost_out, buff); + tcp_inc_pcount(&tp->left_out, buff); } TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; @@ -594,9 +598,10 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) /* And store cached results */ 
tp->pmtu_cookie = pmtu; tp->mss_cache = tp->mss_cache_std = mss_now; + tp->mss_tso_factor = 1; if (sk->sk_route_caps & NETIF_F_TSO) { - int large_mss; + int large_mss, factor; large_mss = 65535 - tp->af_specific->net_header_len - tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len; @@ -604,8 +609,15 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) if (tp->max_window && large_mss > (tp->max_window>>1)) large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len); - /* Always keep large mss multiple of real mss. */ - tp->mss_cache = mss_now*(large_mss/mss_now); + /* Always keep large mss multiple of real mss, but + * do not exceed congestion window. + */ + factor = large_mss / mss_now; + if (factor > tp->snd_cwnd) + factor = tp->snd_cwnd; + + tp->mss_cache = mss_now * factor; + tp->mss_tso_factor = factor; } return mss_now; @@ -637,7 +649,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle) * We also handle things correctly when the user adds some * IP options mid-stream. Silly to do, but cover it. */ - mss_now = tcp_current_mss(sk, 1); + mss_now = tcp_current_mss(sk, 1, NULL); while ((skb = sk->sk_send_head) && tcp_snd_test(tp, skb, mss_now, @@ -662,7 +674,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle) return 0; } - return !tp->packets_out && sk->sk_send_head; + return !tcp_get_pcount(&tp->packets_out) && sk->sk_send_head; } return 0; } @@ -788,7 +800,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m /* The first test we must make is that neither of these two * SKB's are still referenced by someone else. 
*/ - if(!skb_cloned(skb) && !skb_cloned(next_skb)) { + if (!skb_cloned(skb) && !skb_cloned(next_skb)) { int skb_size = skb->len, next_skb_size = next_skb->len; u16 flags = TCP_SKB_CB(skb)->flags; @@ -831,24 +843,23 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, next_skb); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { - tp->lost_out--; - tp->left_out--; + tcp_dec_pcount(&tp->lost_out, next_skb); + tcp_dec_pcount(&tp->left_out, next_skb); } /* Reno case is special. Sigh... */ - if (!tp->sack_ok && tp->sacked_out) { - tp->sacked_out--; - tp->left_out--; + if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) { + tcp_dec_pcount_approx(&tp->sacked_out, next_skb); + tcp_dec_pcount(&tp->left_out, next_skb); } /* Not quite right: it can be > snd.fack, but * it is better to underestimate fackets. 
*/ - if (tp->fackets_out) - tp->fackets_out--; + tcp_dec_pcount_approx(&tp->fackets_out, next_skb); + tcp_packets_out_dec(tp, next_skb); sk_stream_free_skb(sk, next_skb); - tp->packets_out--; } } @@ -860,7 +871,7 @@ void tcp_simple_retransmit(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); + unsigned int mss = tcp_current_mss(sk, 0, NULL); int lost = 0; sk_stream_for_retrans_queue(skb, sk) { @@ -868,11 +879,11 @@ void tcp_simple_retransmit(struct sock *sk) !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); } if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); lost = 1; } } @@ -905,7 +916,7 @@ void tcp_simple_retransmit(struct sock *sk) int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_opt *tp = tcp_sk(sk); - unsigned int cur_mss = tcp_current_mss(sk, 0); + unsigned int cur_mss = tcp_current_mss(sk, 0, NULL); int err; /* Do not sent more than we queued. 1/4 is reserved for possible @@ -923,6 +934,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) sk->sk_route_caps &= ~NETIF_F_TSO; sk->sk_no_largesend = 1; tp->mss_cache = tp->mss_cache_std; + tp->mss_tso_factor = 1; } if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) @@ -938,12 +950,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) && TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; - if(skb->len > cur_mss) { - if(tcp_fragment(sk, skb, cur_mss)) + if (skb->len > cur_mss) { + if (tcp_fragment(sk, skb, cur_mss)) return -ENOMEM; /* We'll try again later. */ /* New SKB created, account for it. */ - tp->packets_out++; + tcp_inc_pcount(&tp->packets_out, skb); } /* Collapse two adjacent packets if worthwhile and we can. 
*/ @@ -992,7 +1004,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } #endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; - tp->retrans_out++; + tcp_inc_pcount(&tp->retrans_out, skb); /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) @@ -1020,14 +1032,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; - int packet_cnt = tp->lost_out; + int packet_cnt = tcp_get_pcount(&tp->lost_out); /* First pass: retransmit lost packets. */ if (packet_cnt) { sk_stream_for_retrans_queue(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; + int pkts = TCP_SKB_CB(skb)->tso_factor; - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + if ((tcp_packets_in_flight(tp) + (pkts-1)) >= + tp->snd_cwnd) return; if (sacked&TCPCB_LOST) { @@ -1044,7 +1058,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } - if (--packet_cnt <= 0) + packet_cnt -= TCP_SKB_CB(skb)->tso_factor; + if (packet_cnt <= 0) break; } } @@ -1073,17 +1088,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk) packet_cnt = 0; sk_stream_for_retrans_queue(skb, sk) { - if(++packet_cnt > tp->fackets_out) + int pkts = TCP_SKB_CB(skb)->tso_factor; + + packet_cnt += pkts; + if (packet_cnt > tcp_get_pcount(&tp->fackets_out)) break; - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + if ((tcp_packets_in_flight(tp) + (pkts-1)) >= tp->snd_cwnd) break; - if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) + if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) continue; /* Ok, retransmit it. */ - if(tcp_retransmit_skb(sk, skb)) + if (tcp_retransmit_skb(sk, skb)) break; if (skb == skb_peek(&sk->sk_write_queue)) @@ -1101,13 +1119,13 @@ void tcp_send_fin(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue); - unsigned int mss_now; + int mss_now; /* Optimization, tack on the FIN if we have a queue of * unsent frames. 
But be careful about outgoing SACKS * and IP options. */ - mss_now = tcp_current_mss(sk, 1); + mss_now = tcp_current_mss(sk, 1, NULL); if (sk->sk_send_head != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; @@ -1127,6 +1145,7 @@ void tcp_send_fin(struct sock *sk) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tso_factor = 1; /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ TCP_SKB_CB(skb)->seq = tp->write_seq; @@ -1158,6 +1177,7 @@ void tcp_send_active_reset(struct sock *sk, int priority) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tso_factor = 1; /* Send it off. */ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); @@ -1237,6 +1257,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->dest = req->rmt_port; TCP_SKB_CB(skb)->seq = req->snt_isn; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tso_factor = 1; th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(req->rcv_isn + 1); if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ @@ -1338,6 +1360,7 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; TCP_ECN_send_syn(sk, tp, buff); TCP_SKB_CB(buff)->sacked = 0; + TCP_SKB_CB(buff)->tso_factor = 1; buff->csum = 0; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; @@ -1350,7 +1373,7 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = TCP_SKB_CB(buff)->when; __skb_queue_tail(&sk->sk_write_queue, buff); sk_charge_skb(sk, buff); - tp->packets_out++; + tcp_inc_pcount(&tp->packets_out, buff); tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); @@ -1437,6 +1460,7 @@ void tcp_send_ack(struct sock *sk) buff->csum = 0; TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(buff)->sacked = 0; + 
TCP_SKB_CB(buff)->tso_factor = 1; /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); @@ -1471,6 +1495,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) skb->csum = 0; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(skb)->sacked = urgent; + TCP_SKB_CB(skb)->tso_factor = 1; /* Use a previous sequence. This should cause the other * end to send an ack. Don't queue or clone SKB, just @@ -1491,7 +1516,7 @@ int tcp_write_wakeup(struct sock *sk) if ((skb = sk->sk_send_head) != NULL && before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { int err; - int mss = tcp_current_mss(sk, 0); + int mss = tcp_current_mss(sk, 0, NULL); int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) @@ -1513,6 +1538,7 @@ int tcp_write_wakeup(struct sock *sk) sk->sk_no_largesend = 1; sk->sk_route_caps &= ~NETIF_F_TSO; tp->mss_cache = tp->mss_cache_std; + tp->mss_tso_factor = 1; } } TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; @@ -1542,7 +1568,7 @@ void tcp_send_probe0(struct sock *sk) err = tcp_write_wakeup(sk); - if (tp->packets_out || !sk->sk_send_head) { + if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ tp->probes_out = 0; tp->backoff = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 72a5a50b50ab..c060bb333471 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -121,7 +121,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) * 1. Last segment was sent recently. */ if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. 
*/ - (!tp->snd_wnd && !tp->packets_out)) + (!tp->snd_wnd && !tcp_get_pcount(&tp->packets_out))) do_reset = 1; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); @@ -269,7 +269,7 @@ static void tcp_probe_timer(struct sock *sk) struct tcp_opt *tp = tcp_sk(sk); int max_probes; - if (tp->packets_out || !sk->sk_send_head) { + if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) { tp->probes_out = 0; return; } @@ -316,7 +316,7 @@ static void tcp_retransmit_timer(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); - if (tp->packets_out == 0) + if (!tcp_get_pcount(&tp->packets_out)) goto out; BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue)); @@ -606,7 +606,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ - if (tp->packets_out || sk->sk_send_head) + if (tcp_get_pcount(&tp->packets_out) || sk->sk_send_head) goto resched; elapsed = tcp_time_stamp - tp->rcv_tstamp; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 73b34df7fd17..0ecd0d8dfa0f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1929,7 +1929,8 @@ static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache_std = tp->mss_cache = 536; + tp->mss_tso_factor = 1; tp->reordering = sysctl_tcp_reordering; -- cgit v1.2.3 From 95d267365e82205bada1b50fe699fb2284aa090e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Sep 2004 20:01:22 -0700 Subject: [TCP]: Calculate SKB tso factor more accurately. Eliminate tp->mss_tso_factor. Instead, we calculate the SKB tso factor as we walk the write queue for initial transmit or fragment SKBs. Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 - include/net/tcp.h | 8 +++----- net/ipv4/tcp.c | 21 +++++++++------------ net/ipv4/tcp_input.c | 1 - net/ipv4/tcp_ipv4.c | 1 - net/ipv4/tcp_output.c | 49 +++++++++++++++++++++++++++++++++++-------------- net/ipv6/tcp_ipv6.c | 1 - 7 files changed, 47 insertions(+), 35 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ebf15b6a8162..86ca98c5ef8f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -254,7 +254,6 @@ struct tcp_opt { __u32 max_window; /* Maximal window ever seen from peer */ __u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 mss_cache; /* Cached effective mss, not including SACKS */ - __u32 mss_tso_factor; /* Real packets per TSO packet */ __u16 mss_cache_std; /* Like mss_cache, but without TSO */ __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ diff --git a/include/net/tcp.h b/include/net/tcp.h index efda37b84207..1de15c7a560a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1047,7 +1047,7 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long * is not a big flaw. */ -static inline unsigned int tcp_current_mss(struct sock *sk, int large, int *factor) +static inline unsigned int tcp_current_mss(struct sock *sk, int large) { struct tcp_opt *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); @@ -1057,8 +1057,6 @@ static inline unsigned int tcp_current_mss(struct sock *sk, int large, int *fact (sk->sk_route_caps & NETIF_F_TSO) && !tp->urg_mode); mss_now = do_large ? tp->mss_cache : tp->mss_cache_std; - if (factor) - *factor = do_large ? 
tp->mss_tso_factor : 1; if (dst) { u32 mtu = dst_pmtu(dst); @@ -1545,7 +1543,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk, static __inline__ void tcp_push_pending_frames(struct sock *sk, struct tcp_opt *tp) { - __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1, NULL), tp->nonagle); + __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); } static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) @@ -1553,7 +1551,7 @@ static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) struct sk_buff *skb = sk->sk_send_head; return (skb && - tcp_snd_test(tp, skb, tcp_current_mss(sk, 1, NULL), + tcp_snd_test(tp, skb, tcp_current_mss(sk, 1), tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle)); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9df826c8e22b..36953ef7e6c2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -590,14 +590,13 @@ static inline int forced_push(struct tcp_opt *tp) } static inline void skb_entail(struct sock *sk, struct tcp_opt *tp, - struct sk_buff *skb, int tso_factor) + struct sk_buff *skb) { skb->csum = 0; TCP_SKB_CB(skb)->seq = tp->write_seq; TCP_SKB_CB(skb)->end_seq = tp->write_seq; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(skb)->sacked = 0; - TCP_SKB_CB(skb)->tso_factor = tso_factor; __skb_queue_tail(&sk->sk_write_queue, skb); sk_charge_skb(sk, skb); if (!sk->sk_send_head) @@ -633,7 +632,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse size_t psize, int flags) { struct tcp_opt *tp = tcp_sk(sk); - int mss_now, mss_factor_now; + int mss_now; int err; ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -645,7 +644,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), &mss_factor_now); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); copied = 0; err = -EPIPE; 
@@ -669,7 +668,7 @@ new_segment: if (!skb) goto wait_for_memory; - skb_entail(sk, tp, skb, mss_factor_now); + skb_entail(sk, tp, skb); copy = mss_now; } @@ -720,8 +719,7 @@ wait_for_memory: if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), - &mss_factor_now); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); } out: @@ -782,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; int iovlen, flags; - int mss_now, mss_factor_now; + int mss_now; int err, copied; long timeo; @@ -800,7 +798,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* This should be in poll */ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), &mss_factor_now); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); /* Ok commence sending. */ iovlen = msg->msg_iovlen; @@ -845,7 +843,7 @@ new_segment: NETIF_F_HW_CSUM)) skb->ip_summed = CHECKSUM_HW; - skb_entail(sk, tp, skb, mss_factor_now); + skb_entail(sk, tp, skb); copy = mss_now; } @@ -964,8 +962,7 @@ wait_for_memory: if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), - &mss_factor_now); + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f4ec16169906..d7fb3cde4f20 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -974,7 +974,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ sk->sk_route_caps &= ~NETIF_F_TSO; sk->sk_no_largesend = 1; tp->mss_cache = tp->mss_cache_std; - tp->mss_tso_factor = 1; } if (!tcp_get_pcount(&tp->sacked_out)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0fb326e84f28..73f12904c7c3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2076,7 +2076,6 @@ static int tcp_v4_init_sock(struct sock *sk) 
tp->snd_ssthresh = 0x7fffffff; /* Infinity */ tp->snd_cwnd_clamp = ~0; tp->mss_cache_std = tp->mss_cache = 536; - tp->mss_tso_factor = 1; tp->reordering = sysctl_tcp_reordering; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0a70d082028c..336c7121b6b6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -422,6 +422,23 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss) } } +static void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss, + unsigned int mss_std) +{ + if (skb->len <= mss_std) { + /* Avoid the costly divide in the normal + * non-TSO case. + */ + TCP_SKB_CB(skb)->tso_factor = 1; + } else { + unsigned int factor; + + factor = skb->len + (mss_std - 1); + factor /= mss; + TCP_SKB_CB(skb)->tso_factor = factor; + } +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -457,7 +474,6 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) TCP_SKB_CB(buff)->sacked = (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); - TCP_SKB_CB(buff)->tso_factor = tp->mss_tso_factor; if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { tcp_inc_pcount(&tp->lost_out, buff); tcp_inc_pcount(&tp->left_out, buff); @@ -484,6 +500,10 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) */ TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; + /* Fix up tso_factor for both original and new SKB. */ + tcp_set_skb_tso_factor(skb, tp->mss_cache, tp->mss_cache_std); + tcp_set_skb_tso_factor(buff, tp->mss_cache, tp->mss_cache_std); + /* Link BUFF into the send queue. 
*/ __skb_append(skb, buff); @@ -598,7 +618,6 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) /* And store cached results */ tp->pmtu_cookie = pmtu; tp->mss_cache = tp->mss_cache_std = mss_now; - tp->mss_tso_factor = 1; if (sk->sk_route_caps & NETIF_F_TSO) { int large_mss, factor; @@ -617,7 +636,6 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) factor = tp->snd_cwnd; tp->mss_cache = mss_now * factor; - tp->mss_tso_factor = factor; } return mss_now; @@ -634,7 +652,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) int tcp_write_xmit(struct sock *sk, int nonagle) { struct tcp_opt *tp = tcp_sk(sk); - unsigned int mss_now; + unsigned int mss_now, mss_std; /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and all @@ -649,7 +667,8 @@ int tcp_write_xmit(struct sock *sk, int nonagle) * We also handle things correctly when the user adds some * IP options mid-stream. Silly to do, but cover it. */ - mss_now = tcp_current_mss(sk, 1, NULL); + mss_now = tcp_current_mss(sk, 1); + mss_std = tp->mss_cache_std; while ((skb = sk->sk_send_head) && tcp_snd_test(tp, skb, mss_now, @@ -658,7 +677,8 @@ int tcp_write_xmit(struct sock *sk, int nonagle) if (skb->len > mss_now) { if (tcp_fragment(sk, skb, mss_now)) break; - } + } else + tcp_set_skb_tso_factor(skb, mss_now, mss_std); TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) @@ -871,7 +891,7 @@ void tcp_simple_retransmit(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0, NULL); + unsigned int mss = tcp_current_mss(sk, 0); int lost = 0; sk_stream_for_retrans_queue(skb, sk) { @@ -916,7 +936,7 @@ void tcp_simple_retransmit(struct sock *sk) int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_opt *tp = tcp_sk(sk); - unsigned int cur_mss = tcp_current_mss(sk, 0, NULL); + unsigned int cur_mss = tcp_current_mss(sk, 0); int err; /* Do not sent more than we 
queued. 1/4 is reserved for possible @@ -934,7 +954,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) sk->sk_route_caps &= ~NETIF_F_TSO; sk->sk_no_largesend = 1; tp->mss_cache = tp->mss_cache_std; - tp->mss_tso_factor = 1; } if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) @@ -1125,7 +1144,7 @@ void tcp_send_fin(struct sock *sk) * unsent frames. But be careful about outgoing SACKS * and IP options. */ - mss_now = tcp_current_mss(sk, 1, NULL); + mss_now = tcp_current_mss(sk, 1); if (sk->sk_send_head != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; @@ -1516,8 +1535,9 @@ int tcp_write_wakeup(struct sock *sk) if ((skb = sk->sk_send_head) != NULL && before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { int err; - int mss = tcp_current_mss(sk, 0, NULL); - int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; + unsigned int mss = tcp_current_mss(sk, 0); + unsigned int mss_std = tp->mss_cache_std; + unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; @@ -1538,9 +1558,10 @@ int tcp_write_wakeup(struct sock *sk) sk->sk_no_largesend = 1; sk->sk_route_caps &= ~NETIF_F_TSO; tp->mss_cache = tp->mss_cache_std; - tp->mss_tso_factor = 1; } - } + } else + tcp_set_skb_tso_factor(skb, mss, mss_std); + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0ecd0d8dfa0f..ebed7e197aac 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1930,7 +1930,6 @@ static int tcp_v6_init_sock(struct sock *sk) tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_clamp = ~0; tp->mss_cache_std = tp->mss_cache = 536; - tp->mss_tso_factor = 1; tp->reordering = sysctl_tcp_reordering; -- cgit v1.2.3 From 5a6bdc92894c920dcc7fcf7010d0eb05de2e3d21 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Mon, 6 Sep 2004 21:03:16 -0700 Subject: [TCP]: Make sure SKB tso factor is setup early enough. It needs to be set so that congestion window calculations have a valid value to work with. This means that doing it at write queue running time is too late. Signed-off-by: David S. Miller --- include/net/tcp.h | 7 +++++++ net/ipv4/tcp_output.c | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1de15c7a560a..1a8a317f2bd5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1469,6 +1469,8 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n tcp_minshall_check(tp)))); } +extern void tcp_set_skb_tso_factor(struct sk_buff *, unsigned int, unsigned int); + /* This checks if the data bearing packet SKB (usually sk->sk_send_head) * should be put on the wire right now. */ @@ -1477,6 +1479,11 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, { int pkts = TCP_SKB_CB(skb)->tso_factor; + if (!pkts) { + tcp_set_skb_tso_factor(skb, cur_mss, tp->mss_cache_std); + pkts = TCP_SKB_CB(skb)->tso_factor; + } + /* RFC 1122 - section 4.2.3.4 * * We must queue if diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 336c7121b6b6..32174549304e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -422,8 +422,8 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss) } } -static void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss, - unsigned int mss_std) +void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss, + unsigned int mss_std) { if (skb->len <= mss_std) { /* Avoid the costly divide in the normal @@ -652,7 +652,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) int tcp_write_xmit(struct sock *sk, int nonagle) { struct tcp_opt *tp = tcp_sk(sk); - unsigned int mss_now, mss_std; + unsigned int mss_now; /* If we are closed, the bytes will have to remain here. 
* In time closedown will finish, we empty the write queue and all @@ -668,7 +668,6 @@ int tcp_write_xmit(struct sock *sk, int nonagle) * IP options mid-stream. Silly to do, but cover it. */ mss_now = tcp_current_mss(sk, 1); - mss_std = tp->mss_cache_std; while ((skb = sk->sk_send_head) && tcp_snd_test(tp, skb, mss_now, @@ -677,8 +676,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle) if (skb->len > mss_now) { if (tcp_fragment(sk, skb, mss_now)) break; - } else - tcp_set_skb_tso_factor(skb, mss_now, mss_std); + } TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) @@ -1059,6 +1057,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) __u8 sacked = TCP_SKB_CB(skb)->sacked; int pkts = TCP_SKB_CB(skb)->tso_factor; + BUG_ON(!pkts); + if ((tcp_packets_in_flight(tp) + (pkts-1)) >= tp->snd_cwnd) return; @@ -1109,6 +1109,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) sk_stream_for_retrans_queue(skb, sk) { int pkts = TCP_SKB_CB(skb)->tso_factor; + BUG_ON(!pkts); + packet_cnt += pkts; if (packet_cnt > tcp_get_pcount(&tp->fackets_out)) break; @@ -1536,7 +1538,6 @@ int tcp_write_wakeup(struct sock *sk) before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { int err; unsigned int mss = tcp_current_mss(sk, 0); - unsigned int mss_std = tp->mss_cache_std; unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) @@ -1559,8 +1560,8 @@ int tcp_write_wakeup(struct sock *sk) sk->sk_route_caps &= ~NETIF_F_TSO; tp->mss_cache = tp->mss_cache_std; } - } else - tcp_set_skb_tso_factor(skb, mss, mss_std); + } else if (!TCP_SKB_CB(skb)->tso_factor) + tcp_set_skb_tso_factor(skb, mss, tp->mss_cache_std); TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; -- cgit v1.2.3 From f77bdc6fc029002e55a99ada37f9b38854cceef3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 6 Sep 2004 22:55:45 -0700 Subject: [NET]: Fix CONFIG_COMPAT build with networking 
disabled. Signed-off-by: David S. Miller --- fs/compat_ioctl.c | 10 ++++++---- net/Makefile | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index dc1a2d2c41d5..a493b5b5871b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -407,6 +408,7 @@ out: return err; } +#ifdef CONFIG_NET static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg) { struct compat_timeval __user *up = compat_ptr(arg); @@ -461,7 +463,6 @@ struct ifconf32 { compat_caddr_t ifcbuf; }; -#ifdef CONFIG_NET static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) { struct net_device *dev; @@ -481,7 +482,6 @@ static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32)); return (err ? -EFAULT : 0); } -#endif static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) { @@ -797,6 +797,7 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return ret; } +#endif struct hd_geometry32 { unsigned char heads; @@ -1872,7 +1873,8 @@ static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg) return -EINVAL; } -static int ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) +static __attribute_used__ int +ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) { return -EINVAL; } @@ -3162,7 +3164,6 @@ HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) #ifdef CONFIG_NET HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32) -#endif HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf) HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc) HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc) @@ -3206,6 +3207,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc) /* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. 
*/ HANDLE_IOCTL(SIOCRTMSG, ret_einval) HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) +#endif HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) HANDLE_IOCTL(BLKRAGET, w_long) HANDLE_IOCTL(BLKGETSIZE, w_long) diff --git a/net/Makefile b/net/Makefile index 61740b47a67d..a46436e0fcc2 100644 --- a/net/Makefile +++ b/net/Makefile @@ -9,7 +9,8 @@ obj-y := nonet.o obj-$(CONFIG_NET) := socket.o core/ -obj-$(CONFIG_COMPAT) += compat.o +tmp-$(CONFIG_COMPAT) := compat.o +obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -- cgit v1.2.3 From 6cb553d2ed53c92ac9e845abb0b383ef0914d924 Mon Sep 17 00:00:00 2001 From: Eric Lemoine Date: Mon, 6 Sep 2004 23:18:30 -0700 Subject: [SUNGEM]: Add NAPI support. Signed-off-by: David S. Miller --- drivers/net/sungem.c | 137 ++++++++++++++++++++++++++++++++++++++++----------- drivers/net/sungem.h | 4 ++ 2 files changed, 111 insertions(+), 30 deletions(-) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 72138612d2fb..6177397c30cb 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -181,6 +181,18 @@ static inline void phy_write(struct gem *gp, int reg, u16 val) __phy_write(gp, gp->mii_phy_addr, reg, val); } +static inline void gem_enable_ints(struct gem *gp) +{ + /* Enable all interrupts but TXDONE */ + writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK); +} + +static inline void gem_disable_ints(struct gem *gp) +{ + /* Disable all interrupts, including TXDONE */ + writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK); +} + static void gem_handle_mif_event(struct gem *gp, u32 reg_val, u32 changed_bits) { if (netif_msg_intr(gp)) @@ -639,7 +651,7 @@ static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_st } gp->net_stats.tx_packets++; - dev_kfree_skb_irq(skb); + dev_kfree_skb(skb); } gp->tx_old = entry; @@ -678,12 +690,12 @@ static __inline__ void gem_post_rxds(struct gem *gp, int limit) } } -static void gem_rx(struct gem *gp) +static int gem_rx(struct 
gem *gp, int work_to_do) { - int entry, drops; + int entry, drops, work_done = 0; u32 done; - if (netif_msg_intr(gp)) + if (netif_msg_rx_status(gp)) printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n", gp->dev->name, readl(gp->regs + RXDMA_DONE), gp->rx_new); @@ -700,6 +712,9 @@ static void gem_rx(struct gem *gp) if ((status & RXDCTRL_OWN) != 0) break; + if (work_done >= RX_RING_SIZE || work_done >= work_to_do) + break; + /* When writing back RX descriptor, GEM writes status * then buffer address, possibly in seperate transactions. * If we don't wait for the chip to write both, we could @@ -713,6 +728,9 @@ static void gem_rx(struct gem *gp) break; } + /* We can now account for the work we're about to do */ + work_done++; + skb = gp->rx_skbs[entry]; len = (status & RXDCTRL_BUFSZ) >> 16; @@ -775,7 +793,8 @@ static void gem_rx(struct gem *gp) skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff); skb->ip_summed = CHECKSUM_HW; skb->protocol = eth_type_trans(skb, gp->dev); - netif_rx(skb); + + netif_receive_skb(skb); gp->net_stats.rx_packets++; gp->net_stats.rx_bytes += len; @@ -792,32 +811,88 @@ static void gem_rx(struct gem *gp) if (drops) printk(KERN_INFO "%s: Memory squeeze, deferring packet.\n", gp->dev->name); + + return work_done; +} + +static int gem_poll(struct net_device *dev, int *budget) +{ + struct gem *gp = dev->priv; + unsigned long flags; + + spin_lock_irqsave(&gp->lock, flags); + + do { + int work_to_do, work_done; + + /* Handle anomalies */ + if (gp->status & GREG_STAT_ABNORMAL) { + if (gem_abnormal_irq(dev, gp, gp->status)) + break; + } + + /* Run TX completion thread */ + gem_tx(dev, gp, gp->status); + + spin_unlock_irqrestore(&gp->lock, flags); + + /* Run RX thread. We don't use any locking here, + * code willing to do bad things - like cleaning the + * rx ring - must call netif_poll_disable(), which + * schedule_timeout()'s if polling is already disabled. 
+ */ + work_to_do = min(*budget, dev->quota); + + work_done = gem_rx(gp, work_to_do); + + *budget -= work_done; + dev->quota -= work_done; + + if (work_done >= work_to_do) + return 1; + + spin_lock_irqsave(&gp->lock, flags); + + gp->status = readl(gp->regs + GREG_STAT); + } while (gp->status & GREG_STAT_NAPI); + + __netif_rx_complete(dev); + gem_enable_ints(gp); + + spin_unlock_irqrestore(&gp->lock, flags); + return 0; } static irqreturn_t gem_interrupt(int irq, void *dev_id, struct pt_regs *regs) { struct net_device *dev = dev_id; struct gem *gp = dev->priv; - u32 gem_status = readl(gp->regs + GREG_STAT); + unsigned long flags; /* Swallow interrupts when shutting the chip down */ - if (gp->hw_running == 0) - goto out; + if (!gp->hw_running) + return IRQ_HANDLED; - spin_lock(&gp->lock); + spin_lock_irqsave(&gp->lock, flags); + + if (netif_rx_schedule_prep(dev)) { + u32 gem_status = readl(gp->regs + GREG_STAT); - if (gem_status & GREG_STAT_ABNORMAL) { - if (gem_abnormal_irq(dev, gp, gem_status)) - goto out_unlock; + if (gem_status == 0) { + spin_unlock_irqrestore(&gp->lock, flags); + return IRQ_NONE; + } + gp->status = gem_status; + gem_disable_ints(gp); + __netif_rx_schedule(dev); } - if (gem_status & (GREG_STAT_TXALL | GREG_STAT_TXINTME)) - gem_tx(dev, gp, gem_status); - if (gem_status & GREG_STAT_RXDONE) - gem_rx(gp); -out_unlock: - spin_unlock(&gp->lock); -out: + spin_unlock_irqrestore(&gp->lock, flags); + + /* If polling was disabled at the time we received that + * interrupt, we may return IRQ_HANDLED here while we + * should return IRQ_NONE. No big deal... + */ return IRQ_HANDLED; } @@ -1312,19 +1387,12 @@ static void gem_reset_task(void *data) { struct gem *gp = (struct gem *) data; - /* The link went down, we reset the ring, but keep - * DMA stopped. Todo: Use this function for reset - * on error as well. 
- */ - + netif_poll_disable(gp->dev); spin_lock_irq(&gp->lock); if (gp->hw_running && gp->opened) { - /* Make sure we don't get interrupts or tx packets */ netif_stop_queue(gp->dev); - writel(0xffffffff, gp->regs + GREG_IMASK); - /* Reset the chip & rings */ gem_stop(gp); gem_init_rings(gp); @@ -1337,6 +1405,7 @@ static void gem_reset_task(void *data) gp->reset_task_pending = 0; spin_unlock_irq(&gp->lock); + netif_poll_enable(gp->dev); } static void gem_link_timer(unsigned long data) @@ -2214,11 +2283,15 @@ static int gem_close(struct net_device *dev) /* Make sure we don't get distracted by suspend/resume */ down(&gp->pm_sem); + /* Note: we don't need to call netif_poll_disable() here because + * our caller (dev_close) already did it for us + */ + /* Stop traffic, mark us closed */ spin_lock_irq(&gp->lock); gp->opened = 0; - writel(0xffffffff, gp->regs + GREG_IMASK); + netif_stop_queue(dev); /* Stop chip */ @@ -2247,6 +2320,8 @@ static int gem_suspend(struct pci_dev *pdev, u32 state) struct net_device *dev = pci_get_drvdata(pdev); struct gem *gp = dev->priv; + netif_poll_disable(dev); + /* We hold the PM semaphore during entire driver * sleep time */ @@ -2262,8 +2337,6 @@ static int gem_suspend(struct pci_dev *pdev, u32 state) /* Stop traffic, mark us closed */ netif_device_detach(dev); - writel(0xffffffff, gp->regs + GREG_IMASK); - /* Stop chip */ gem_stop(gp); @@ -2317,6 +2390,8 @@ static int gem_resume(struct pci_dev *pdev) } up(&gp->pm_sem); + netif_poll_enable(dev); + return 0; } #endif /* CONFIG_PM */ @@ -2806,6 +2881,8 @@ static int __devinit gem_init_one(struct pci_dev *pdev, dev->get_stats = gem_get_stats; dev->set_multicast_list = gem_set_multicast; dev->do_ioctl = gem_ioctl; + dev->poll = gem_poll; + dev->weight = 64; dev->ethtool_ops = &gem_ethtool_ops; dev->tx_timeout = gem_tx_timeout; dev->watchdog_timeo = 5 * HZ; diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h index eed77bfe1b60..bc0175acb52e 100644 --- a/drivers/net/sungem.h +++ 
b/drivers/net/sungem.h @@ -60,6 +60,9 @@ GREG_STAT_PCS | GREG_STAT_TXMAC | GREG_STAT_RXMAC | \ GREG_STAT_MAC | GREG_STAT_MIF | GREG_STAT_PCIERR) +#define GREG_STAT_NAPI (GREG_STAT_TXALL | GREG_STAT_TXINTME | \ + GREG_STAT_RXDONE | GREG_STAT_ABNORMAL) + /* The layout of GREG_IMASK and GREG_IACK is identical to GREG_STAT. * Bits set in GREG_IMASK will prevent that interrupt type from being * signalled to the cpu. GREG_IACK can be used to clear specific top-level @@ -969,6 +972,7 @@ struct gem { struct sk_buff *tx_skbs[RX_RING_SIZE]; u32 msg_enable; + u32 status; struct net_device_stats net_stats; -- cgit v1.2.3 From 2f122062ec392241d0f0453423284861934acb94 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 6 Sep 2004 23:21:21 -0700 Subject: [AX25]: Fix digipeat memory leak. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 47fbd98e0e81..3a84182f4474 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1176,13 +1176,16 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, /* check if we can remove this feature. It is broken. 
*/ printk(KERN_WARNING "ax25_connect(): %s uses autobind, please contact jreuter@yaina.de\n", current->comm); - if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) + if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) { + kfree(digi); goto out; + } ax25_fillin_cb(ax25, ax25->ax25_dev); ax25_cb_add(ax25); } else { if (ax25->ax25_dev == NULL) { + kfree(digi); err = -EHOSTUNREACH; goto out; } @@ -1191,8 +1194,7 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_type == SOCK_SEQPACKET && (ax25t=ax25_find_cb(&ax25->source_addr, &fsa->fsa_ax25.sax25_call, digi, ax25->ax25_dev->dev))) { - if (digi != NULL) - kfree(digi); + kfree(digi); err = -EADDRINUSE; /* Already such a connection */ ax25_cb_put(ax25t); goto out; -- cgit v1.2.3 From 0c5b8d8a0c82e3ec85588a306f9ae1df55706e4f Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 6 Sep 2004 23:24:36 -0700 Subject: [PACKET]: Fix deref before NULL check in packet_release() Using the automated source checker at coverity.com, they picked up on some code in packet_release() where a NULL check was done after dereferencing. Patch below. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 51d0514fd2a7..1b441a628b71 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -786,11 +786,13 @@ out: static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; - struct packet_opt *po = pkt_sk(sk); + struct packet_opt *po; if (!sk) return 0; + po = pkt_sk(sk); + write_lock_bh(&packet_sklist_lock); sk_del_node_init(sk); write_unlock_bh(&packet_sklist_lock); -- cgit v1.2.3 From cb40262783a52f8b4e44519b33684b76b2fa07dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 6 Sep 2004 23:35:09 -0700 Subject: [NET]: Fully plug netigh_create/inetdev_destroy race. 
So here is a patch to make sure that there is a barrier between the reading of dev->*_ptr and *dev->neigh_parms. With these barriers in place, it's clear that *dev->neigh_parms can no longer be NULL since once the parms are allocated, that pointer is never reset to NULL again. Therefore I've also removed the parms check in these paths. They were bogus to begin with since if they ever triggered then we'll have dead neigh entries stuck in the hash table. Unfortunately I couldn't arrange for this to happen with DECnet due to the dn_db->parms.up() call that's sandwiched between the assignment of dev->dn_ptr and dn_db->neigh_parms. So I've kept the parms check there but it will now fail instead of continuing. I've also added an smp_wmb() there so that at least we won't be reading garbage from dn_db->neigh_parms. DECnet is also buggy since there is no locking at all in the destruction path. It either needs locking or RCU like IPv4. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/s390/net/qeth_main.c | 8 +++----- net/atm/clip.c | 8 +++----- net/decnet/dn_dev.c | 2 ++ net/decnet/dn_neigh.c | 11 +++++++---- net/ipv4/arp.c | 8 +++----- net/ipv6/ndisc.c | 6 ++---- 6 files changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index d5285d105c65..a8e034b156cf 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -6718,17 +6718,15 @@ qeth_arp_constructor(struct neighbour *neigh) } rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = rcu_dereference(__in_dev_get(dev)); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; } parms = in_dev->arp_parms; - if (parms) { - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); - } + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); neigh->type = inet_addr_type(*(u32 *) neigh->primary_key); diff --git a/net/atm/clip.c b/net/atm/clip.c index 
f7756e1f93ce..104dd4d19da4 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -320,17 +320,15 @@ static int clip_constructor(struct neighbour *neigh) if (neigh->type != RTN_UNICAST) return -EINVAL; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = rcu_dereference(__in_dev_get(dev)); if (!in_dev) { rcu_read_unlock(); return -EINVAL; } parms = in_dev->arp_parms; - if (parms) { - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); - } + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); neigh->ops = &clip_neigh_ops; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 733b1cf6c440..a21a326808b4 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -1108,6 +1109,7 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err) memset(dn_db, 0, sizeof(struct dn_dev)); memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); + smp_wmb(); dev->dn_ptr = dn_db; dn_db->dev = dev; init_timer(&dn_db->timer); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index e874232ec54b..d3d6c592a5cb 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -139,17 +139,20 @@ static int dn_neigh_construct(struct neighbour *neigh) struct neigh_parms *parms; rcu_read_lock(); - dn_db = dev->dn_ptr; + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) { rcu_read_unlock(); return -EINVAL; } parms = dn_db->neigh_parms; - if (parms) { - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); + if (!parms) { + rcu_read_unlock(); + return -EINVAL; } + + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); if (dn_db->use_long) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f4e6a4a368ec..41e726ac3337 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -244,17 +244,15 @@ static int arp_constructor(struct neighbour *neigh) neigh->type = 
inet_addr_type(addr); rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = rcu_dereference(__in_dev_get(dev)); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; } parms = in_dev->arp_parms; - if (parms) { - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); - } + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); if (dev->hard_header == NULL) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6d23ea909aca..e1f5aeb79258 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -297,10 +297,8 @@ static int ndisc_constructor(struct neighbour *neigh) } parms = in6_dev->nd_parms; - if (parms) { - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); - } + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; -- cgit v1.2.3 From 43da55cbd54ed79f38556b39facb89d06448a267 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 6 Sep 2004 23:36:36 -0700 Subject: [NET]: Do less atomic count changes in dev_queue_xmit. With suggestions from Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7a50c543e505..47b3d8497a5d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1249,17 +1249,17 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask) return 0; } -#define HARD_TX_LOCK_BH(dev, cpu) { \ +#define HARD_TX_LOCK(dev, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - spin_lock_bh(&dev->xmit_lock); \ + spin_lock(&dev->xmit_lock); \ dev->xmit_lock_owner = cpu; \ } \ } -#define HARD_TX_UNLOCK_BH(dev) { \ +#define HARD_TX_UNLOCK(dev) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ dev->xmit_lock_owner = -1; \ - spin_unlock_bh(&dev->xmit_lock); \ + spin_unlock(&dev->xmit_lock); \ } \ } @@ -1313,7 +1313,12 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb_checksum_help(&skb, 0)) goto out_kfree_skb; - rcu_read_lock(); + + /* Disable soft irqs for various locks below. Also + * stops preemption for RCU. + */ + local_bh_disable(); + /* Updates of qdisc are serialized by queue_lock. * The struct Qdisc which is pointed to by qdisc is now a * rcu structure - it may be accessed without acquiring @@ -1332,18 +1337,16 @@ int dev_queue_xmit(struct sk_buff *skb) #endif if (q->enqueue) { /* Grab device queue */ - spin_lock_bh(&dev->queue_lock); + spin_lock(&dev->queue_lock); rc = q->enqueue(skb, q); qdisc_run(dev); - spin_unlock_bh(&dev->queue_lock); - rcu_read_unlock(); + spin_unlock(&dev->queue_lock); rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; goto out; } - rcu_read_unlock(); /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... 
@@ -1358,12 +1361,11 @@ int dev_queue_xmit(struct sk_buff *skb) Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags & IFF_UP) { - int cpu = get_cpu(); + int cpu = smp_processor_id(); /* ok because BHs are off */ if (dev->xmit_lock_owner != cpu) { - HARD_TX_LOCK_BH(dev, cpu); - put_cpu(); + HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev)) { if (netdev_nit) @@ -1371,17 +1373,16 @@ int dev_queue_xmit(struct sk_buff *skb) rc = 0; if (!dev->hard_start_xmit(skb, dev)) { - HARD_TX_UNLOCK_BH(dev); + HARD_TX_UNLOCK(dev); goto out; } } - HARD_TX_UNLOCK_BH(dev); + HARD_TX_UNLOCK(dev); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); goto out_enetdown; } else { - put_cpu(); /* Recursion is detected! It is possible, * unfortunately */ if (net_ratelimit()) @@ -1394,6 +1395,7 @@ out_enetdown: out_kfree_skb: kfree_skb(skb); out: + local_bh_enable(); return rc; } -- cgit v1.2.3 From f758b3aee98a3f74c3377431d9e44c016c91a4cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 6 Sep 2004 23:38:22 -0700 Subject: [NET]: Move SOCK_foo types into linux/net.h Every arch defines them the same without exception and with this we only need to update one spot when adding new socket types. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/asm-alpha/socket.h | 16 ---------------- include/asm-arm/socket.h | 16 ---------------- include/asm-arm26/socket.h | 16 ---------------- include/asm-cris/socket.h | 15 --------------- include/asm-h8300/socket.h | 16 ---------------- include/asm-i386/socket.h | 16 ---------------- include/asm-ia64/socket.h | 16 ---------------- include/asm-m68k/socket.h | 16 ---------------- include/asm-mips/socket.h | 16 ---------------- include/asm-parisc/socket.h | 14 -------------- include/asm-ppc/socket.h | 16 ---------------- include/asm-ppc64/socket.h | 16 ---------------- include/asm-s390/socket.h | 16 ---------------- include/asm-sh/socket.h | 16 ---------------- include/asm-sparc/socket.h | 16 ---------------- include/asm-sparc64/socket.h | 16 ---------------- include/asm-v850/socket.h | 16 ---------------- include/asm-x86_64/socket.h | 16 ---------------- include/linux/net.h | 21 +++++++++++++++++++++ 19 files changed, 21 insertions(+), 285 deletions(-) diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index 88912c4c8931..d00259d3dc78 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -55,20 +55,4 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 20 #define SO_SECURITY_ENCRYPTION_NETWORK 21 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. 
*/ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h index b05e717397a1..46d20585d951 100644 --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h index b05e717397a1..46d20585d951 100644 --- a/include/asm-arm26/socket.h +++ b/include/asm-arm26/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h index b4421808b676..f159b4f165f7 100644 --- a/include/asm-cris/socket.h +++ b/include/asm-cris/socket.h @@ -49,21 +49,6 @@ #define SO_PEERSEC 31 -#if defined(__KERNEL__) -/* Socket types. 
*/ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h index 070d46d2344e..af33b8525dcf 100644 --- a/include/asm-h8300/socket.h +++ b/include/asm-h8300/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 711f906193fd..07f6b38ad140 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nasty libc5 fixup - bletch */ -#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) -/* Socket types. 
*/ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h index bf4434e26a81..21a9f10d6baa 100644 --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h @@ -56,20 +56,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_IA64_SOCKET_H */ diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h index 68a33bfbae03..8d0b9fc2d07e 100644 --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. 
For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 6556c10be8f0..855b86f3ea0e 100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -68,20 +68,4 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_PEERSEC 30 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_DGRAM 1 /* datagram (conn.less) socket */ -#define SOCK_STREAM 2 /* stream (connection) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h index fd3f0f29eb3c..4a77996c1862 100644 --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h @@ -47,18 +47,4 @@ #define SO_PEERSEC 0x401d -#if defined(__KERNEL__) -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. 
*/ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h index bad94c36f1b8..4134376b0f66 100644 --- a/include/asm-ppc/socket.h +++ b/include/asm-ppc/socket.h @@ -53,20 +53,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif /* __KERNEL__ */ - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h index 1021a5268346..59e00dfc8b8e 100644 --- a/include/asm-ppc64/socket.h +++ b/include/asm-ppc64/socket.h @@ -54,20 +54,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h index 1855ec7a112b..0e96eeca4e6b 100644 --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h @@ -55,20 +55,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. 
*/ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h index 2b9469472f76..dde696c3b4c7 100644 --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* __ASM_SH_SOCKET_H */ diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h index cfa529d9bb96..c1154e3ecfdf 100644 --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h @@ -52,20 +52,4 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_NETWORK 0x5004 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. 
*/ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h index ba2230ba1d7d..865547a23908 100644 --- a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h @@ -52,20 +52,4 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_NETWORK 0x5004 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h index 7d7f80efa8b3..213b852af53e 100644 --- a/include/asm-v850/socket.h +++ b/include/asm-v850/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. 
*/ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* __V850_SOCKET_H__ */ diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index 373e7abadfb5..d9a252ea8210 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nasty libc5 fixup - bletch */ -#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/net.h b/include/linux/net.h index 80e7fec727e3..0f710b7e4121 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -61,6 +61,27 @@ typedef enum { #define SOCK_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 +/** sock_type - Socket types + * + * @SOCK_STREAM - stream (connection) socket + * @SOCK_DGRAM - datagram (conn.less) socket + * @SOCK_RAW - raw socket + * @SOCK_RDM - reliably-delivered message + * @SOCK_SEQPACKET - sequential packet socket + * @SOCK_PACKET - linux specific way of getting packets at the dev level. + * For writing rarp and other similar things on the user level. 
+ */ +enum sock_type { + SOCK_STREAM = 1, + SOCK_DGRAM = 2, + SOCK_RAW = 3, + SOCK_RDM = 4, + SOCK_SEQPACKET = 5, + SOCK_PACKET = 10, +}; + +#define SOCK_MAX (SOCK_PACKET + 1) + /** * struct socket - general BSD socket * @state - socket state (%SS_CONNECTED, etc) -- cgit v1.2.3 From e31cd2a33e417cd7e9328dfa32449223c968297c Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 6 Sep 2004 23:40:52 -0700 Subject: [PKT_SCHED]: Fix gact compile warnings. Signed-off-by: David S. Miller --- net/sched/gact.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/gact.c b/net/sched/gact.c index cd1a58c60485..5607f5e8cd83 100644 --- a/net/sched/gact.c +++ b/net/sched/gact.c @@ -76,7 +76,9 @@ tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,int ov { struct rtattr *tb[TCA_GACT_MAX]; struct tc_gact *parm = NULL; +#ifdef CONFIG_GACT_PROB struct tc_gact_p *p_parm = NULL; +#endif struct tcf_gact *p = NULL; int ret = 0; int size = sizeof (*p); @@ -176,7 +178,9 @@ tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tc_gact opt; +#ifdef CONFIG_GACT_PROB struct tc_gact_p p_opt; +#endif struct tcf_gact *p; struct tcf_t t; -- cgit v1.2.3 From a88031e42f7a3066c2cf73846ebb8c6d48d33cc9 Mon Sep 17 00:00:00 2001 From: Wensong Zhang Date: Mon, 6 Sep 2004 23:43:52 -0700 Subject: [IPVS] fixed to call nf_reset() to reset netfilter related fields Recommended by Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/ipvs/ip_vs_xmit.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 3a85f7a8d02a..9a8f051208d1 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -127,6 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT(skb, rt) \ do { \ + nf_reset(skb); \ (skb)->nfcache |= NFC_IPVS_PROPERTY; \ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ @@ -201,9 +202,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -280,9 +278,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -418,10 +413,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ - IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -480,9 +471,6 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -557,9 +545,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); rc = NF_STOLEN; -- cgit v1.2.3 From fc2520bc1110209f84131c8ca2a6a7168385d46f Mon Sep 17 00:00:00 
2001 From: Wensong Zhang Date: Mon, 6 Sep 2004 23:47:50 -0700 Subject: [IPVS] add the MAINTAINERS entry Signed-off-by: David S. Miller --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 629e0a82aafd..56d12e09cddd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1548,6 +1548,14 @@ M: kaber@coreworks.de L: netdev@oss.sgi.com S: Maintained +IPVS +P: Wensong Zhang +M: wensong@linux-vs.org +P: Julian Anastasov +M: ja@ssi.bg +L: lvs-users@linuxvirtualserver.org +S: Maintained + NFS CLIENT P: Trond Myklebust M: trond.myklebust@fys.uio.no -- cgit v1.2.3 From 7ad9cf1c3ef14cd0cf165dcf76d52d25855ea15b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 6 Sep 2004 23:49:43 -0700 Subject: [BRIDGE]: deadlock on device removal Fixes: https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=131569 Dead lock in bridge when removing device interface module. br_del_if assumes br->lock not held. This fixes case of: brctl addbr b0 brctl addif b0 eth0 rmmod eth0 Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_notify.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index d0702fbcb21d..f8fb49e34764 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -76,10 +76,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_UNREGISTER: + spin_unlock_bh(&br->lock); br_del_if(br, dev); - break; + goto done; } spin_unlock_bh(&br->lock); + done: return NOTIFY_DONE; } -- cgit v1.2.3 From aeb2cc38c4d50ca1fc4a8815c7eba8bb8eb14059 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 6 Sep 2004 23:50:58 -0700 Subject: [NET]: Fix compat layer setsockopt overzealous conversions. compat_sys_setsockopt() is a little overzealous about converting 32-bit stuff into 64-bit. It should match on level _and_ optname, not just optname. 
Currently it eats the IPV6_V6ONLY sockopt because its value (26) happens to match SO_ATTACH_FILTER. This makes it at least check 'level' for everything but IPT_SO_SET_REPLACE == IPT6_SO_SET_REPLACE, because that does seem to be the same in different levels. But do_netfilter_replace() is another can of worms entirely -- it doesn't actually work either, because some netfilter modules (like ipt_limit) include kernel-only bits which change size in the structure they share with userspace. Signed-off-by: David S. Miller --- net/compat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/compat.c b/net/compat.c index 998b21b65363..6080b6439b96 100644 --- a/net/compat.c +++ b/net/compat.c @@ -455,13 +455,15 @@ static int do_set_sock_timeout(int fd, int level, int optname, char __user *optv asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) { + /* SO_SET_REPLACE seems to be the same in all levels */ if (optname == IPT_SO_SET_REPLACE) return do_netfilter_replace(fd, level, optname, optval, optlen); - if (optname == SO_ATTACH_FILTER) + if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER) return do_set_attach_filter(fd, level, optname, optval, optlen); - if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) + if (level == SOL_SOCKET && + (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) return do_set_sock_timeout(fd, level, optname, optval, optlen); return sys_setsockopt(fd, level, optname, optval, optlen); -- cgit v1.2.3 From d81b34dd264c876be4b8fc86bc9754e2b566a6a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Sep 2004 23:52:08 -0700 Subject: [IPV4]: Mark inet_family_ops static Signed-off-by: David S. 
Miller --- net/ipv4/af_inet.c | 3 +-- net/sctp/protocol.c | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ed2923791e0e..c859b31fd0f1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -837,7 +837,7 @@ struct proto_ops inet_dgram_ops = { .sendpage = inet_sendpage, }; -struct net_proto_family inet_family_ops = { +static struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, .owner = THIS_MODULE, @@ -1157,7 +1157,6 @@ EXPORT_SYMBOL(inet_accept); EXPORT_SYMBOL(inet_bind); EXPORT_SYMBOL(inet_dgram_connect); EXPORT_SYMBOL(inet_dgram_ops); -EXPORT_SYMBOL(inet_family_ops); EXPORT_SYMBOL(inet_getname); EXPORT_SYMBOL(inet_ioctl); EXPORT_SYMBOL(inet_listen); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bae07708eb01..97f66fd770f4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -81,8 +81,6 @@ static struct sctp_af *sctp_af_v6_specific; kmem_cache_t *sctp_chunk_cachep; kmem_cache_t *sctp_bucket_cachep; -extern struct net_proto_family inet_family_ops; - extern int sctp_snmp_proc_init(void); extern int sctp_snmp_proc_exit(void); extern int sctp_eps_proc_init(void); -- cgit v1.2.3 From 50e2daaaae756c25216e2274acb61386f8f33a31 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Sep 2004 23:53:00 -0700 Subject: [NET]: Unexport {alloc,free}_divert_blk() these are called by dev.c for every device (and nowhere else) Signed-off-by: David S. 
Miller --- net/core/dv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/dv.c b/net/core/dv.c index c1340cc53b75..f8e3f9c6b282 100644 --- a/net/core/dv.c +++ b/net/core/dv.c @@ -553,6 +553,3 @@ void divert_frame(struct sk_buff *skb) break; } } - -EXPORT_SYMBOL(alloc_divert_blk); -EXPORT_SYMBOL(free_divert_blk); -- cgit v1.2.3 From 5c96c965be27556d2345a9534e36ff0e93604cec Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 7 Sep 2004 00:18:46 -0700 Subject: [NETFILTER]: Fix build error with CONFIG_SYSCTL disabled. ip_ct_log_invalid was added without testing that it compiles without CONFIG_SYSCTL. Since sysctl is the only way of turning it on, there should be no references to it if \!CONFIG_SYSCTL. Also, that turns off CONFIG_PROC_FS, which elicits more warnings. Squish them too. Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 4 ++++ net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 2 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 21 +++++++++++++++------ net/ipv4/netfilter/ip_queue.c | 2 ++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index 55d57404acb8..6edb801fa51f 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -75,6 +75,7 @@ extern int ip_conntrack_protocol_tcp_init(void); /* Log invalid packets */ extern unsigned int ip_ct_log_invalid; +#ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS #define LOG_INVALID(proto) \ (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) @@ -83,5 +84,8 @@ extern unsigned int ip_ct_log_invalid; ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \ && net_ratelimit()) #endif +#else +#define LOG_INVALID(proto) 0 +#endif /* CONFIG_SYSCTL */ #endif /*_IP_CONNTRACK_PROTOCOL_H*/ diff --git 
a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 64755c5aed6e..3e51036e5065 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -623,8 +623,8 @@ int __init init(void) return ret; - cleanup: #ifdef CONFIG_SYSCTL + cleanup: ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp); #endif out: diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ceff26dbff47..f4c3899771c4 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -54,6 +54,7 @@ static int kill_proto(const struct ip_conntrack *i, void *data) *((u_int8_t *) data)); } +#ifdef CONFIG_PROC_FS static unsigned int print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple, struct ip_conntrack_protocol *proto) @@ -367,6 +368,7 @@ static struct file_operations ct_cpu_seq_fops = { .llseek = seq_lseek, .release = seq_release_private, }; +#endif static unsigned int ip_confirm(unsigned int hooknum, struct sk_buff **pskb, @@ -726,10 +728,15 @@ static ctl_table ip_ct_net_table[] = { }, { .ctl_name = 0 } }; -#endif + +EXPORT_SYMBOL(ip_ct_log_invalid); +#endif /* CONFIG_SYSCTL */ + static int init_or_cleanup(int init) { +#ifdef CONFIG_PROC_FS struct proc_dir_entry *proc, *proc_exp, *proc_stat; +#endif int ret = 0; if (!init) goto cleanup; @@ -738,19 +745,20 @@ static int init_or_cleanup(int init) if (ret < 0) goto cleanup_nothing; - proc = proc_net_create("ip_conntrack", 0440, NULL); +#ifdef CONFIG_PROC_FS + proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc->proc_fops = &ct_file_ops; - proc_exp = proc_net_create("ip_conntrack_expect", 0440, NULL); + proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + &exp_file_ops); if (!proc_exp) goto cleanup_proc; - proc_exp->proc_fops = &exp_file_ops; proc_stat = 
proc_net_fops_create("ip_conntrack_stat", S_IRUGO, &ct_cpu_seq_fops); if (!proc_stat) goto cleanup_proc_exp; proc_stat->owner = THIS_MODULE; +#endif ret = nf_register_hook(&ip_conntrack_defrag_ops); if (ret < 0) { @@ -814,12 +822,14 @@ static int init_or_cleanup(int init) local_bh_enable(); nf_unregister_hook(&ip_conntrack_defrag_ops); cleanup_proc_stat: +#ifdef CONFIG_PROC_FS proc_net_remove("ip_conntrack_stat"); cleanup_proc_exp: proc_net_remove("ip_conntrack_exp"); cleanup_proc: proc_net_remove("ip_conntrack"); cleanup_init: +#endif /* CONFIG_PROC_FS */ ip_conntrack_cleanup(); cleanup_nothing: return ret; @@ -912,4 +922,3 @@ EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_put); -EXPORT_SYMBOL(ip_ct_log_invalid); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 906b89df2f19..26dca38f692a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -619,6 +619,7 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; +#ifdef CONFIG_PROC_FS static int ipq_get_info(char *buffer, char **start, off_t offset, int length) { @@ -648,6 +649,7 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) len = 0; return len; } +#endif /* CONFIG_PROC_FS */ static int init_or_cleanup(int init) -- cgit v1.2.3 From ef31c0c83addad073e6fda9885f0196b9308b6e3 Mon Sep 17 00:00:00 2001 From: Pozsar Balazs Date: Tue, 7 Sep 2004 00:20:26 -0700 Subject: [PKT_SCHED]: Add missing MODULE_LICENSE. Signed-off-by: Pozsar Balazs Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index fe530156875a..ff61f8e698c9 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -714,3 +714,4 @@ static void __exit atm_exit(void) module_init(atm_init) module_exit(atm_exit) +MODULE_LICENSE("GPL"); -- cgit v1.2.3