From 0b58a811461ccf3cf848aba4cc192538fd3b0516 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 19 Mar 2007 17:01:17 -0700 Subject: [SCTP]: Clean up stale data during association restart During association restart we may have stale data sitting on the ULP queue waiting for ordering or reassembly. This data may cause severe problems if not cleaned up. In particular stale data pending ordering may cause problems with receive window exhaustion if our peer has decided to restart the association. Signed-off-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/ulpqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index a43c8788b650..ab26ab3adae1 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -59,6 +59,7 @@ struct sctp_ulpq { /* Prototypes. */ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *, struct sctp_association *); +void sctp_ulpq_flush(struct sctp_ulpq *ulpq); void sctp_ulpq_free(struct sctp_ulpq *); /* Add a new DATA chunk for processing. */ -- cgit v1.2.3 From 749bf9215ed1a8b6edb4bb03693c2b62c6b9c2a4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 19 Mar 2007 17:02:30 -0700 Subject: [SCTP]: Reset some transport and association variables on restart If the association has been restarted, we need to reset the transport congestion variables as well as accumulated error counts and CACC variables. If we do not, the association will use the wrong values and may terminate prematurely. This was found with a scenario where the peer restarted the association when lksctp was in the last HB timeout for its association. The restart happened, but the error counts have not been reset and when the timeout occurred, a newly restarted association was terminated due to excessive retransmits. Signed-off-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/associola.c | 9 +++++++++ net/sctp/transport.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 31a8e88f1a74..f431acf3dcea 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1002,6 +1002,7 @@ void sctp_transport_update_rto(struct sctp_transport *, __u32); void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); unsigned long sctp_transport_timeout(struct sctp_transport *); +void sctp_transport_reset(struct sctp_transport *); /* This is the structure we use to queue packets as they come into diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 2505cd3b8d29..78d2ddb5ca18 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1046,6 +1046,9 @@ void sctp_assoc_update(struct sctp_association *asoc, trans = list_entry(pos, struct sctp_transport, transports); if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) sctp_assoc_del_peer(asoc, &trans->ipaddr); + + if (asoc->state >= SCTP_STATE_ESTABLISHED) + sctp_transport_reset(trans); } /* If the case is A (association restart), use @@ -1069,6 +1072,12 @@ void sctp_assoc_update(struct sctp_association *asoc, */ sctp_ulpq_flush(&asoc->ulpq); + /* reset the overall association error count so + * that the restarted association doesn't get torn + * down on the next retransmission timer. + */ + asoc->overall_error_count = 0; + } else { /* Add any peer addresses from the new association. */ list_for_each(pos, &new->peer.transport_addr_list) { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a596f5308cb1..c4699f5c409d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -526,3 +526,35 @@ unsigned long sctp_transport_timeout(struct sctp_transport *t) timeout += jiffies; return timeout; } + +/* Reset transport variables to their initial values */ +void sctp_transport_reset(struct sctp_transport *t) +{ + struct sctp_association *asoc = t->asoc; + + /* RFC 2960 (bis), Section 5.2.4 + * All the congestion control parameters (e.g., cwnd, ssthresh) + * related to this peer MUST be reset to their initial values + * (see Section 6.2.1) + */ + t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); + t->ssthresh = SCTP_DEFAULT_MAXWINDOW; + t->rto = asoc->rto_initial; + t->rtt = 0; + t->srtt = 0; + t->rttvar = 0; + + /* Reset these additional varibles so that we have a clean + * slate. + */ + t->partial_bytes_acked = 0; + t->flight_size = 0; + t->error_count = 0; + t->rto_pending = 0; + + /* Initialize the state information for SFR-CACC */ + t->cacc.changeover_active = 0; + t->cacc.cycling_changeover = 0; + t->cacc.next_tsn_at_change = 0; + t->cacc.cacc_saw_newack = 0; +} -- cgit v1.2.3 From e1701c68c1d1aeb3213d7016593ea9a1d4309417 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 24 Mar 2007 12:46:02 -0700 Subject: [NET]: Fix fib_rules compatibility breakage Based upon a patch from Patrick McHardy. The fib_rules netlink attribute policy introduced in 2.6.19 broke userspace compatibilty. When specifying a rule with "from all" or "to all", iproute adds a zero byte long netlink attribute, but the policy requires all addresses to have a size equal to sizeof(struct in_addr)/sizeof(struct in6_addr), resulting in a validation error. Check attribute length of FRA_SRC/FRA_DST in the generic framework by letting the family specific rules implementation provide the length of an address. Report an error if address length is non zero but no address attribute is provided. Fix actual bug by checking address length for non-zero instead of relying on availability of attribute. Signed-off-by: Thomas Graf Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 + net/core/fib_rules.c | 30 ++++++++++++++++++++++++++++++ net/decnet/dn_rules.c | 13 ++++++------- net/ipv4/fib_rules.c | 14 ++++++-------- net/ipv6/fib6_rules.c | 14 +++++--------- 5 files changed, 48 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index bc3c26494c3d..d585ea9fa97d 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -34,6 +34,7 @@ struct fib_rules_ops int family; struct list_head list; int rule_size; + int addr_size; int (*action)(struct fib_rule *, struct flowi *, int, diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3aea4e87d3d7..d011819a8058 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -152,6 +152,28 @@ out: EXPORT_SYMBOL_GPL(fib_rules_lookup); +static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, + struct fib_rules_ops *ops) +{ + int err = -EINVAL; + + if (frh->src_len) + if (tb[FRA_SRC] == NULL || + frh->src_len > (ops->addr_size * 8) || + nla_len(tb[FRA_SRC]) != ops->addr_size) + goto errout; + + if (frh->dst_len) + if (tb[FRA_DST] == NULL || + frh->dst_len > (ops->addr_size * 8) || + nla_len(tb[FRA_DST]) != ops->addr_size) + goto errout; + + err = 0; +errout: + return err; +} + int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -173,6 +195,10 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout; + err = validate_rulemsg(frh, tb, ops); + if (err < 0) + goto errout; + rule = kzalloc(ops->rule_size, GFP_KERNEL); if (rule == NULL) { err = -ENOMEM; @@ -260,6 +286,10 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout; + err = validate_rulemsg(frh, tb, ops); + if (err < 0) + goto errout; + list_for_each_entry(rule, ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index b6c98ac93dc8..5e86dd542302 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -109,8 +109,6 @@ errout: static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { FRA_GENERIC_POLICY, - [FRA_SRC] = { .type = NLA_U16 }, - [FRA_DST] = { .type = NLA_U16 }, }; static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) @@ -133,7 +131,7 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, int err = -EINVAL; struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos) + if (frh->tos) goto errout; if (rule->table == RT_TABLE_UNSPEC) { @@ -150,10 +148,10 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } } - if (tb[FRA_SRC]) + if (frh->src_len) r->src = nla_get_le16(tb[FRA_SRC]); - if (tb[FRA_DST]) + if (frh->dst_len) r->dst = nla_get_le16(tb[FRA_DST]); r->src_len = frh->src_len; @@ -176,10 +174,10 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (r->dst_len != frh->dst_len)) return 0; - if (tb[FRA_SRC] && (r->src != nla_get_le16(tb[FRA_SRC]))) + if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC]))) return 0; - if (tb[FRA_DST] && (r->dst != nla_get_le16(tb[FRA_DST]))) + if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST]))) return 0; return 1; @@ -249,6 +247,7 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) static struct fib_rules_ops dn_fib_rules_ops = { .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), + .addr_size = sizeof(u16), .action = dn_fib_rule_action, .match = dn_fib_rule_match, .configure = dn_fib_rule_configure, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b837c33e0404..c660c074c76c 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -171,8 +171,6 @@ static struct fib_table *fib_empty_table(void) static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { FRA_GENERIC_POLICY, - [FRA_SRC] = { .type = NLA_U32 }, - [FRA_DST] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, }; @@ -183,8 +181,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; - if (frh->src_len > 32 || frh->dst_len > 32 || - (frh->tos & ~IPTOS_TOS_MASK)) + if (frh->tos & ~IPTOS_TOS_MASK) goto errout; if (rule->table == RT_TABLE_UNSPEC) { @@ -201,10 +198,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } } - if (tb[FRA_SRC]) + if (frh->src_len) rule4->src = nla_get_be32(tb[FRA_SRC]); - if (tb[FRA_DST]) + if (frh->dst_len) rule4->dst = nla_get_be32(tb[FRA_DST]); #ifdef CONFIG_NET_CLS_ROUTE @@ -242,10 +239,10 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, return 0; #endif - if (tb[FRA_SRC] && (rule4->src != nla_get_be32(tb[FRA_SRC]))) + if (frh->src_len && (rule4->src != nla_get_be32(tb[FRA_SRC]))) return 0; - if (tb[FRA_DST] && (rule4->dst != nla_get_be32(tb[FRA_DST]))) + if (frh->dst_len && (rule4->dst != nla_get_be32(tb[FRA_DST]))) return 0; return 1; @@ -309,6 +306,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) static struct fib_rules_ops fib4_rules_ops = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), + .addr_size = sizeof(u32), .action = fib4_rule_action, .match = fib4_rule_match, .configure = fib4_rule_configure, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 0862809ffcf7..ea3035b4e3e8 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -131,8 +131,6 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { FRA_GENERIC_POLICY, - [FRA_SRC] = { .len = sizeof(struct in6_addr) }, - [FRA_DST] = { .len = sizeof(struct in6_addr) }, }; static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, @@ -142,9 +140,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, int err = -EINVAL; struct fib6_rule *rule6 = (struct fib6_rule *) rule; - if (frh->src_len > 128 || frh->dst_len > 128) - goto errout; - if (rule->action == FR_ACT_TO_TBL) { if (rule->table == RT6_TABLE_UNSPEC) goto errout; @@ -155,11 +150,11 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } } - if (tb[FRA_SRC]) + if (frh->src_len) nla_memcpy(&rule6->src.addr, tb[FRA_SRC], sizeof(struct in6_addr)); - if (tb[FRA_DST]) + if (frh->dst_len) nla_memcpy(&rule6->dst.addr, tb[FRA_DST], sizeof(struct in6_addr)); @@ -186,11 +181,11 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->tos && (rule6->tclass != frh->tos)) return 0; - if (tb[FRA_SRC] && + if (frh->src_len && nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) return 0; - if (tb[FRA_DST] && + if (frh->dst_len && nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) return 0; @@ -240,6 +235,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) static struct fib_rules_ops fib6_rules_ops = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), + .addr_size = sizeof(struct in6_addr), .action = fib6_rule_action, .match = fib6_rule_match, .configure = fib6_rule_configure, -- cgit v1.2.3 From ecbb416939da77c0d107409976499724baddce7b Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Sat, 24 Mar 2007 12:52:16 -0700 Subject: [NET]: Fix neighbour destructor handling. ->neigh_destructor() is killed (not used), replaced with ->neigh_cleanup(), which is called when neighbor entry goes to dead state. At this point everything is still valid: neigh->dev, neigh->parms etc. The device should guarantee that dead neighbor entries (neigh->dead != 0) do not get private part initialized, otherwise nobody will cleanup it. I think this is enough for ipoib which is the only user of this thing. Initialization private part of neighbor entries happens in ipib start_xmit routine, which is not reached when device is down. But it would be better to add explicit test for neigh->dead in any case. Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 +++--- include/net/neighbour.h | 2 +- net/atm/clip.c | 9 --------- net/core/neighbour.c | 14 ++++++++++---- 4 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0741c6d1337c..f2a40ae8e7d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -814,7 +814,7 @@ static void ipoib_set_mcast_list(struct net_device *dev) queue_work(ipoib_workqueue, &priv->restart_task); } -static void ipoib_neigh_destructor(struct neighbour *n) +static void ipoib_neigh_cleanup(struct neighbour *n) { struct ipoib_neigh *neigh; struct ipoib_dev_priv *priv = netdev_priv(n->dev); @@ -822,7 +822,7 @@ static void ipoib_neigh_destructor(struct neighbour *n) struct ipoib_ah *ah = NULL; ipoib_dbg(priv, - "neigh_destructor for %06x " IPOIB_GID_FMT "\n", + "neigh_cleanup for %06x " IPOIB_GID_FMT "\n", IPOIB_QPN(n->ha), IPOIB_GID_RAW_ARG(n->ha + 4)); @@ -874,7 +874,7 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) { - parms->neigh_destructor = ipoib_neigh_destructor; + parms->neigh_cleanup = ipoib_neigh_cleanup; return 0; } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3725b93c52f3..ad7fe1121412 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -36,7 +36,7 @@ struct neigh_parms struct net_device *dev; struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); - void (*neigh_destructor)(struct neighbour *); + void (*neigh_cleanup)(struct neighbour *); struct neigh_table *tbl; void *sysctl_table; diff --git a/net/atm/clip.c b/net/atm/clip.c index ebb5d0ce8b6f..8c3825816085 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -261,14 +261,6 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) spin_unlock_irqrestore(&PRIV(dev)->xoff_lock, flags); } -static void clip_neigh_destroy(struct neighbour *neigh) -{ - DPRINTK("clip_neigh_destroy (neigh %p)\n", neigh); - if (NEIGH2ENTRY(neigh)->vccs) - printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n"); - NEIGH2ENTRY(neigh)->vccs = (void *) NEIGHBOR_DEAD; -} - static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) { DPRINTK("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); @@ -342,7 +334,6 @@ static struct neigh_table clip_tbl = { /* parameters are copied from ARP ... */ .parms = { .tbl = &clip_tbl, - .neigh_destructor = clip_neigh_destroy, .base_reachable_time = 30 * HZ, .retrans_time = 1 * HZ, .gc_staletime = 60 * HZ, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3183142c6044..cfc60019cf92 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -140,6 +140,8 @@ static int neigh_forced_gc(struct neigh_table *tbl) n->dead = 1; shrunk = 1; write_unlock(&n->lock); + if (n->parms->neigh_cleanup) + n->parms->neigh_cleanup(n); neigh_release(n); continue; } @@ -211,6 +213,8 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) NEIGH_PRINTK2("neigh %p is stray.\n", n); } write_unlock(&n->lock); + if (n->parms->neigh_cleanup) + n->parms->neigh_cleanup(n); neigh_release(n); } } @@ -582,9 +586,6 @@ void neigh_destroy(struct neighbour *neigh) kfree(hh); } - if (neigh->parms->neigh_destructor) - (neigh->parms->neigh_destructor)(neigh); - skb_queue_purge(&neigh->arp_queue); dev_put(neigh->dev); @@ -675,6 +676,8 @@ static void neigh_periodic_timer(unsigned long arg) *np = n->next; n->dead = 1; write_unlock(&n->lock); + if (n->parms->neigh_cleanup) + n->parms->neigh_cleanup(n); neigh_release(n); continue; } @@ -2088,8 +2091,11 @@ void __neigh_for_each_release(struct neigh_table *tbl, } else np = &n->next; write_unlock(&n->lock); - if (release) + if (release) { + if (n->parms->neigh_cleanup) + n->parms->neigh_cleanup(n); neigh_release(n); + } } } } -- cgit v1.2.3 From f11e6659ce9058928d73ff440f9b40a818d628ab Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 24 Mar 2007 20:36:25 -0700 Subject: [IPV6]: Fix routing round-robin locking. As per RFC2461, section 6.3.6, item #2, when no routers on the matching list are known to be reachable or probably reachable we do round robin on those available routes so that we make sure to probe as many of them as possible to detect when one becomes reachable faster. Each routing table has a rwlock protecting the tree and the linked list of routes at each leaf. The round robin code executes during lookup and thus with the rwlock taken as a reader. A small local spinlock tries to provide protection but this does not work at all for two reasons: 1) The round-robin list manipulation, as coded, goes like this (with read lock held): walk routes finding head and tail spin_lock(); rotate list using head and tail spin_unlock(); While one thread is rotating the list, another thread can end up with stale values of head and tail and then proceed to corrupt the list when it gets the lock. This ends up causing the OOPS in fib6_add() later onthat many people have been hitting. 2) All the other code paths that run with the rwlock held as a reader do not expect the list to change on them, they expect it to remain completely fixed while they hold the lock in that way. So, simply stated, it is impossible to implement this correctly using a manipulation of the list without violating the rwlock locking semantics. Reimplement using a per-fib6_node round-robin pointer. This way we don't need to manipulate the list at all, and since the round-robin pointer can only ever point to real existing entries we don't need to perform any locking on the changing of the round-robin pointer itself. We only need to reset the round-robin pointer to NULL when the entry it is pointing to is removed. The idea is from Thomas Graf and it is very similar to how this was implemented before the advanced router selection code when in. Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 + net/ipv6/ip6_fib.c | 8 +++++ net/ipv6/route.c | 97 +++++++++++++++++++++++++++++++-------------------- 3 files changed, 68 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 9eda572a2a65..cf355a3c2ad5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -58,6 +58,7 @@ struct fib6_node __u16 fn_bit; /* bit key */ __u16 fn_flags; __u32 fn_sernum; + struct rt6_info *rr_ptr; }; #ifndef CONFIG_IPV6_SUBTREES diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f4d7be77eb0f..268f476ef3db 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -658,6 +658,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &iter->u.dst.rt6_next; } + /* Reset round-robin state, if necessary */ + if (ins == &fn->leaf) + fn->rr_ptr = NULL; + /* * insert node */ @@ -1109,6 +1113,10 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, rt6_stats.fib_rt_entries--; rt6_stats.fib_discarded_routes++; + /* Reset round-robin state, if necessary */ + if (fn->rr_ptr == rt) + fn->rr_ptr = NULL; + /* Adjust walkers */ read_lock(&fib6_walker_lock); FOR_WALKERS(w) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a6b3117df546..3931b33b25e8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -363,55 +363,76 @@ static int rt6_score_route(struct rt6_info *rt, int oif, return m; } -static struct rt6_info *rt6_select(struct rt6_info **head, int oif, - int strict) +static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, + int *mpri, struct rt6_info *match) { - struct rt6_info *match = NULL, *last = NULL; - struct rt6_info *rt, *rt0 = *head; - u32 metric; + int m; + + if (rt6_check_expired(rt)) + goto out; + + m = rt6_score_route(rt, oif, strict); + if (m < 0) + goto out; + + if (m > *mpri) { + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(match); + *mpri = m; + match = rt; + } else if (strict & RT6_LOOKUP_F_REACHABLE) { + rt6_probe(rt); + } + +out: + return match; +} + +static struct rt6_info *find_rr_leaf(struct fib6_node *fn, + struct rt6_info *rr_head, + u32 metric, int oif, int strict) +{ + struct rt6_info *rt, *match; int mpri = -1; - RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n", - __FUNCTION__, head, head ? *head : NULL, oif); + match = NULL; + for (rt = rr_head; rt && rt->rt6i_metric == metric; + rt = rt->u.dst.rt6_next) + match = find_match(rt, oif, strict, &mpri, match); + for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; + rt = rt->u.dst.rt6_next) + match = find_match(rt, oif, strict, &mpri, match); - for (rt = rt0, metric = rt0->rt6i_metric; - rt && rt->rt6i_metric == metric && (!last || rt != rt0); - rt = rt->u.dst.rt6_next) { - int m; + return match; +} - if (rt6_check_expired(rt)) - continue; +static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) +{ + struct rt6_info *match, *rt0; - last = rt; + RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", + __FUNCTION__, fn->leaf, oif); - m = rt6_score_route(rt, oif, strict); - if (m < 0) - continue; + rt0 = fn->rr_ptr; + if (!rt0) + fn->rr_ptr = rt0 = fn->leaf; - if (m > mpri) { - if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(match); - match = rt; - mpri = m; - } else if (strict & RT6_LOOKUP_F_REACHABLE) { - rt6_probe(rt); - } - } + match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); if (!match && - (strict & RT6_LOOKUP_F_REACHABLE) && - last && last != rt0) { + (strict & RT6_LOOKUP_F_REACHABLE)) { + struct rt6_info *next = rt0->u.dst.rt6_next; + /* no entries matched; do round-robin */ - static DEFINE_SPINLOCK(lock); - spin_lock(&lock); - *head = rt0->u.dst.rt6_next; - rt0->u.dst.rt6_next = last->u.dst.rt6_next; - last->u.dst.rt6_next = rt0; - spin_unlock(&lock); + if (!next || next->rt6i_metric != rt0->rt6i_metric) + next = fn->leaf; + + if (next != rt0) + fn->rr_ptr = next; } - RT6_TRACE("%s() => %p, score=%d\n", - __FUNCTION__, match, mpri); + RT6_TRACE("%s() => %p\n", + __FUNCTION__, match); return (match ? match : &ip6_null_entry); } @@ -657,7 +678,7 @@ restart_2: fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); + rt = rt6_select(fn, fl->iif, strict | reachable); BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -752,7 +773,7 @@ restart_2: fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); + rt = rt6_select(fn, fl->oif, strict | reachable); BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) -- cgit v1.2.3 From c2805fbb8630abb95d94ce7adc3f97976f7e0367 Mon Sep 17 00:00:00 2001 From: Jean Tourrilhes Date: Fri, 23 Mar 2007 00:31:16 +0000 Subject: [PATCH] WE-22 : prevent information leak on 64 bit Johannes Berg discovered that kernel space was leaking to userspace on 64 bit platform. He made a first patch to fix that. This is an improved version of his patch. Signed-off-by: Jean Tourrilhes Signed-off-by: John W. Linville --- include/linux/wireless.h | 21 +++++++++++-- include/net/iw_handler.h | 30 ++++++++++++------ net/core/rtnetlink.c | 3 +- net/core/wireless.c | 82 +++++++++++++++++++++++++++++------------------- 4 files changed, 91 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 447c52beb691..48759b2f57d7 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1,10 +1,10 @@ /* * This file define a set of standard wireless extensions * - * Version : 21 14.3.06 + * Version : 22 16.3.07 * * Authors : Jean Tourrilhes - HPL - - * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved. + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. */ #ifndef _LINUX_WIRELESS_H @@ -85,7 +85,7 @@ * (there is some stuff that will be added in the future...) * I just plan to increment with each new version. */ -#define WIRELESS_EXT 21 +#define WIRELESS_EXT 22 /* * Changes : @@ -221,6 +221,10 @@ * - Add IW_RETRY_SHORT/IW_RETRY_LONG retry modifiers * - Power/Retry relative values no longer * 100000 * - Add explicit flag to tell stats are in 802.11k RCPI : IW_QUAL_RCPI + * + * V21 to V22 + * ---------- + * - Prevent leaking of kernel space in stream on 64 bits. */ /**************************** CONSTANTS ****************************/ @@ -1085,4 +1089,15 @@ struct iw_event #define IW_EV_POINT_LEN (IW_EV_LCP_LEN + sizeof(struct iw_point) - \ IW_EV_POINT_OFF) +/* Size of the Event prefix when packed in stream */ +#define IW_EV_LCP_PK_LEN (4) +/* Size of the various events when packed in stream */ +#define IW_EV_CHAR_PK_LEN (IW_EV_LCP_PK_LEN + IFNAMSIZ) +#define IW_EV_UINT_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(__u32)) +#define IW_EV_FREQ_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct iw_freq)) +#define IW_EV_PARAM_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct iw_param)) +#define IW_EV_ADDR_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct sockaddr)) +#define IW_EV_QUAL_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct iw_quality)) +#define IW_EV_POINT_PK_LEN (IW_EV_LCP_LEN + 4) + #endif /* _LINUX_WIRELESS_H */ diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 10559e937d27..8a830188354d 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -1,10 +1,10 @@ /* * This file define the new driver API for Wireless Extensions * - * Version : 7 18.3.05 + * Version : 8 16.3.07 * * Authors : Jean Tourrilhes - HPL - - * Copyright (c) 2001-2006 Jean Tourrilhes, All Rights Reserved. + * Copyright (c) 2001-2007 Jean Tourrilhes, All Rights Reserved. */ #ifndef _IW_HANDLER_H @@ -207,7 +207,7 @@ * will be needed... * I just plan to increment with each new version. */ -#define IW_HANDLER_VERSION 7 +#define IW_HANDLER_VERSION 8 /* * Changes : @@ -239,6 +239,10 @@ * - Remove (struct iw_point *)->pointer from events and streams * - Remove spy_offset from struct iw_handler_def * - Add "check" version of event macros for ieee802.11 stack + * + * V7 to V8 + * ---------- + * - Prevent leaking of kernel space in stream on 64 bits. */ /**************************** CONSTANTS ****************************/ @@ -500,7 +504,11 @@ iwe_stream_add_event(char * stream, /* Stream of events */ /* Check if it's possible */ if(likely((stream + event_len) < ends)) { iwe->len = event_len; - memcpy(stream, (char *) iwe, event_len); + /* Beware of alignement issues on 64 bits */ + memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); + memcpy(stream + IW_EV_LCP_LEN, + ((char *) iwe) + IW_EV_LCP_LEN, + event_len - IW_EV_LCP_LEN); stream += event_len; } return stream; @@ -521,10 +529,10 @@ iwe_stream_add_point(char * stream, /* Stream of events */ /* Check if it's possible */ if(likely((stream + event_len) < ends)) { iwe->len = event_len; - memcpy(stream, (char *) iwe, IW_EV_LCP_LEN); + memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); memcpy(stream + IW_EV_LCP_LEN, ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF, - IW_EV_POINT_LEN - IW_EV_LCP_LEN); + IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length); stream += event_len; } @@ -574,7 +582,11 @@ iwe_stream_check_add_event(char * stream, /* Stream of events */ /* Check if it's possible, set error if not */ if(likely((stream + event_len) < ends)) { iwe->len = event_len; - memcpy(stream, (char *) iwe, event_len); + /* Beware of alignement issues on 64 bits */ + memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); + memcpy(stream + IW_EV_LCP_LEN, + ((char *) iwe) + IW_EV_LCP_LEN, + event_len - IW_EV_LCP_LEN); stream += event_len; } else *perr = -E2BIG; @@ -598,10 +610,10 @@ iwe_stream_check_add_point(char * stream, /* Stream of events */ /* Check if it's possible */ if(likely((stream + event_len) < ends)) { iwe->len = event_len; - memcpy(stream, (char *) iwe, IW_EV_LCP_LEN); + memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); memcpy(stream + IW_EV_LCP_LEN, ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF, - IW_EV_POINT_LEN - IW_EV_LCP_LEN); + IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length); stream += event_len; } else diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6055074c4b81..33ea8eac7fe0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -621,7 +621,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout; - iw += IW_EV_POINT_OFF; + /* Payload is at an offset in buffer */ + iw = iw_buf + IW_EV_POINT_OFF; } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ diff --git a/net/core/wireless.c b/net/core/wireless.c index 9936ab11e6e0..b07fe270a508 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -2,7 +2,7 @@ * This file implement the Wireless Extensions APIs. * * Authors : Jean Tourrilhes - HPL - - * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved. + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * * (As all part of the Linux kernel, this file is GPL) */ @@ -76,6 +76,9 @@ * o Change length in ESSID and NICK to strlen() instead of strlen()+1 * o Make standard_ioctl_num and standard_event_num unsigned * o Remove (struct net_device *)->get_wireless_stats() + * + * v10 - 16.3.07 - Jean II + * o Prevent leaking of kernel space in stream on 64 bits. */ /***************************** INCLUDES *****************************/ @@ -427,6 +430,21 @@ static const int event_type_size[] = { IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ }; +/* Size (in bytes) of various events, as packed */ +static const int event_type_pk_size[] = { + IW_EV_LCP_PK_LEN, /* IW_HEADER_TYPE_NULL */ + 0, + IW_EV_CHAR_PK_LEN, /* IW_HEADER_TYPE_CHAR */ + 0, + IW_EV_UINT_PK_LEN, /* IW_HEADER_TYPE_UINT */ + IW_EV_FREQ_PK_LEN, /* IW_HEADER_TYPE_FREQ */ + IW_EV_ADDR_PK_LEN, /* IW_HEADER_TYPE_ADDR */ + 0, + IW_EV_POINT_PK_LEN, /* Without variable payload */ + IW_EV_PARAM_PK_LEN, /* IW_HEADER_TYPE_PARAM */ + IW_EV_QUAL_PK_LEN, /* IW_HEADER_TYPE_QUAL */ +}; + /************************ COMMON SUBROUTINES ************************/ /* * Stuff that may be used in various place or doesn't fit in one @@ -1217,7 +1235,7 @@ static int rtnetlink_standard_get(struct net_device * dev, memcpy(buffer + IW_EV_POINT_OFF, request, request_len); /* Use our own copy of wrqu */ wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF - + IW_EV_LCP_LEN); + + IW_EV_LCP_PK_LEN); /* No extra arguments. Trivial to handle */ ret = handler(dev, &info, wrqu, NULL); @@ -1229,8 +1247,8 @@ static int rtnetlink_standard_get(struct net_device * dev, /* Get a temp copy of wrqu (skip pointer) */ memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF, - ((char *) request) + IW_EV_LCP_LEN, - IW_EV_POINT_LEN - IW_EV_LCP_LEN); + ((char *) request) + IW_EV_LCP_PK_LEN, + IW_EV_POINT_LEN - IW_EV_LCP_PK_LEN); /* Calculate space needed by arguments. Always allocate * for max space. Easier, and won't last long... */ @@ -1240,7 +1258,7 @@ static int rtnetlink_standard_get(struct net_device * dev, (wrqu_point.data.length > descr->max_tokens)) extra_size = (wrqu_point.data.length * descr->token_size); - buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF; + buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF; #ifdef WE_RTNETLINK_DEBUG printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n", dev->name, extra_size, buffer_size); @@ -1254,15 +1272,15 @@ static int rtnetlink_standard_get(struct net_device * dev, /* Put wrqu in the right place (just before extra). * Leave space for IWE header and dummy pointer... - * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned... + * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned. */ - memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF, + memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF, ((char *) &wrqu_point) + IW_EV_POINT_OFF, - IW_EV_POINT_LEN - IW_EV_LCP_LEN); - wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN); + IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); + wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN); /* Extra comes logically after that. Offset +12 bytes. */ - extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN; + extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN; /* Call the handler */ ret = handler(dev, &info, wrqu, extra); @@ -1270,11 +1288,11 @@ static int rtnetlink_standard_get(struct net_device * dev, /* Calculate real returned length */ extra_size = (wrqu->data.length * descr->token_size); /* Re-adjust reply size */ - request->len = extra_size + IW_EV_POINT_LEN; + request->len = extra_size + IW_EV_POINT_PK_LEN; /* Put the iwe header where it should, i.e. scrap the * dummy pointer. */ - memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN); + memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN); #ifdef WE_RTNETLINK_DEBUG printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size); @@ -1331,10 +1349,10 @@ static inline int rtnetlink_standard_set(struct net_device * dev, #endif /* WE_RTNETLINK_DEBUG */ /* Extract fixed header from request. This is properly aligned. */ - wrqu = &request->u; + wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN); /* Check if wrqu is complete */ - hdr_len = event_type_size[descr->header_type]; + hdr_len = event_type_pk_size[descr->header_type]; if(request_len < hdr_len) { #ifdef WE_RTNETLINK_DEBUG printk(KERN_DEBUG @@ -1359,7 +1377,7 @@ static inline int rtnetlink_standard_set(struct net_device * dev, /* Put wrqu in the right place (skip pointer) */ memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF, - wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN); + wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); /* Don't forget about the event code... */ wrqu = &wrqu_point; @@ -1483,7 +1501,7 @@ static inline int rtnetlink_private_get(struct net_device * dev, hdr_len = extra_size; extra_size = 0; } else { - hdr_len = IW_EV_POINT_LEN; + hdr_len = IW_EV_POINT_PK_LEN; } /* Check if wrqu is complete */ @@ -1514,7 +1532,7 @@ static inline int rtnetlink_private_get(struct net_device * dev, memcpy(buffer + IW_EV_POINT_OFF, request, request_len); /* Use our own copy of wrqu */ wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF - + IW_EV_LCP_LEN); + + IW_EV_LCP_PK_LEN); /* No extra arguments. Trivial to handle */ ret = handler(dev, &info, wrqu, (char *) wrqu); @@ -1523,7 +1541,7 @@ static inline int rtnetlink_private_get(struct net_device * dev, char * extra; /* Buffer for full reply */ - buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF; + buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF; #ifdef WE_RTNETLINK_DEBUG printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n", @@ -1538,15 +1556,15 @@ static inline int rtnetlink_private_get(struct net_device * dev, /* Put wrqu in the right place (just before extra). * Leave space for IWE header and dummy pointer... - * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned... + * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned. */ - memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF, - ((char *) request) + IW_EV_LCP_LEN, - IW_EV_POINT_LEN - IW_EV_LCP_LEN); - wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN); + memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF, + ((char *) request) + IW_EV_LCP_PK_LEN, + IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); + wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN); /* Extra comes logically after that. Offset +12 bytes. */ - extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN; + extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN; /* Call the handler */ ret = handler(dev, &info, wrqu, extra); @@ -1556,11 +1574,11 @@ static inline int rtnetlink_private_get(struct net_device * dev, if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) extra_size = adjust_priv_size(descr->get_args, wrqu); /* Re-adjust reply size */ - request->len = extra_size + IW_EV_POINT_LEN; + request->len = extra_size + IW_EV_POINT_PK_LEN; /* Put the iwe header where it should, i.e. scrap the * dummy pointer. */ - memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN); + memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN); #ifdef WE_RTNETLINK_DEBUG printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size); @@ -1641,14 +1659,14 @@ static inline int rtnetlink_private_set(struct net_device * dev, /* Does it fits in wrqu ? */ if((descr->set_args & IW_PRIV_SIZE_FIXED) && (extra_size <= IFNAMSIZ)) { - hdr_len = IW_EV_LCP_LEN + extra_size; + hdr_len = IW_EV_LCP_PK_LEN + extra_size; extra_size = 0; } else { - hdr_len = IW_EV_POINT_LEN; + hdr_len = IW_EV_POINT_PK_LEN; } /* Extract fixed header from request. This is properly aligned. */ - wrqu = &request->u; + wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN); /* Check if wrqu is complete */ if(request_len < hdr_len) { @@ -1675,7 +1693,7 @@ static inline int rtnetlink_private_set(struct net_device * dev, /* Put wrqu in the right place (skip pointer) */ memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF, - wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN); + wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); /* Does it fits within bounds ? */ if(wrqu_point.data.length > (descr->set_args & @@ -1738,7 +1756,7 @@ int wireless_rtnetlink_get(struct net_device * dev, iw_handler handler; /* Check length */ - if(len < IW_EV_LCP_LEN) { + if(len < IW_EV_LCP_PK_LEN) { printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n", dev->name, len); return -EINVAL; @@ -1822,7 +1840,7 @@ int wireless_rtnetlink_set(struct net_device * dev, iw_handler handler; /* Check length */ - if(len < IW_EV_LCP_LEN) { + if(len < IW_EV_LCP_PK_LEN) { printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n", dev->name, len); return -EINVAL; -- cgit v1.2.3 From c01003c20563d1e75ec9828d21743919d2b43977 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 29 Mar 2007 11:46:52 -0700 Subject: [IFB]: Fix crash on input device removal The input_device pointer is not refcounted, which means the device may disappear while packets are queued, causing a crash when ifb passes packets with a stale skb->dev pointer to netif_rx(). Fix by storing the interface index instead and do a lookup where neccessary. Signed-off-by: Patrick McHardy Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- drivers/net/ifb.c | 35 +++++++++++++---------------------- include/linux/skbuff.h | 5 +++-- include/net/pkt_cls.h | 7 +++++-- net/core/dev.c | 8 ++++---- net/core/skbuff.c | 2 +- net/sched/act_mirred.c | 2 +- 6 files changed, 27 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index ca2b21f9d444..07b4c0d7a75c 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -96,17 +96,24 @@ static void ri_tasklet(unsigned long dev) skb->tc_verd = SET_TC_NCLS(skb->tc_verd); stats->tx_packets++; stats->tx_bytes +=skb->len; + + skb->dev = __dev_get_by_index(skb->iif); + if (!skb->dev) { + dev_kfree_skb(skb); + stats->tx_dropped++; + break; + } + skb->iif = _dev->ifindex; + if (from & AT_EGRESS) { dp->st_rx_frm_egr++; dev_queue_xmit(skb); } else if (from & AT_INGRESS) { - dp->st_rx_frm_ing++; + skb_pull(skb, skb->dev->hard_header_len); netif_rx(skb); - } else { - dev_kfree_skb(skb); - stats->tx_dropped++; - } + } else + BUG(); } if (netif_tx_trylock(_dev)) { @@ -157,26 +164,10 @@ static int ifb_xmit(struct sk_buff *skb, struct net_device *dev) stats->rx_packets++; stats->rx_bytes+=skb->len; - if (!from || !skb->input_dev) { -dropped: + if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->iif) { dev_kfree_skb(skb); stats->rx_dropped++; return ret; - } else { - /* - * note we could be going - * ingress -> egress or - * egress -> ingress - */ - skb->dev = skb->input_dev; - skb->input_dev = dev; - if (from & AT_INGRESS) { - skb_pull(skb, skb->dev->hard_header_len); - } else { - if (!(from & AT_EGRESS)) { - goto dropped; - } - } } if (skb_queue_len(&dp->rq) >= dev->tx_queue_len) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4ff3940210d8..82f43ad478c7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -188,7 +188,7 @@ enum { * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by - * @input_dev: Device we arrived on + * @iif: ifindex of device we arrived on * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -235,7 +235,8 @@ struct sk_buff { struct sock *sk; struct skb_timeval tstamp; struct net_device *dev; - struct net_device *input_dev; + int iif; + /* 4 byte hole on 64 bit*/ union { struct tcphdr *th; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b902d24a3256..02647fe3d74b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -352,10 +352,13 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv) static inline int tcf_match_indev(struct sk_buff *skb, char *indev) { + struct net_device *dev; + if (indev[0]) { - if (!skb->input_dev) + if (!skb->iif) return 0; - if (strcmp(indev, skb->input_dev->name)) + dev = __dev_get_by_index(skb->iif); + if (!dev || strcmp(indev, dev->name)) return 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 5984b55311a1..d44b8f1964fa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1741,8 +1741,8 @@ static int ing_filter(struct sk_buff *skb) if (dev->qdisc_ingress) { __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n", - skb->input_dev->name, skb->dev->name); + printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", + skb->iif, skb->dev->ifindex); return TC_ACT_SHOT; } @@ -1775,8 +1775,8 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->tstamp.off_sec) net_timestamp(skb); - if (!skb->input_dev) - skb->input_dev = skb->dev; + if (!skb->iif) + skb->iif = skb->dev->ifindex; orig_dev = skb_bond(skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 702fa8f08747..87573ae35b02 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -496,7 +496,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->tc_verd = SET_TC_VERD(skb->tc_verd,0); n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - C(input_dev); + C(iif); #endif skb_copy_secmark(n, skb); #endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 68f26cb278f9..3e93683e9ab3 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -198,7 +198,7 @@ bad_mirred: skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; - skb2->input_dev = skb->dev; + skb2->iif = skb->dev->ifindex; dev_queue_xmit(skb2); spin_unlock(&m->tcf_lock); return m->tcf_action; -- cgit v1.2.3