From 2588fe1d782f1686847493ad643157d5d10bf602 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:44:34 -0700 Subject: [INET]: Consolidate xxx_frag_intern This routine checks for the existence of a given entry in the hash table and inserts the new one if needed. The ->equal callback is used to compare two frag_queue-s together, but this one is temporary and will be removed later. The netfilter code and the ipv6 one use the same routine to compare frags. The inet_frag_intern() always returns non-NULL pointer, so convert the inet_frag_queue into protocol specific one (with the container_of) without any checks. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 4 ++++ include/net/ipv6.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 911c2cd02941..133e187fbc98 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -41,6 +41,8 @@ struct inet_frags { unsigned int (*hashfn)(struct inet_frag_queue *); void (*destructor)(struct inet_frag_queue *); void (*skb_free)(struct sk_buff *); + int (*equal)(struct inet_frag_queue *q1, + struct inet_frag_queue *q2); }; void inet_frags_init(struct inet_frags *); @@ -50,6 +52,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); int inet_frag_evictor(struct inet_frags *f); +struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *q, + struct inet_frags *f, unsigned int hash); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cc796cbc1b26..ff1269713462 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -377,6 +377,9 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, prefixlen); } +struct inet_frag_queue; +int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2); + static inline int ipv6_addr_any(const struct in6_addr *a) { return ((a->s6_addr32[0] | a->s6_addr32[1] | -- cgit v1.2.3 From e521db9d790aaa60ae8920e21cb7faedc280fc36 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:45:23 -0700 Subject: [INET]: Consolidate xxx_frag_alloc() Just perform the kzalloc() allocation and setup common fields in the inet_frag_queue(). Then return the result to the caller to initialize the rest. The inet_frag_alloc() may return NULL, so check the return value before doing the container_of(). This looks ugly, but the xxx_frag_alloc() will be removed soon. The xxx_expire() timer callbacks are patches, because the argument is now the inet_frag_queue, not the protocol specific queue. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 ++ net/ipv4/inet_fragment.c | 17 +++++++++++++++++ net/ipv4/ip_fragment.c | 20 +++++++------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 19 ++++++++----------- net/ipv6/reassembly.c | 19 +++++++------------ 5 files changed, 41 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 133e187fbc98..412b8582a616 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,6 +43,7 @@ struct inet_frags { void (*skb_free)(struct sk_buff *); int (*equal)(struct inet_frag_queue *q1, struct inet_frag_queue *q2); + void (*frag_expire)(unsigned long data); }; void inet_frags_init(struct inet_frags *); @@ -54,6 +55,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, int inet_frag_evictor(struct inet_frags *f); struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *q, struct inet_frags *f, unsigned int hash); +struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 15054eb3d4b9..57e15fa307dc 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -209,3 +209,20 @@ struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, return qp; } EXPORT_SYMBOL(inet_frag_intern); + +struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) +{ + struct inet_frag_queue *q; + + q = kzalloc(f->qsize, GFP_ATOMIC); + if (q == NULL) + return NULL; + + atomic_add(f->qsize, &f->mem); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + spin_lock_init(&q->lock); + atomic_set(&q->refcnt, 1); + + return q; +} +EXPORT_SYMBOL(inet_frag_alloc); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4b1bbbee22c5..fc0d530df522 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -158,12 +158,10 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q) static __inline__ struct ipq *frag_alloc_queue(void) { - struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC); + struct inet_frag_queue *q; - if (!qp) - return NULL; - atomic_add(sizeof(struct ipq), &ip4_frags.mem); - return qp; + q = inet_frag_alloc(&ip4_frags); + return q ? container_of(q, struct ipq, q) : NULL; } @@ -199,7 +197,9 @@ static void ip_evictor(void) */ static void ip_expire(unsigned long arg) { - struct ipq *qp = (struct ipq *) arg; + struct ipq *qp; + + qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); spin_lock(&qp->q.lock); @@ -249,13 +249,6 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h) qp->user = user; qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; - /* Initialize a timer for this entry. */ - init_timer(&qp->q.timer); - qp->q.timer.data = (unsigned long) qp; /* pointer to queue */ - qp->q.timer.function = ip_expire; /* expire function */ - spin_lock_init(&qp->q.lock); - atomic_set(&qp->q.refcnt, 1); - return ip_frag_intern(qp, h); out_nomem: @@ -653,6 +646,7 @@ void __init ipfrag_init(void) ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.equal = ip4_frag_equal; + ip4_frags.frag_expire = ip_expire; inet_frags_init(&ip4_frags); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d7dc444ec48f..3f8c16b3301e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -137,13 +137,10 @@ static void nf_frag_free(struct inet_frag_queue *q) static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) { - struct nf_ct_frag6_queue *fq; + struct inet_frag_queue *q; - fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); - if (fq == NULL) - return NULL; - atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem); - return fq; + q = inet_frag_alloc(&nf_frags); + return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL; } /* Destruction primitives. */ @@ -168,7 +165,10 @@ static void nf_ct_frag6_evictor(void) static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; + struct nf_ct_frag6_queue *fq; + + fq = container_of((struct inet_frag_queue *)data, + struct nf_ct_frag6_queue, q); spin_lock(&fq->q.lock); @@ -208,10 +208,6 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq); - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); - return nf_ct_frag6_intern(hash, fq); oom: @@ -726,6 +722,7 @@ int nf_ct_frag6_init(void) nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.equal = ip6_frag_equal; + nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 73ea204eaa6f..21913c78f053 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -171,12 +171,10 @@ static void ip6_frag_free(struct inet_frag_queue *fq) static inline struct frag_queue *frag_alloc_queue(void) { - struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC); + struct inet_frag_queue *q; - if(!fq) - return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frags.mem); - return fq; + q = inet_frag_alloc(&ip6_frags); + return q ? container_of(q, struct frag_queue, q) : NULL; } /* Destruction primitives. */ @@ -205,9 +203,11 @@ static void ip6_evictor(struct inet6_dev *idev) static void ip6_frag_expire(unsigned long data) { - struct frag_queue *fq = (struct frag_queue *) data; + struct frag_queue *fq; struct net_device *dev = NULL; + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + spin_lock(&fq->q.lock); if (fq->q.last_in & COMPLETE) @@ -268,12 +268,6 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - init_timer(&fq->q.timer); - fq->q.timer.function = ip6_frag_expire; - fq->q.timer.data = (long) fq; - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); - return ip6_frag_intern(fq, hash); oom: @@ -685,5 +679,6 @@ void __init ipv6_frag_init(void) ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.equal = ip6_frag_equal; + ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); } -- cgit v1.2.3 From c6fda282294da882f8d8cc4c513940277dd380f5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:46:47 -0700 Subject: [INET]: Consolidate xxx_frag_create() This one uses the xxx_frag_intern() and xxx_frag_alloc() routines, which are already consolidated, so remove them from protocol code (as promised). The ->constructor callback is used to init the rest of the frag queue and it is the same for netfilter and ipv6. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 7 +++-- include/net/ipv6.h | 8 +++++ net/ipv4/inet_fragment.c | 20 +++++++++--- net/ipv4/ip_fragment.c | 54 +++++++++++++++++---------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 39 ++++++++---------------- net/ipv6/reassembly.c | 45 ++++++++++++--------------- 6 files changed, 88 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 412b8582a616..e33072b9fd91 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -39,6 +39,8 @@ struct inet_frags { struct inet_frags_ctl *ctl; unsigned int (*hashfn)(struct inet_frag_queue *); + void (*constructor)(struct inet_frag_queue *q, + void *arg); void (*destructor)(struct inet_frag_queue *); void (*skb_free)(struct sk_buff *); int (*equal)(struct inet_frag_queue *q1, @@ -53,9 +55,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); int inet_frag_evictor(struct inet_frags *f); -struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *q, - struct inet_frags *f, unsigned int hash); -struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f); +struct inet_frag_queue *inet_frag_create(struct inet_frags *f, + void *create_arg, unsigned int hash); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ff1269713462..9dc99bf5cf0e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -380,6 +380,14 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, struct inet_frag_queue; int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2); +struct ip6_create_arg { + __be32 id; + struct in6_addr *src; + struct in6_addr *dst; +}; + +void ip6_frag_init(struct inet_frag_queue *q, void *a); + static inline int ipv6_addr_any(const struct in6_addr *a) { return ((a->s6_addr32[0] | a->s6_addr32[1] | diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 57e15fa307dc..b531f803cda4 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -173,7 +173,7 @@ int inet_frag_evictor(struct inet_frags *f) } EXPORT_SYMBOL(inet_frag_evictor); -struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, +static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, struct inet_frags *f, unsigned int hash) { struct inet_frag_queue *qp; @@ -208,9 +208,8 @@ struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, write_unlock(&f->lock); return qp; } -EXPORT_SYMBOL(inet_frag_intern); -struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) +static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -218,6 +217,7 @@ struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) if (q == NULL) return NULL; + f->constructor(q, arg); atomic_add(f->qsize, &f->mem); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); @@ -225,4 +225,16 @@ struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) return q; } -EXPORT_SYMBOL(inet_frag_alloc); + +struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, + unsigned int hash) +{ + struct inet_frag_queue *q; + + q = inet_frag_alloc(f, arg); + if (q == NULL) + return NULL; + + return inet_frag_intern(q, f, hash); +} +EXPORT_SYMBOL(inet_frag_create); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fc0d530df522..0d6cff1de5a3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -108,6 +108,11 @@ int ip_frag_mem(void) static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct net_device *dev); +struct ip4_create_arg { + struct iphdr *iph; + u32 user; +}; + static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, @@ -146,6 +151,20 @@ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) kfree_skb(skb); } +static void ip4_frag_init(struct inet_frag_queue *q, void *a) +{ + struct ipq *qp = container_of(q, struct ipq, q); + struct ip4_create_arg *arg = a; + + qp->protocol = arg->iph->protocol; + qp->id = arg->iph->id; + qp->saddr = arg->iph->saddr; + qp->daddr = arg->iph->daddr; + qp->user = arg->user; + qp->peer = sysctl_ipfrag_max_dist ? + inet_getpeer(arg->iph->saddr, 1) : NULL; +} + static __inline__ void ip4_frag_free(struct inet_frag_queue *q) { struct ipq *qp; @@ -156,14 +175,6 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q) kfree(qp); } -static __inline__ struct ipq *frag_alloc_queue(void) -{ - struct inet_frag_queue *q; - - q = inet_frag_alloc(&ip4_frags); - return q ? container_of(q, struct ipq, q) : NULL; -} - /* Destruction primitives. */ @@ -226,30 +237,20 @@ out: /* Creation primitives. */ -static struct ipq *ip_frag_intern(struct ipq *qp_in, unsigned int hash) -{ - struct inet_frag_queue *q; - - q = inet_frag_intern(&qp_in->q, &ip4_frags, hash); - return container_of(q, struct ipq, q); -} - /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h) { - struct ipq *qp; + struct inet_frag_queue *q; + struct ip4_create_arg arg; - if ((qp = frag_alloc_queue()) == NULL) - goto out_nomem; + arg.iph = iph; + arg.user = user; - qp->protocol = iph->protocol; - qp->id = iph->id; - qp->saddr = iph->saddr; - qp->daddr = iph->daddr; - qp->user = user; - qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; + q = inet_frag_create(&ip4_frags, &arg, h); + if (q == NULL) + goto out_nomem; - return ip_frag_intern(qp, h); + return container_of(q, struct ipq, q); out_nomem: LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); @@ -642,6 +643,7 @@ void __init ipfrag_init(void) { ip4_frags.ctl = &ip4_frags_ctl; ip4_frags.hashfn = ip4_hashfn; + ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3f8c16b3301e..127d1d842786 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -135,14 +135,6 @@ static void nf_frag_free(struct inet_frag_queue *q) kfree(container_of(q, struct nf_ct_frag6_queue, q)); } -static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) -{ - struct inet_frag_queue *q; - - q = inet_frag_alloc(&nf_frags); - return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL; -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -184,33 +176,25 @@ out: /* Creation primitives. */ -static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, - struct nf_ct_frag6_queue *fq_in) +static struct nf_ct_frag6_queue * +nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; + struct ip6_create_arg arg; - q = inet_frag_intern(&fq_in->q, &nf_frags, hash); - return container_of(q, struct nf_ct_frag6_queue, q); -} - - -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; + arg.id = id; + arg.src = src; + arg.dst = dst; - if ((fq = frag_alloc_queue()) == NULL) { - pr_debug("Can't alloc new queue\n"); + q = inet_frag_create(&nf_frags, &arg, hash); + if (q == NULL) goto oom; - } - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - - return nf_ct_frag6_intern(hash, fq); + return container_of(q, struct nf_ct_frag6_queue, q); oom: + pr_debug("Can't alloc new queue\n"); return NULL; } @@ -718,6 +702,7 @@ int nf_ct_frag6_init(void) { nf_frags.ctl = &nf_frags_ctl; nf_frags.hashfn = nf_hashfn; + nf_frags.constructor = ip6_frag_init; nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 21913c78f053..ce8734028d94 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -164,17 +164,20 @@ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) kfree_skb(skb); } -static void ip6_frag_free(struct inet_frag_queue *fq) +void ip6_frag_init(struct inet_frag_queue *q, void *a) { - kfree(container_of(fq, struct frag_queue, q)); + struct frag_queue *fq = container_of(q, struct frag_queue, q); + struct ip6_create_arg *arg = a; + + fq->id = arg->id; + ipv6_addr_copy(&fq->saddr, arg->src); + ipv6_addr_copy(&fq->daddr, arg->dst); } +EXPORT_SYMBOL(ip6_frag_init); -static inline struct frag_queue *frag_alloc_queue(void) +static void ip6_frag_free(struct inet_frag_queue *fq) { - struct inet_frag_queue *q; - - q = inet_frag_alloc(&ip6_frags); - return q ? container_of(q, struct frag_queue, q) : NULL; + kfree(container_of(fq, struct frag_queue, q)); } /* Destruction primitives. */ @@ -244,31 +247,22 @@ out: /* Creation primitives. */ - -static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in, - unsigned int hash) -{ - struct inet_frag_queue *q; - - q = inet_frag_intern(&fq_in->q, &ip6_frags, hash); - return container_of(q, struct frag_queue, q); -} - - static struct frag_queue * ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev, unsigned int hash) { - struct frag_queue *fq; + struct inet_frag_queue *q; + struct ip6_create_arg arg; - if ((fq = frag_alloc_queue()) == NULL) - goto oom; + arg.id = id; + arg.src = src; + arg.dst = dst; - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); + q = inet_frag_create(&ip6_frags, &arg, hash); + if (q == NULL) + goto oom; - return ip6_frag_intern(fq, hash); + return container_of(q, struct frag_queue, q); oom: IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); @@ -675,6 +669,7 @@ void __init ipv6_frag_init(void) ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; + ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); -- cgit v1.2.3 From abd6523d15f40bfee14652619a31a7f65f77f581 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:47:21 -0700 Subject: [INET]: Consolidate xxx_find() in fragment management Here we need another callback ->match to check whether the entry found in hash matches the key passed. The key used is the same as the creation argument for inet_frag_create. Yet again, this ->match is the same for netfilter and ipv6. Running a frew steps forward - this callback will later replace the ->equal one. Since the inet_frag_find() uses the already consolidated inet_frag_create() remove the xxx_frag_create from protocol codes. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 6 ++-- include/net/ipv6.h | 1 + net/ipv4/inet_fragment.c | 25 +++++++++++++-- net/ipv4/ip_fragment.c | 57 ++++++++++++--------------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 32 ++++-------------- net/ipv6/reassembly.c | 50 +++++++++++------------------ 6 files changed, 73 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e33072b9fd91..64299266a868 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -45,6 +45,8 @@ struct inet_frags { void (*skb_free)(struct sk_buff *); int (*equal)(struct inet_frag_queue *q1, struct inet_frag_queue *q2); + int (*match)(struct inet_frag_queue *q, + void *arg); void (*frag_expire)(unsigned long data); }; @@ -55,8 +57,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); int inet_frag_evictor(struct inet_frags *f); -struct inet_frag_queue *inet_frag_create(struct inet_frags *f, - void *create_arg, unsigned int hash); +struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, + unsigned int hash); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9dc99bf5cf0e..005853a33ef6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -387,6 +387,7 @@ struct ip6_create_arg { }; void ip6_frag_init(struct inet_frag_queue *q, void *a); +int ip6_frag_match(struct inet_frag_queue *q, void *a); static inline int ipv6_addr_any(const struct in6_addr *a) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b531f803cda4..6ba98ebbed93 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -226,8 +226,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) return q; } -struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, - unsigned int hash) +static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, + void *arg, unsigned int hash) { struct inet_frag_queue *q; @@ -237,4 +237,23 @@ struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, return inet_frag_intern(q, f, hash); } -EXPORT_SYMBOL(inet_frag_create); + +struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, + unsigned int hash) +{ + struct inet_frag_queue *q; + struct hlist_node *n; + + read_lock(&f->lock); + hlist_for_each_entry(q, n, &f->hash[hash], list) { + if (f->match(q, key)) { + atomic_inc(&q->refcnt); + read_unlock(&f->lock); + return q; + } + } + read_unlock(&f->lock); + + return inet_frag_create(f, key, hash); +} +EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0d6cff1de5a3..928259dbc0f8 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -142,6 +142,19 @@ static int ip4_frag_equal(struct inet_frag_queue *q1, qp1->user == qp2->user); } +static int ip4_frag_match(struct inet_frag_queue *q, void *a) +{ + struct ipq *qp; + struct ip4_create_arg *arg = a; + + qp = container_of(q, struct ipq, q); + return (qp->id == arg->iph->id && + qp->saddr == arg->iph->saddr && + qp->daddr == arg->iph->daddr && + qp->protocol == arg->iph->protocol && + qp->user == arg->user); +} + /* Memory Tracking Functions. */ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -235,18 +248,20 @@ out: ipq_put(qp); } -/* Creation primitives. */ - -/* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h) +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and create new one, if nothing is found. + */ +static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { struct inet_frag_queue *q; struct ip4_create_arg arg; + unsigned int hash; arg.iph = iph; arg.user = user; + hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - q = inet_frag_create(&ip4_frags, &arg, h); + q = inet_frag_find(&ip4_frags, &arg, hash); if (q == NULL) goto out_nomem; @@ -257,37 +272,6 @@ out_nomem: return NULL; } -/* Find the correct entry in the "incomplete datagrams" queue for - * this IP datagram, and create new one, if nothing is found. - */ -static inline struct ipq *ip_find(struct iphdr *iph, u32 user) -{ - __be16 id = iph->id; - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; - __u8 protocol = iph->protocol; - unsigned int hash; - struct ipq *qp; - struct hlist_node *n; - - read_lock(&ip4_frags.lock); - hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) { - if (qp->id == id && - qp->saddr == saddr && - qp->daddr == daddr && - qp->protocol == protocol && - qp->user == user) { - atomic_inc(&qp->q.refcnt); - read_unlock(&ip4_frags.lock); - return qp; - } - } - read_unlock(&ip4_frags.lock); - - return ip_frag_create(iph, user, hash); -} - /* Is the fragment too far ahead to be part of ipq? */ static inline int ip_frag_too_far(struct ipq *qp) { @@ -648,6 +632,7 @@ void __init ipfrag_init(void) ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.equal = ip4_frag_equal; + ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; inet_frags_init(&ip4_frags); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 127d1d842786..bff63d79c644 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -176,18 +176,19 @@ out: /* Creation primitives. */ -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, - struct in6_addr *dst) +static __inline__ struct nf_ct_frag6_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; + unsigned int hash; arg.id = id; arg.src = src; arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - q = inet_frag_create(&nf_frags, &arg, hash); + q = inet_frag_find(&nf_frags, &arg, hash); if (q == NULL) goto oom; @@ -198,28 +199,6 @@ oom: return NULL; } -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; - struct hlist_node *n; - unsigned int hash = ip6qhashfn(id, src, dst); - - read_lock(&nf_frags.lock); - hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&nf_frags.lock); - return fq; - } - } - read_unlock(&nf_frags.lock); - - return nf_ct_frag6_create(hash, id, src, dst); -} - static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) @@ -706,6 +685,7 @@ int nf_ct_frag6_init(void) nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.match = ip6_frag_match; nf_frags.equal = ip6_frag_equal; nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ce8734028d94..11fffe791fc4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -155,6 +155,18 @@ int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) } EXPORT_SYMBOL(ip6_frag_equal); +int ip6_frag_match(struct inet_frag_queue *q, void *a) +{ + struct frag_queue *fq; + struct ip6_create_arg *arg = a; + + fq = container_of(q, struct frag_queue, q); + return (fq->id == arg->id && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst)); +} +EXPORT_SYMBOL(ip6_frag_match); + /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -245,20 +257,20 @@ out: fq_put(fq); } -/* Creation primitives. */ - -static struct frag_queue * -ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev, unsigned int hash) +static __inline__ struct frag_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, + struct inet6_dev *idev) { struct inet_frag_queue *q; struct ip6_create_arg arg; + unsigned int hash; arg.id = id; arg.src = src; arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - q = inet_frag_create(&ip6_frags, &arg, hash); + q = inet_frag_find(&ip6_frags, &arg, hash); if (q == NULL) goto oom; @@ -269,31 +281,6 @@ oom: return NULL; } -static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) -{ - struct frag_queue *fq; - struct hlist_node *n; - unsigned int hash; - - read_lock(&ip6_frags.lock); - hash = ip6qhashfn(id, src, dst); - hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&ip6_frags.lock); - return fq; - } - } - read_unlock(&ip6_frags.lock); - - return ip6_frag_create(id, src, dst, idev, hash); -} - - static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { @@ -673,6 +660,7 @@ void __init ipv6_frag_init(void) ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.match = ip6_frag_match; ip6_frags.equal = ip6_frag_equal; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); -- cgit v1.2.3 From 48d60056387c37a17a46feda48613587a90535e5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:47:56 -0700 Subject: [INET]: Remove no longer needed ->equal callback Since this callback is used to check for conflicts in hashtable when inserting a newly created frag queue, we can do the same by checking for matching the queue with the argument, used to create one. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 -- include/net/ipv6.h | 1 - net/ipv4/inet_fragment.c | 6 +++--- net/ipv4/ip_fragment.c | 15 --------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 - net/ipv6/reassembly.c | 13 ------------- 6 files changed, 3 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 64299266a868..954def408975 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,8 +43,6 @@ struct inet_frags { void *arg); void (*destructor)(struct inet_frag_queue *); void (*skb_free)(struct sk_buff *); - int (*equal)(struct inet_frag_queue *q1, - struct inet_frag_queue *q2); int (*match)(struct inet_frag_queue *q, void *arg); void (*frag_expire)(unsigned long data); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 005853a33ef6..ae328b680ff2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -378,7 +378,6 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, } struct inet_frag_queue; -int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2); struct ip6_create_arg { __be32 id; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 6ba98ebbed93..3ed09dd93442 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -174,7 +174,7 @@ int inet_frag_evictor(struct inet_frags *f) EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, - struct inet_frags *f, unsigned int hash) + struct inet_frags *f, unsigned int hash, void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP @@ -188,7 +188,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, * promoted read lock to write lock. */ hlist_for_each_entry(qp, n, &f->hash[hash], list) { - if (f->equal(qp, qp_in)) { + if (f->match(qp, arg)) { atomic_inc(&qp->refcnt); write_unlock(&f->lock); qp_in->last_in |= COMPLETE; @@ -235,7 +235,7 @@ static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, if (q == NULL) return NULL; - return inet_frag_intern(q, f, hash); + return inet_frag_intern(q, f, hash, arg); } struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 928259dbc0f8..314593b20506 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -128,20 +128,6 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q) return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); } -static int ip4_frag_equal(struct inet_frag_queue *q1, - struct inet_frag_queue *q2) -{ - struct ipq *qp1, *qp2; - - qp1 = container_of(q1, struct ipq, q); - qp2 = container_of(q2, struct ipq, q); - return (qp1->id == qp2->id && - qp1->saddr == qp2->saddr && - qp1->daddr == qp2->daddr && - qp1->protocol == qp2->protocol && - qp1->user == qp2->user); -} - static int ip4_frag_match(struct inet_frag_queue *q, void *a) { struct ipq *qp; @@ -631,7 +617,6 @@ void __init ipfrag_init(void) ip4_frags.destructor = ip4_frag_free; ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); - ip4_frags.equal = ip4_frag_equal; ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; inet_frags_init(&ip4_frags); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bff63d79c644..25746d31504d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -686,7 +686,6 @@ int nf_ct_frag6_init(void) nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; - nf_frags.equal = ip6_frag_equal; nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 11fffe791fc4..01766bc75b6a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -143,18 +143,6 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q) return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); } -int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) -{ - struct frag_queue *fq1, *fq2; - - fq1 = container_of(q1, struct frag_queue, q); - fq2 = container_of(q2, struct frag_queue, q); - return (fq1->id == fq2->id && - ipv6_addr_equal(&fq2->saddr, &fq1->saddr) && - ipv6_addr_equal(&fq2->daddr, &fq1->daddr)); -} -EXPORT_SYMBOL(ip6_frag_equal); - int ip6_frag_match(struct inet_frag_queue *q, void *a) { struct frag_queue *fq; @@ -661,7 +649,6 @@ void __init ipv6_frag_init(void) ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; - ip6_frags.equal = ip6_frag_equal; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); } -- cgit v1.2.3 From 55b333253d5bcafbe187b50474e40789301c53c6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:21:26 -0700 Subject: [NET]: Introduce the sk_detach_filter() call Filter is attached in a separate function, so do the same for filter detaching. This also removes one variable sock_setsockopt(). Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + net/core/filter.c | 16 ++++++++++++++++ net/core/sock.c | 12 +----------- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 91b2e3b9251e..ddfa0372a3b7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -146,6 +146,7 @@ struct sock; extern unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, int flen); #endif /* __KERNEL__ */ diff --git a/net/core/filter.c b/net/core/filter.c index bd903aaf7aa7..fd607581ab50 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -433,5 +433,21 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return err; } +int sk_detach_filter(struct sock *sk) +{ + int ret = -ENOENT; + struct sk_filter *filter; + + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter) { + rcu_assign_pointer(sk->sk_filter, NULL); + sk_filter_release(sk, filter); + ret = 0; + } + rcu_read_unlock_bh(); + return ret; +} + EXPORT_SYMBOL(sk_chk_filter); EXPORT_SYMBOL(sk_run_filter); diff --git a/net/core/sock.c b/net/core/sock.c index d45ecdccc6a1..07101381b8b7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -428,7 +428,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk=sock->sk; - struct sk_filter *filter; int val; int valbool; struct linger ling; @@ -652,16 +651,7 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_release(sk, filter); - rcu_read_unlock_bh(); - break; - } - rcu_read_unlock_bh(); - ret = -ENONET; + ret = sk_detach_filter(sk); break; case SO_PASSSEC: -- cgit v1.2.3 From 309dd5fc872448e35634d510049642312ebc170d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:21:51 -0700 Subject: [NET]: Move the filter releasing into a separate call This is done merely as a preparation for the fix. The sk_filter_uncharge() unaccounts the filter memory and calls the sk_filter_release(), which in turn decrements the refcount anf frees the filter. The latter function will be required separately. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/sock.h | 12 ++++++++---- net/core/filter.c | 4 ++-- net/core/sock.c | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 453c79d0915b..b9cfe125c9e6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -922,14 +922,18 @@ static inline void sk_filter_rcu_free(struct rcu_head *rcu) * Remove a filter from a socket and release its resources. */ -static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) +static inline void sk_filter_release(struct sk_filter *fp) +{ + if (atomic_dec_and_test(&fp->refcnt)) + call_rcu_bh(&fp->rcu, sk_filter_rcu_free); +} + +static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { unsigned int size = sk_filter_len(fp); atomic_sub(size, &sk->sk_omem_alloc); - - if (atomic_dec_and_test(&fp->refcnt)) - call_rcu_bh(&fp->rcu, sk_filter_rcu_free); + sk_filter_release(fp); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index fd607581ab50..2be1830d3c35 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -429,7 +429,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } if (fp) - sk_filter_release(sk, fp); + sk_filter_uncharge(sk, fp); return err; } @@ -442,7 +442,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_release(sk, filter); + sk_filter_uncharge(sk, filter); ret = 0; } rcu_read_unlock_bh(); diff --git a/net/core/sock.c b/net/core/sock.c index 07101381b8b7..d292b4113d6e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -915,7 +915,7 @@ void sk_free(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { - sk_filter_release(sk, filter); + sk_filter_uncharge(sk, filter); rcu_assign_pointer(sk->sk_filter, NULL); } -- cgit v1.2.3 From 47e958eac280c263397582d5581e868c3227a1bd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:22:42 -0700 Subject: [NET]: Fix the race between sk_filter_(de|at)tach and sk_clone() The proposed fix is to delay the reference counter decrement until the quiescent state pass. This will give sk_clone() a chance to get the reference on the cloned filter. Regular sk_filter_uncharge can happen from the sk_free() only and there's no need in delaying the put - the socket is dead anyway and is to be release itself. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/sock.h | 12 +----------- net/core/filter.c | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index b9cfe125c9e6..43fc3fa50d62 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -904,16 +904,6 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb) return err; } -/** - * sk_filter_rcu_free: Free a socket filter - * @rcu: rcu_head that contains the sk_filter to free - */ -static inline void sk_filter_rcu_free(struct rcu_head *rcu) -{ - struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - kfree(fp); -} - /** * sk_filter_release: Release a socket filter * @sk: socket @@ -925,7 +915,7 @@ static inline void sk_filter_rcu_free(struct rcu_head *rcu) static inline void sk_filter_release(struct sk_filter *fp) { if (atomic_dec_and_test(&fp->refcnt)) - call_rcu_bh(&fp->rcu, sk_filter_rcu_free); + kfree(fp); } static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index 54dddc92452d..1f0068eae501 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -386,6 +386,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen) return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; } +/** + * sk_filter_rcu_release: Release a socket filter by rcu_head + * @rcu: rcu_head that contains the sk_filter to free + */ +static void sk_filter_rcu_release(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + + sk_filter_release(fp); +} + +static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) +{ + unsigned int size = sk_filter_len(fp); + + atomic_sub(size, &sk->sk_omem_alloc); + call_rcu_bh(&fp->rcu, sk_filter_rcu_release); +} + /** * sk_attach_filter - attach a socket filter * @fprog: the filter program @@ -428,7 +447,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) rcu_assign_pointer(sk->sk_filter, fp); rcu_read_unlock_bh(); - sk_filter_uncharge(sk, old_fp); + sk_filter_delayed_uncharge(sk, old_fp); return 0; } @@ -441,7 +460,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_uncharge(sk, filter); + sk_filter_delayed_uncharge(sk, filter); ret = 0; } rcu_read_unlock_bh(); -- cgit v1.2.3 From c4541b41c0e4b75b11125fed16db642fc03cb31c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:28:53 -0700 Subject: [IPSEC]: Move tunnel parsing for IPv4 out of xfrm4_input This patch moves the tunnel parsing for IPv4 out of xfrm4_input and into xfrm4_tunnel. This change is in line with what IPv6 does and will allow us to merge the two input functions. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 8 ++++++++ net/ipv4/xfrm4_input.c | 36 +++++++++++------------------------- net/ipv4/xfrm4_tunnel.c | 9 +++++++-- 3 files changed, 26 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0e844845f3f4..680739f69003 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1046,7 +1046,15 @@ extern void xfrm_replay_notify(struct xfrm_state *x, int event); extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm_output(struct sk_buff *skb); +extern int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); extern int xfrm4_rcv(struct sk_buff *skb); + +static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) +{ + return xfrm4_rcv_encap(skb, nexthdr, spi, 0); +} + extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index e9bbfde19ac3..5cb0b5995bc8 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -16,19 +16,6 @@ #include #include -static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) -{ - switch (nexthdr) { - case IPPROTO_IPIP: - case IPPROTO_IPV6: - *spi = ip_hdr(skb)->saddr; - *seq = 0; - return 0; - } - - return xfrm_parse_spi(skb, nexthdr, spi, seq); -} - #ifdef CONFIG_NETFILTER static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) { @@ -46,28 +33,29 @@ drop: } #endif -static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) +int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { - __be32 spi, seq; + int err; + __be32 seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); unsigned int nhoff = offsetof(struct iphdr, protocol); - if (err != 0) + seq = 0; + if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) goto drop; do { const struct iphdr *iph = ip_hdr(skb); - int nexthdr; if (xfrm_nr == XFRM_MAX_DEPTH) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - iph->protocol != IPPROTO_IPV6 ? iph->protocol : IPPROTO_IPIP, AF_INET); + nexthdr, AF_INET); if (x == NULL) goto drop; @@ -111,7 +99,7 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) break; } - err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); + err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) goto drop; } while (!err); @@ -165,6 +153,7 @@ drop: kfree_skb(skb); return 0; } +EXPORT_SYMBOL(xfrm4_rcv_encap); /* If it's a keepalive packet, then just eat it. * If it's an encapsulated packet, then pass it to the @@ -252,11 +241,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, len); skb_reset_transport_header(skb); - /* modify the protocol (it's ESP!) */ - iph->protocol = IPPROTO_ESP; - /* process ESP */ - ret = xfrm4_rcv_encap(skb, encap_type); + ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); return ret; drop: @@ -266,7 +252,7 @@ drop: int xfrm4_rcv(struct sk_buff *skb) { - return xfrm4_rcv_encap(skb, 0); + return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); } EXPORT_SYMBOL(xfrm4_rcv); diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 83e9580feac4..326845195620 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -48,20 +48,25 @@ static struct xfrm_type ipip_type = { .output = ipip_output }; +static int xfrm_tunnel_rcv(struct sk_buff *skb) +{ + return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr); +} + static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) { return -ENOENT; } static struct xfrm_tunnel xfrm_tunnel_handler = { - .handler = xfrm4_rcv, + .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 2, }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static struct xfrm_tunnel xfrm64_tunnel_handler = { - .handler = xfrm4_rcv, + .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 2, }; -- cgit v1.2.3 From 33b5ecb8f64706d1ed472dcb44162ab3a7345724 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:29:25 -0700 Subject: [IPSEC]: Get nexthdr from caller in xfrm6_rcv_spi Currently xfrm6_rcv_spi gets the nexthdr value itself from the packet. This means that we need to fix up the value in case we have a 4-on-6 tunnel. Moving this logic into the caller simplifies things and allows us to merge the code with IPv4. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv6/xfrm6_input.c | 9 ++++----- net/ipv6/xfrm6_tunnel.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 680739f69003..d8974ca19032 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1058,7 +1058,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); -extern int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi); +extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); extern int xfrm6_rcv(struct sk_buff *skb); extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 02f69e544f6f..596a730294ec 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,7 +16,7 @@ #include #include -int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { int err; __be32 seq; @@ -24,11 +24,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int nexthdr; unsigned int nhoff; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) @@ -41,7 +39,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - nexthdr != IPPROTO_IPIP ? nexthdr : IPPROTO_IPV6, AF_INET6); + nexthdr, AF_INET6); if (x == NULL) goto drop; spin_lock(&x->lock); @@ -135,7 +133,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_rcv(struct sk_buff *skb) { - return xfrm6_rcv_spi(skb, 0); + return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0); } EXPORT_SYMBOL(xfrm6_rcv); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 6c67ac197ee0..fae90ff31087 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -257,7 +257,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, spi) > 0 ? : 0; + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0; } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v1.2.3 From aa5d62cc8777f733f8b59b5586c0a1989813189e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:31:12 -0700 Subject: [IPSEC]: Move type and mode map into xfrm_state.c The type and mode maps are only used by SAs, not policies. So it makes sense to move them from xfrm_policy.c into xfrm_state.c. This also allows us to mark xfrm_get_type/xfrm_put_type/xfrm_get_mode/xfrm_put_mode as static. The only other change I've made in the move is to get rid of the casts on the request_module call for types. They're unnecessary because C will promote them to ints anyway. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 8 +-- net/xfrm/xfrm_policy.c | 173 ------------------------------------------------- net/xfrm/xfrm_state.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 179 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d8974ca19032..7f156a0b94c8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -228,8 +228,6 @@ struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { unsigned short family; - struct xfrm_type *type_map[IPPROTO_MAX]; - struct xfrm_mode *mode_map[XFRM_MODE_MAX]; struct dst_ops *dst_ops; void (*garbage_collect)(void); int (*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl); @@ -256,6 +254,8 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; + struct xfrm_type *type_map[IPPROTO_MAX]; + struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -295,8 +295,6 @@ struct xfrm_type extern int xfrm_register_type(struct xfrm_type *type, unsigned short family); extern int xfrm_unregister_type(struct xfrm_type *type, unsigned short family); -extern struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family); -extern void xfrm_put_type(struct xfrm_type *type); struct xfrm_mode { int (*input)(struct xfrm_state *x, struct sk_buff *skb); @@ -320,8 +318,6 @@ struct xfrm_mode { extern int xfrm_register_mode(struct xfrm_mode *mode, int family); extern int xfrm_unregister_mode(struct xfrm_mode *mode, int family); -extern struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family); -extern void xfrm_put_mode(struct xfrm_mode *mode); struct xfrm_tmpl { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index af27c193697c..ca24c90d3796 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -49,8 +49,6 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); -static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family); -static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo); static inline int __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) @@ -86,72 +84,6 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -int xfrm_register_type(struct xfrm_type *type, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); - struct xfrm_type **typemap; - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - - if (likely(typemap[type->proto] == NULL)) - typemap[type->proto] = type; - else - err = -EEXIST; - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_register_type); - -int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); - struct xfrm_type **typemap; - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - - if (unlikely(typemap[type->proto] != type)) - err = -ENOENT; - else - typemap[type->proto] = NULL; - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_type); - -struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_type **typemap; - struct xfrm_type *type; - int modload_attempted = 0; - -retry: - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - typemap = afinfo->type_map; - - type = typemap[proto]; - if (unlikely(type && !try_module_get(type->owner))) - type = NULL; - if (!type && !modload_attempted) { - xfrm_policy_put_afinfo(afinfo); - request_module("xfrm-type-%d-%d", - (int) family, (int) proto); - modload_attempted = 1; - goto retry; - } - - xfrm_policy_put_afinfo(afinfo); - return type; -} - int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family) { @@ -170,94 +102,6 @@ int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, } EXPORT_SYMBOL(xfrm_dst_lookup); -void xfrm_put_type(struct xfrm_type *type) -{ - module_put(type->owner); -} - -int xfrm_register_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_policy_lock_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -EEXIST; - modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == NULL)) { - modemap[mode->encap] = mode; - err = 0; - } - - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_register_mode); - -int xfrm_unregister_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_policy_lock_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -ENOENT; - modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == mode)) { - modemap[mode->encap] = NULL; - err = 0; - } - - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_mode); - -struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode *mode; - int modload_attempted = 0; - - if (unlikely(encap >= XFRM_MODE_MAX)) - return NULL; - -retry: - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - - mode = afinfo->mode_map[encap]; - if (unlikely(mode && !try_module_get(mode->owner))) - mode = NULL; - if (!mode && !modload_attempted) { - xfrm_policy_put_afinfo(afinfo); - request_module("xfrm-mode-%d-%d", family, encap); - modload_attempted = 1; - goto retry; - } - - xfrm_policy_put_afinfo(afinfo); - return mode; -} - -void xfrm_put_mode(struct xfrm_mode *mode) -{ - module_put(mode->owner); -} - static inline unsigned long make_jiffies(long secs) { if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) @@ -2213,23 +2057,6 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) read_unlock(&xfrm_policy_afinfo_lock); } -static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family) -{ - struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - write_lock_bh(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; - if (unlikely(!afinfo)) - write_unlock_bh(&xfrm_policy_afinfo_lock); - return afinfo; -} - -static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - write_unlock_bh(&xfrm_policy_afinfo_lock); -} - static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 344f0a6abec5..dc438f2b9442 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -187,6 +187,176 @@ int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) +{ + struct xfrm_state_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + write_lock_bh(&xfrm_state_afinfo_lock); + afinfo = xfrm_state_afinfo[family]; + if (unlikely(!afinfo)) + write_unlock_bh(&xfrm_state_afinfo_lock); + return afinfo; +} + +static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) +{ + write_unlock_bh(&xfrm_state_afinfo_lock); +} + +int xfrm_register_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_type **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; + else + err = -EEXIST; + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_type); + +int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_type **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + if (unlikely(typemap[type->proto] != type)) + err = -ENOENT; + else + typemap[type->proto] = NULL; + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_type); + +static struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_type **typemap; + struct xfrm_type *type; + int modload_attempted = 0; + +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + typemap = afinfo->type_map; + + type = typemap[proto]; + if (unlikely(type && !try_module_get(type->owner))) + type = NULL; + if (!type && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-type-%d-%d", family, proto); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return type; +} + +static void xfrm_put_type(struct xfrm_type *type) +{ + module_put(type->owner); +} + +int xfrm_register_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -EEXIST; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == NULL)) { + modemap[mode->encap] = mode; + err = 0; + } + + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_mode); + +int xfrm_unregister_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -ENOENT; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == mode)) { + modemap[mode->encap] = NULL; + err = 0; + } + + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_mode); + +static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *mode; + int modload_attempted = 0; + + if (unlikely(encap >= XFRM_MODE_MAX)) + return NULL; + +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + + mode = afinfo->mode_map[encap]; + if (unlikely(mode && !try_module_get(mode->owner))) + mode = NULL; + if (!mode && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-mode-%d-%d", family, encap); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return mode; +} + +static void xfrm_put_mode(struct xfrm_mode *mode) +{ + module_put(mode->owner); +} + static void xfrm_state_gc_destroy(struct xfrm_state *x) { del_timer_sync(&x->timer); -- cgit v1.2.3 From 1bfcb10f670f5ff5e1d9f53e59680573524cb142 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:31:50 -0700 Subject: [IPSEC]: Add missing BEET checks Currently BEET mode does not reinject the packet back into the stack like tunnel mode does. Since BEET should behave just like tunnel mode this is incorrect. This patch fixes this by introducing a flags field to xfrm_mode that tells the IPsec code whether it should terminate and reinject the packet back into the stack. It then sets the flag for BEET and tunnel mode. I've also added a number of missing BEET checks elsewhere where we check whether a given mode is a tunnel or not. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_mode_beet.c | 1 + net/ipv4/xfrm4_mode_tunnel.c | 1 + net/ipv4/xfrm4_output.c | 2 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_mode_beet.c | 1 + net/ipv6/xfrm6_mode_tunnel.c | 1 + net/ipv6/xfrm6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 3 +-- net/ipv6/xfrm6_state.c | 6 ++++-- net/xfrm/xfrm_output.c | 2 +- net/xfrm/xfrm_policy.c | 6 ++++-- 14 files changed, 25 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7f156a0b94c8..2143f2911a21 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -314,6 +314,12 @@ struct xfrm_mode { struct module *owner; unsigned int encap; + int flags; +}; + +/* Flags for xfrm_mode. */ +enum { + XFRM_MODE_FLAG_TUNNEL = 1, }; extern int xfrm_register_mode(struct xfrm_mode *mode, int family); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 5cb0b5995bc8..bc5dc0747cd2 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -94,7 +94,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, if (x->mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 73d2338bec55..e42e122414be 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -114,6 +114,7 @@ static struct xfrm_mode xfrm4_beet_mode = { .output = xfrm4_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_beet_init(void) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 1ae9d32276f0..e4deecba6dd2 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -139,6 +139,7 @@ static struct xfrm_mode xfrm4_tunnel_mode = { .output = xfrm4_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_tunnel_init(void) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index a4edd666318b..dcbc2743069c 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb) struct iphdr *iph; int err; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 329825ca68fe..2373d673df60 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,7 +117,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch (encap_family) { case AF_INET: diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b1201c33eb12..c6ee1a3ba19a 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -71,7 +71,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 13bb1e856764..2bfb4f05c14c 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -79,6 +79,7 @@ static struct xfrm_mode xfrm6_beet_mode = { .output = xfrm6_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_beet_init(void) diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index ea2283879112..fd84e2217274 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -118,6 +118,7 @@ static struct xfrm_mode xfrm6_tunnel_mode = { .output = xfrm6_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index a5a32c17249d..c9f42d1c2dff 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb) struct ipv6hdr *iph; int err; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 15aa4c58c315..dc4bdcb55cbe 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -178,8 +178,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL || - xfrm[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch(encap_family) { case AF_INET: diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index cdadb4847469..e644c80515fc 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -93,7 +93,8 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) /* Rule 4: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->props.mode == XFRM_MODE_TUNNEL) { + (src[i]->props.mode == XFRM_MODE_TUNNEL || + src[i]->props.mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } @@ -146,7 +147,8 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) /* Rule 3: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->mode == XFRM_MODE_TUNNEL) { + (src[i]->mode == XFRM_MODE_TUNNEL || + src[i]->mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 0eb3377602e9..8bf71ba2345f 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -82,7 +82,7 @@ int xfrm_output(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + } while (x && !(x->mode->flags & XFRM_MODE_FLAG_TUNNEL)); err = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ca24c90d3796..1d66fb42c9cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1940,7 +1940,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (xdst->genid != dst->xfrm->genid) return 0; - if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + if (strict && fl && + !(dst->xfrm->mode->flags & XFRM_MODE_FLAG_TUNNEL) && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) return 0; @@ -2291,7 +2292,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) continue; n++; - if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL) + if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && + pol->xfrm_vec[i].mode != XFRM_MODE_BEET) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, -- cgit v1.2.3 From 17c2a42a24e1e8dd6aa7cea4f84e034ab1bfff31 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:33:12 -0700 Subject: [IPSEC]: Store afinfo pointer in xfrm_mode It is convenient to have a pointer from xfrm_state to address-specific functions such as the output function for a family. Currently the address-specific policy code calls out to the xfrm state code to get those pointers when we could get it in an easier way via the state itself. This patch adds an xfrm_state_afinfo to xfrm_mode (since they're address-specific) and changes the policy code to use it. I've also added an owner field to do reference counting on the module providing the afinfo even though it isn't strictly necessary today since IPv6 can't be unloaded yet. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 +++--- net/ipv4/xfrm4_policy.c | 13 +------------ net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_policy.c | 14 +------------- net/ipv6/xfrm6_state.c | 1 + net/xfrm/xfrm_state.c | 26 +++++++++++++++++--------- 6 files changed, 24 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2143f2911a21..f0f3318f6550 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -253,7 +253,8 @@ extern void km_state_expired(struct xfrm_state *x, int hard, u32 pid); extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { - unsigned short family; + unsigned int family; + struct module *owner; struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); @@ -267,8 +268,6 @@ struct xfrm_state_afinfo { extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); -extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); -extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -312,6 +311,7 @@ struct xfrm_mode { */ int (*output)(struct xfrm_state *x,struct sk_buff *skb); + struct xfrm_state_afinfo *afinfo; struct module *owner; unsigned int encap; int flags; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 2373d673df60..c65b8e03c049 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -151,7 +151,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; @@ -169,17 +168,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv6 module can be unloaded, we should manage reference - * to xfrm6_output in afinfo->output. Miyazawa - * */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - err = -EAFNOSUPPORT; - goto error; - } - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); + dst_prev->output = dst_prev->xfrm->mode->afinfo->output; if (dst_prev->xfrm->props.family == AF_INET && rt->peer) atomic_inc(&rt->peer->refcnt); x->u.rt.peer = rt->peer; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 93e2c061cdda..13d54a1c3337 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -49,6 +49,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, + .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .output = xfrm4_output, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dc4bdcb55cbe..324268329f69 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -214,7 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; @@ -231,18 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv4 is implemented as module and can be unloaded, - * we should manage reference to xfrm4_output in afinfo->output. - * Miyazawa - */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - goto error; - } - - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); + dst_prev->output = dst_prev->xfrm->mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index e644c80515fc..b392bee396f1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -170,6 +170,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, + .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index dc438f2b9442..48b4a06b3d1a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -57,6 +57,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); + static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -289,11 +292,18 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) err = -EEXIST; modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == NULL)) { - modemap[mode->encap] = mode; - err = 0; - } + if (modemap[mode->encap]) + goto out; + err = -ENOENT; + if (!try_module_get(afinfo->owner)) + goto out; + + mode->afinfo = afinfo; + modemap[mode->encap] = mode; + err = 0; + +out: xfrm_state_unlock_afinfo(afinfo); return err; } @@ -316,6 +326,7 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) modemap = afinfo->mode_map; if (likely(modemap[mode->encap] == mode)) { modemap[mode->encap] = NULL; + module_put(mode->afinfo->owner); err = 0; } @@ -1869,7 +1880,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1881,14 +1892,11 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) return afinfo; } -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { read_unlock(&xfrm_state_afinfo_lock); } -EXPORT_SYMBOL(xfrm_state_get_afinfo); -EXPORT_SYMBOL(xfrm_state_put_afinfo); - /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { -- cgit v1.2.3 From 13996378e6585fb25e582afe7489bf52dde78deb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:35:51 -0700 Subject: [IPSEC]: Rename mode to outer_mode and add inner_mode This patch adds a new field to xfrm states called inner_mode. The existing mode object is renamed to outer_mode. This is the first part of an attempt to fix inter-family transforms. As it is we always use the outer family when determining which mode to use. As a result we may end up shoving IPv4 packets into netfilter6 and vice versa. What we really want is to use the inner family for the first part of outbound processing and the outer family for the second part. For inbound processing we'd use the opposite pairing. I've also added a check to prevent silly combinations such as transport mode with inter-family transforms. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/core/pktgen.c | 2 +- net/ipv4/xfrm4_input.c | 4 ++-- net/ipv4/xfrm4_output.c | 2 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_input.c | 4 ++-- net/ipv6/xfrm6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/xfrm/xfrm_output.c | 4 ++-- net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_state.c | 18 ++++++++++++++---- 11 files changed, 28 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f0f3318f6550..688f6f5d3285 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -186,7 +186,8 @@ struct xfrm_state /* Reference to data common to all the instances of this * transformer. */ struct xfrm_type *type; - struct xfrm_mode *mode; + struct xfrm_mode *inner_mode; + struct xfrm_mode *outer_mode; /* Security context */ struct xfrm_sec_ctx *security; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2100c734b102..8cae60c53383 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2454,7 +2454,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) spin_lock(&x->lock); iph = ip_hdr(skb); - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; err = x->type->output(x, skb); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bc5dc0747cd2..5e95c8a07efb 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -91,10 +91,10 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index dcbc2743069c..c4a7156962bd 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb) struct iphdr *iph; int err; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1f0ea0e0371b..cc86fb110dd8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -168,7 +168,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = dst_prev->xfrm->mode->afinfo->output; + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; if (rt0->peer) atomic_inc(&rt0->peer->refcnt); x->u.rt.peer = rt0->peer; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index c6ee1a3ba19a..515783707e86 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -68,10 +68,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c9f42d1c2dff..656976760ad4 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb) struct ipv6hdr *iph; int err; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 324268329f69..82e27b80d07d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -230,7 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = dst_prev->xfrm->mode->afinfo->output; + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8bf71ba2345f..f4bfd6c45651 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -63,7 +63,7 @@ int xfrm_output(struct sk_buff *skb) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; @@ -82,7 +82,7 @@ int xfrm_output(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !(x->mode->flags & XFRM_MODE_FLAG_TUNNEL)); + } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); err = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1d66fb42c9cb..b702bd8a3893 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1941,7 +1941,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 0; if (strict && fl && - !(dst->xfrm->mode->flags & XFRM_MODE_FLAG_TUNNEL) && + !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 48b4a06b3d1a..224b44e31a07 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -377,8 +377,10 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->calg); kfree(x->encap); kfree(x->coaddr); - if (x->mode) - xfrm_put_mode(x->mode); + if (x->inner_mode) + xfrm_put_mode(x->inner_mode); + if (x->outer_mode) + xfrm_put_mode(x->outer_mode); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -1947,6 +1949,14 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; + x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (x->inner_mode == NULL) + goto error; + + if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) + goto error; + x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) goto error; @@ -1955,8 +1965,8 @@ int xfrm_init_state(struct xfrm_state *x) if (err) goto error; - x->mode = xfrm_get_mode(x->props.mode, family); - if (x->mode == NULL) + x->outer_mode = xfrm_get_mode(x->props.mode, family); + if (x->outer_mode == NULL) goto error; x->km.state = XFRM_STATE_VALID; -- cgit v1.2.3 From 1b83336bb9fa4f110b5b0a4378a82cd5f0fd0ac8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 18 Oct 2007 05:09:28 -0700 Subject: [NET]: Fix OOPS due to missing check in dev_parse_header(). [ This is kernel bugzilla 9174 "linux-2.6.23-git11 kernel panic" ] The device in question is an IPv6-over-IPv4 tunnel, which doesn't have any header_ops, so the crash happens in dev_parse_header when dereferencing them. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 39dd83b183a9..452c88d971ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -827,7 +827,7 @@ static inline int dev_parse_header(const struct sk_buff *skb, { const struct net_device *dev = skb->dev; - if (!dev->header_ops->parse) + if (!dev->header_ops || !dev->header_ops->parse) return 0; return dev->header_ops->parse(skb, haddr); } -- cgit v1.2.3 From 009e8c965fd72a78636b9a96c7015109c5c70176 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 18 Oct 2007 05:12:21 -0700 Subject: [NETFILTER]: xt_sctp: fix mistake to pass a pointer where array is required Macros like SCTP_CHUNKMAP_XXX(chukmap) require chukmap to be an array, but match_packet() passes a pointer to these macros. Also remove the ELEMCOUNT macro and fix a bug in SCTP_CHUNKMAP_COPY. Signed-off-by: Li Zefan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_sctp.h | 13 +++++-------- net/netfilter/xt_sctp.c | 18 ++++++++---------- 2 files changed, 13 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_sctp.h b/include/linux/netfilter/xt_sctp.h index b157897e7792..dd5a4fd4cfd3 100644 --- a/include/linux/netfilter/xt_sctp.h +++ b/include/linux/netfilter/xt_sctp.h @@ -7,9 +7,6 @@ #define XT_SCTP_VALID_FLAGS 0x07 -#define ELEMCOUNT(x) (sizeof(x)/sizeof(x[0])) - - struct xt_sctp_flag_info { u_int8_t chunktype; u_int8_t flag; @@ -59,21 +56,21 @@ struct xt_sctp_info { #define SCTP_CHUNKMAP_RESET(chunkmap) \ do { \ int i; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ chunkmap[i] = 0; \ } while (0) #define SCTP_CHUNKMAP_SET_ALL(chunkmap) \ do { \ int i; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ chunkmap[i] = ~0; \ } while (0) #define SCTP_CHUNKMAP_COPY(destmap, srcmap) \ do { \ int i; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + for (i = 0; i < ARRAY_SIZE(srcmap); i++) \ destmap[i] = srcmap[i]; \ } while (0) @@ -81,7 +78,7 @@ struct xt_sctp_info { ({ \ int i; \ int flag = 1; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ if (chunkmap[i]) { \ flag = 0; \ break; \ @@ -94,7 +91,7 @@ struct xt_sctp_info { ({ \ int i; \ int flag = 1; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ if (chunkmap[i] != ~0) { \ flag = 0; \ break; \ diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index f907770fd4e9..3358273a47b7 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -42,21 +42,21 @@ match_flags(const struct xt_sctp_flag_info *flag_info, static inline bool match_packet(const struct sk_buff *skb, unsigned int offset, - const u_int32_t *chunkmap, - int chunk_match_type, - const struct xt_sctp_flag_info *flag_info, - const int flag_count, + const struct xt_sctp_info *info, bool *hotdrop) { u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; sctp_chunkhdr_t _sch, *sch; + int chunk_match_type = info->chunk_match_type; + const struct xt_sctp_flag_info *flag_info = info->flag_info; + int flag_count = info->flag_count; #ifdef DEBUG_SCTP int i = 0; #endif if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) - SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap); + SCTP_CHUNKMAP_COPY(chunkmapcopy, info->chunkmap); do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); @@ -73,7 +73,7 @@ match_packet(const struct sk_buff *skb, duprintf("skb->len: %d\toffset: %d\n", skb->len, offset); - if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) { + if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) { switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ANY: if (match_flags(flag_info, flag_count, @@ -104,7 +104,7 @@ match_packet(const struct sk_buff *skb, switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ALL: - return SCTP_CHUNKMAP_IS_CLEAR(chunkmap); + return SCTP_CHUNKMAP_IS_CLEAR(info->chunkmap); case SCTP_CHUNK_MATCH_ANY: return false; case SCTP_CHUNK_MATCH_ONLY: @@ -148,9 +148,7 @@ match(const struct sk_buff *skb, && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), - info->chunkmap, info->chunk_match_type, - info->flag_info, info->flag_count, - hotdrop), + info, hotdrop), XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } -- cgit v1.2.3