summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David S. Miller <davem@nuts.ninka.net> 2003-05-12 01:51:27 -0700
committer: David S. Miller <davem@nuts.ninka.net> 2003-05-12 01:51:27 -0700
commit: f5bda5bc978626bf1f508370c25702366bc2c214 (patch)
tree: fc9df7eb89c998942c67b76a2a805705b76b9513
parent: d62a3dd4742d6fad314e1f3e15ec9d6b63adacff (diff)
parent: cb52a86c562adbf63abf8bde4d4502a444a64c01 (diff)
Merge nuts.ninka.net:/home/davem/src/BK/network-2.5
into nuts.ninka.net:/home/davem/src/BK/net-2.5
-rw-r--r-- crypto/Kconfig | 14
-rw-r--r-- include/asm-sparc/hardirq.h | 1
-rw-r--r-- include/asm-sparc64/hardirq.h | 1
-rw-r--r-- include/linux/brlock.h | 222
-rw-r--r-- include/linux/ipv6.h | 7
-rw-r--r-- include/linux/netdevice.h | 3
-rw-r--r-- include/linux/skbuff.h | 9
-rw-r--r-- include/net/ipv6.h | 20
-rw-r--r-- include/net/xfrm.h | 13
-rw-r--r-- kernel/ksyms.c | 12
-rw-r--r-- lib/Makefile | 2
-rw-r--r-- lib/brlock.c | 72
-rw-r--r-- net/bridge/br_fdb.c | 123
-rw-r--r-- net/bridge/br_if.c | 10
-rw-r--r-- net/bridge/br_ioctl.c | 9
-rw-r--r-- net/bridge/br_private.h | 40
-rw-r--r-- net/bridge/br_private_stp.h | 1
-rw-r--r-- net/bridge/br_private_timer.h | 54
-rw-r--r-- net/bridge/br_stp.c | 103
-rw-r--r-- net/bridge/br_stp_if.c | 51
-rw-r--r-- net/bridge/br_stp_timer.c | 186
-rw-r--r-- net/core/dev.c | 103
-rw-r--r-- net/core/wireless.c | 1
-rw-r--r-- net/ipv4/af_inet.c | 4
-rw-r--r-- net/ipv4/ip_output.c | 10
-rw-r--r-- net/ipv4/ipcomp.c | 67
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_core.c | 7
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat.c | 26
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat.h | 28
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat_masq.c | 1
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat_redir.c | 1
-rw-r--r-- net/ipv4/xfrm4_tunnel.c | 38
-rw-r--r-- net/ipv6/af_inet6.c | 2
-rw-r--r-- net/ipv6/icmp.c | 210
-rw-r--r-- net/ipv6/ip6_output.c | 669
-rw-r--r-- net/ipv6/raw.c | 230
-rw-r--r-- net/ipv6/udp.c | 320
-rw-r--r-- net/key/af_key.c | 8
-rw-r--r-- net/netsyms.c | 4
-rw-r--r-- net/packet/af_packet.c | 17
-rw-r--r-- net/sched/sch_ingress.c | 5
-rw-r--r-- net/xfrm/xfrm_state.c | 19
-rw-r--r-- net/xfrm/xfrm_user.c | 5
43 files changed, 1747 insertions, 981 deletions
diff --git a/crypto/Kconfig b/crypto/Kconfig
index ef70c8cd2a99..2f01aab53eb2 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -6,14 +6,16 @@ menu "Cryptographic options"
config CRYPTO
bool "Cryptographic API"
- default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m
+ default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
+ INET6_ESP=y || INET6_ESP=m
help
This option provides the core Cryptographic API.
config CRYPTO_HMAC
bool "HMAC support"
depends on CRYPTO
- default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m
+ default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
+ INET6_ESP=y || INET6_ESP=m
help
HMAC: Keyed-Hashing for Message Authentication (RFC2104).
This is required for IPSec.
@@ -33,14 +35,16 @@ config CRYPTO_MD4
config CRYPTO_MD5
tristate "MD5 digest algorithm"
depends on CRYPTO
- default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m
+ default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
+ INET6_ESP=y || INET6_ESP=m
help
MD5 message digest algorithm (RFC1321).
config CRYPTO_SHA1
tristate "SHA1 digest algorithm"
depends on CRYPTO
- default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m
+ default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
+ INET6_ESP=y || INET6_ESP=m
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
@@ -68,7 +72,7 @@ config CRYPTO_SHA512
config CRYPTO_DES
tristate "DES and Triple DES EDE cipher algorithms"
depends on CRYPTO
- default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m
+ default y if INET_ESP=y || INET_ESP=m || INET6_ESP=y || INET6_ESP=m
help
DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
diff --git a/include/asm-sparc/hardirq.h b/include/asm-sparc/hardirq.h
index 822ec6f1062d..04644f2f2930 100644
--- a/include/asm-sparc/hardirq.h
+++ b/include/asm-sparc/hardirq.h
@@ -9,7 +9,6 @@
#include <linux/config.h>
#include <linux/threads.h>
-#include <linux/brlock.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
diff --git a/include/asm-sparc64/hardirq.h b/include/asm-sparc64/hardirq.h
index 0d52c3c6c66a..9657368f4ad2 100644
--- a/include/asm-sparc64/hardirq.h
+++ b/include/asm-sparc64/hardirq.h
@@ -8,7 +8,6 @@
#include <linux/config.h>
#include <linux/threads.h>
-#include <linux/brlock.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
diff --git a/include/linux/brlock.h b/include/linux/brlock.h
deleted file mode 100644
index 59880a3f38c6..000000000000
--- a/include/linux/brlock.h
+++ /dev/null
@@ -1,222 +0,0 @@
-#ifndef __LINUX_BRLOCK_H
-#define __LINUX_BRLOCK_H
-
-/*
- * 'Big Reader' read-write spinlocks.
- *
- * super-fast read/write locks, with write-side penalty. The point
- * is to have a per-CPU read/write lock. Readers lock their CPU-local
- * readlock, writers must lock all locks to get write access. These
- * CPU-read-write locks are semantically identical to normal rwlocks.
- * Memory usage is higher as well. (NR_CPUS*L1_CACHE_BYTES bytes)
- *
- * The most important feature is that these spinlocks do not cause
- * cacheline ping-pong in the 'most readonly data' case.
- *
- * Copyright 2000, Ingo Molnar <mingo@redhat.com>
- *
- * Registry idea and naming [ crucial! :-) ] by:
- *
- * David S. Miller <davem@redhat.com>
- *
- * David has an implementation that doesn't use atomic operations in
- * the read branch via memory ordering tricks - i guess we need to
- * split this up into a per-arch thing? The atomicity issue is a
- * secondary item in profiles, at least on x86 platforms.
- *
- * The atomic op version overhead is indeed a big deal on
- * load-locked/store-conditional cpus (ALPHA/MIPS/PPC) and
- * compare-and-swap cpus (Sparc64). So we control which
- * implementation to use with a __BRLOCK_USE_ATOMICS define. -DaveM
- *
- */
-
-/* Register bigreader lock indices here. */
-enum brlock_indices {
- BR_NETPROTO_LOCK,
- __BR_END
-};
-
-#include <linux/config.h>
-
-#ifdef CONFIG_SMP
-
-#include <linux/cache.h>
-#include <linux/spinlock.h>
-
-#if defined(__i386__) || defined(__ia64__) || defined(__x86_64__)
-#define __BRLOCK_USE_ATOMICS
-#else
-#undef __BRLOCK_USE_ATOMICS
-#endif
-
-#ifdef __BRLOCK_USE_ATOMICS
-typedef rwlock_t brlock_read_lock_t;
-#else
-typedef unsigned int brlock_read_lock_t;
-#endif
-
-/*
- * align last allocated index to the next cacheline:
- */
-#define __BR_IDX_MAX \
- (((sizeof(brlock_read_lock_t)*__BR_END + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) / sizeof(brlock_read_lock_t))
-
-extern brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX];
-
-#ifndef __BRLOCK_USE_ATOMICS
-struct br_wrlock {
- spinlock_t lock;
-} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
-
-extern struct br_wrlock __br_write_locks[__BR_IDX_MAX];
-#endif
-
-extern void __br_lock_usage_bug (void);
-
-#ifdef __BRLOCK_USE_ATOMICS
-
-static inline void br_read_lock (enum brlock_indices idx)
-{
- /*
- * This causes a link-time bug message if an
- * invalid index is used:
- */
- if (idx >= __BR_END)
- __br_lock_usage_bug();
-
- preempt_disable();
- _raw_read_lock(&__brlock_array[smp_processor_id()][idx]);
-}
-
-static inline void br_read_unlock (enum brlock_indices idx)
-{
- if (idx >= __BR_END)
- __br_lock_usage_bug();
-
- read_unlock(&__brlock_array[smp_processor_id()][idx]);
-}
-
-#else /* ! __BRLOCK_USE_ATOMICS */
-static inline void br_read_lock (enum brlock_indices idx)
-{
- unsigned int *ctr;
- spinlock_t *lock;
-
- /*
- * This causes a link-time bug message if an
- * invalid index is used:
- */
- if (idx >= __BR_END)
- __br_lock_usage_bug();
-
- preempt_disable();
- ctr = &__brlock_array[smp_processor_id()][idx];
- lock = &__br_write_locks[idx].lock;
-again:
- (*ctr)++;
- mb();
- if (spin_is_locked(lock)) {
- (*ctr)--;
- wmb(); /*
- * The release of the ctr must become visible
- * to the other cpus eventually thus wmb(),
- * we don't care if spin_is_locked is reordered
- * before the releasing of the ctr.
- * However IMHO this wmb() is superfluous even in theory.
- * It would not be superfluous only if on the
- * other CPUs doing a ldl_l instead of an ldl
- * would make a difference and I don't think this is
- * the case.
- * I'd like to clarify this issue further
- * but for now this is a slow path so adding the
- * wmb() will keep us on the safe side.
- */
- while (spin_is_locked(lock))
- barrier();
- goto again;
- }
-}
-
-static inline void br_read_unlock (enum brlock_indices idx)
-{
- unsigned int *ctr;
-
- if (idx >= __BR_END)
- __br_lock_usage_bug();
-
- ctr = &__brlock_array[smp_processor_id()][idx];
-
- wmb();
- (*ctr)--;
- preempt_enable();
-}
-#endif /* __BRLOCK_USE_ATOMICS */
-
-/* write path not inlined - it's rare and larger */
-
-extern void FASTCALL(__br_write_lock (enum brlock_indices idx));
-extern void FASTCALL(__br_write_unlock (enum brlock_indices idx));
-
-static inline void br_write_lock (enum brlock_indices idx)
-{
- if (idx >= __BR_END)
- __br_lock_usage_bug();
- __br_write_lock(idx);
-}
-
-static inline void br_write_unlock (enum brlock_indices idx)
-{
- if (idx >= __BR_END)
- __br_lock_usage_bug();
- __br_write_unlock(idx);
-}
-
-#else
-# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); })
-# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); })
-# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); })
-# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); })
-#endif /* CONFIG_SMP */
-
-/*
- * Now enumerate all of the possible sw/hw IRQ protected
- * versions of the interfaces.
- */
-#define br_read_lock_irqsave(idx, flags) \
- do { local_irq_save(flags); br_read_lock(idx); } while (0)
-
-#define br_read_lock_irq(idx) \
- do { local_irq_disable(); br_read_lock(idx); } while (0)
-
-#define br_read_lock_bh(idx) \
- do { local_bh_disable(); br_read_lock(idx); } while (0)
-
-#define br_write_lock_irqsave(idx, flags) \
- do { local_irq_save(flags); br_write_lock(idx); } while (0)
-
-#define br_write_lock_irq(idx) \
- do { local_irq_disable(); br_write_lock(idx); } while (0)
-
-#define br_write_lock_bh(idx) \
- do { local_bh_disable(); br_write_lock(idx); } while (0)
-
-#define br_read_unlock_irqrestore(idx, flags) \
- do { br_read_unlock(irx); local_irq_restore(flags); } while (0)
-
-#define br_read_unlock_irq(idx) \
- do { br_read_unlock(idx); local_irq_enable(); } while (0)
-
-#define br_read_unlock_bh(idx) \
- do { br_read_unlock(idx); local_bh_enable(); } while (0)
-
-#define br_write_unlock_irqrestore(idx, flags) \
- do { br_write_unlock(irx); local_irq_restore(flags); } while (0)
-
-#define br_write_unlock_irq(idx) \
- do { br_write_unlock(idx); local_irq_enable(); } while (0)
-
-#define br_write_unlock_bh(idx) \
- do { br_write_unlock(idx); local_bh_enable(); } while (0)
-
-#endif /* __LINUX_BRLOCK_H */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index caab8bbc88fe..f70e91028c96 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -121,6 +121,7 @@ struct ipv6hdr {
#include <linux/icmpv6.h>
#include <net/if_inet6.h> /* struct ipv6_mc_socklist */
#include <linux/tcp.h>
+#include <linux/udp.h>
/*
This structure contains results of exthdrs parsing
@@ -178,6 +179,11 @@ struct ipv6_pinfo {
struct ipv6_txoptions *opt;
struct sk_buff *pktoptions;
+ struct {
+ struct ipv6_txoptions *opt;
+ struct rt6_info *rt;
+ struct flowi *fl;
+ } cork;
};
struct raw6_opt {
@@ -200,6 +206,7 @@ struct udp6_sock {
struct sock sk;
struct ipv6_pinfo *pinet6;
struct inet_opt inet;
+ struct udp_opt udp;
struct ipv6_pinfo inet6;
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 080084cc3afc..9ebb267411df 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -456,7 +456,7 @@ struct packet_type
int (*func) (struct sk_buff *, struct net_device *,
struct packet_type *);
void *data; /* Private to the packet type */
- struct packet_type *next;
+ struct list_head list;
};
@@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(struct net_device *dev);
extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
extern void dev_add_pack(struct packet_type *pt);
extern void dev_remove_pack(struct packet_type *pt);
+extern void __dev_remove_pack(struct packet_type *pt);
extern int dev_get(const char *name);
extern struct net_device *dev_get_by_flags(unsigned short flags,
unsigned short mask);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3be29f0d15f6..6b8ab6887236 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -792,6 +792,15 @@ static inline int skb_pagelen(const struct sk_buff *skb)
return len + skb_headlen(skb);
}
+static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ frag->page = page;
+ frag->page_offset = off;
+ frag->size = size;
+ skb_shinfo(skb)->nr_frags = i+1;
+}
+
#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) \
BUG(); } while (0)
#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) \
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b6760b3b9f56..754c7f36afda 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -316,6 +316,26 @@ extern int ip6_build_xmit(struct sock *sk,
struct ipv6_txoptions *opt,
int hlimit, int flags);
+extern int ip6_append_data(struct sock *sk,
+ int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
+ void *from,
+ int length,
+ int transhdrlen,
+ int hlimit,
+ struct ipv6_txoptions *opt,
+ struct flowi *fl,
+ struct rt6_info *rt,
+ unsigned int flags);
+
+extern int ip6_push_pending_frames(struct sock *sk);
+
+extern void ip6_flush_pending_frames(struct sock *sk);
+
+extern int ip6_dst_lookup(struct sock *sk,
+ struct dst_entry **dst,
+ struct flowi *fl,
+ struct in6_addr **saddr);
+
/*
* skb processing functions
*/
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 099781cf0725..f11387b308af 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -123,6 +123,12 @@ struct xfrm_state
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
+ /* IPComp needs an IPIP tunnel for handling uncompressed packets */
+ struct xfrm_state *tunnel;
+
+ /* If a tunnel, number of users + 1 */
+ atomic_t tunnel_users;
+
/* State for replay detection */
struct xfrm_replay_state replay;
@@ -196,6 +202,8 @@ extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+extern void xfrm_state_delete_tunnel(struct xfrm_state *x);
+
struct xfrm_decap_state;
struct xfrm_type
{
@@ -699,6 +707,11 @@ xfrm_state_addr_check(struct xfrm_state *x,
return 0;
}
+static inline int xfrm_state_kern(struct xfrm_state *x)
+{
+ return atomic_read(&x->tunnel_users);
+}
+
/*
* xfrm algorithm information
*/
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index fcd14e808045..07ff11d7b6d2 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -40,7 +40,6 @@
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/highuid.h>
-#include <linux/brlock.h>
#include <linux/fs.h>
#include <linux/uio.h>
#include <linux/tty.h>
@@ -429,17 +428,6 @@ EXPORT_SYMBOL(del_timer_sync);
#endif
EXPORT_SYMBOL(mod_timer);
-#ifdef CONFIG_SMP
-
-/* Big-Reader lock implementation */
-EXPORT_SYMBOL(__brlock_array);
-#ifndef __BRLOCK_USE_ATOMICS
-EXPORT_SYMBOL(__br_write_locks);
-#endif
-EXPORT_SYMBOL(__br_write_lock);
-EXPORT_SYMBOL(__br_write_unlock);
-#endif
-
#ifdef HAVE_DISABLE_HLT
EXPORT_SYMBOL(disable_hlt);
EXPORT_SYMBOL(enable_hlt);
diff --git a/lib/Makefile b/lib/Makefile
index 6ab94d3cb906..75af740c3743 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@
L_TARGET := lib.a
-obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
+obj-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
kobject.o idr.o
diff --git a/lib/brlock.c b/lib/brlock.c
deleted file mode 100644
index 7e9121378da1..000000000000
--- a/lib/brlock.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *
- * linux/lib/brlock.c
- *
- * 'Big Reader' read-write spinlocks. See linux/brlock.h for details.
- *
- * Copyright 2000, Ingo Molnar <mingo@redhat.com>
- * Copyright 2000, David S. Miller <davem@redhat.com>
- */
-
-#include <linux/config.h>
-
-#ifdef CONFIG_SMP
-
-#include <linux/sched.h>
-#include <linux/brlock.h>
-
-#ifdef __BRLOCK_USE_ATOMICS
-
-brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
- { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = RW_LOCK_UNLOCKED } };
-
-void __br_write_lock (enum brlock_indices idx)
-{
- int i;
-
- preempt_disable();
- for (i = 0; i < NR_CPUS; i++)
- _raw_write_lock(&__brlock_array[i][idx]);
-}
-
-void __br_write_unlock (enum brlock_indices idx)
-{
- int i;
-
- for (i = 0; i < NR_CPUS; i++)
- _raw_write_unlock(&__brlock_array[i][idx]);
- preempt_enable();
-}
-
-#else /* ! __BRLOCK_USE_ATOMICS */
-
-brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
- { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = 0 } };
-
-struct br_wrlock __br_write_locks[__BR_IDX_MAX] =
- { [0 ... __BR_IDX_MAX-1] = { SPIN_LOCK_UNLOCKED } };
-
-void __br_write_lock (enum brlock_indices idx)
-{
- int i;
-
- preempt_disable();
-again:
- _raw_spin_lock(&__br_write_locks[idx].lock);
- for (i = 0; i < NR_CPUS; i++)
- if (__brlock_array[i][idx] != 0) {
- _raw_spin_unlock(&__br_write_locks[idx].lock);
- barrier();
- cpu_relax();
- goto again;
- }
-}
-
-void __br_write_unlock (enum brlock_indices idx)
-{
- spin_unlock(&__br_write_locks[idx].lock);
-}
-
-#endif /* __BRLOCK_USE_ATOMICS */
-
-#endif /* CONFIG_SMP */
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index fc26712d2149..853da564b321 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -20,25 +20,19 @@
#include <asm/uaccess.h>
#include "br_private.h"
-static __inline__ unsigned long __timeout(struct net_bridge *br)
+/* if topology_changing then use forward_delay (default 15 sec)
+ * otherwise keep longer (default 5 minutes)
+ */
+static __inline__ unsigned long hold_time(const struct net_bridge *br)
{
- unsigned long timeout;
-
- timeout = jiffies - br->ageing_time;
- if (br->topology_change)
- timeout = jiffies - br->forward_delay;
-
- return timeout;
+ return br->topology_change ? br->forward_delay : br->ageing_time;
}
-static __inline__ int has_expired(struct net_bridge *br,
- struct net_bridge_fdb_entry *fdb)
+static __inline__ int has_expired(const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb)
{
- if (!fdb->is_static &&
- time_before_eq(fdb->ageing_timer, __timeout(br)))
- return 1;
-
- return 0;
+ return !fdb->is_static
+ && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
}
static __inline__ void copy_fdb(struct __fdb_entry *ent,
@@ -52,7 +46,7 @@ static __inline__ void copy_fdb(struct __fdb_entry *ent,
: ((jiffies - f->ageing_timer) * USER_HZ) / HZ;
}
-static __inline__ int br_mac_hash(unsigned char *mac)
+static __inline__ int br_mac_hash(const unsigned char *mac)
{
unsigned long x;
@@ -68,7 +62,14 @@ static __inline__ int br_mac_hash(unsigned char *mac)
return x & (BR_HASH_SIZE - 1);
}
-void br_fdb_changeaddr(struct net_bridge_port *p, unsigned char *newaddr)
+static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f)
+{
+ hlist_del(&f->hlist);
+ list_del(&f->age_list);
+ br_fdb_put(f);
+}
+
+void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
struct net_bridge *br;
int i;
@@ -98,25 +99,29 @@ void br_fdb_changeaddr(struct net_bridge_port *p, unsigned char *newaddr)
write_unlock_bh(&br->hash_lock);
}
-void br_fdb_cleanup(struct net_bridge *br)
+void br_fdb_cleanup(unsigned long _data)
{
- int i;
- unsigned long timeout;
-
- timeout = __timeout(br);
+ struct net_bridge *br = (struct net_bridge *)_data;
+ struct list_head *l, *n;
+ unsigned long delay;
write_lock_bh(&br->hash_lock);
- for (i=0;i<BR_HASH_SIZE;i++) {
- struct hlist_node *h, *g;
-
- hlist_for_each_safe(h, g, &br->hash[i]) {
- struct net_bridge_fdb_entry *f
- = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
- if (!f->is_static &&
- time_before_eq(f->ageing_timer, timeout)) {
- hlist_del(&f->hlist);
- br_fdb_put(f);
+ delay = hold_time(br);
+
+ list_for_each_safe(l, n, &br->age_list) {
+ struct net_bridge_fdb_entry *f
+ = list_entry(l, struct net_bridge_fdb_entry, age_list);
+ unsigned long expires = f->ageing_timer + delay;
+
+ if (time_before_eq(expires, jiffies)) {
+ if (!f->is_static) {
+ pr_debug("expire age %lu jiffies %lu\n",
+ f->ageing_timer, jiffies);
+ fdb_delete(f);
}
+ } else {
+ mod_timer(&br->gc_timer, expires);
+ break;
}
}
write_unlock_bh(&br->hash_lock);
@@ -134,8 +139,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_port *p)
struct net_bridge_fdb_entry *f
= hlist_entry(h, struct net_bridge_fdb_entry, hlist);
if (f->dst == p) {
- hlist_del(&f->hlist);
- br_fdb_put(f);
+ fdb_delete(f);
}
}
}
@@ -237,55 +241,46 @@ int br_fdb_get_entries(struct net_bridge *br,
return num;
}
-static __inline__ void __fdb_possibly_replace(struct net_bridge_fdb_entry *fdb,
- struct net_bridge_port *source,
- int is_local)
-{
- if (!fdb->is_static || is_local) {
- fdb->dst = source;
- fdb->is_local = is_local;
- fdb->is_static = is_local;
- fdb->ageing_timer = jiffies;
- }
-}
-
-void br_fdb_insert(struct net_bridge *br,
- struct net_bridge_port *source,
- unsigned char *addr,
- int is_local)
+void br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+ const unsigned char *addr, int is_local)
{
struct hlist_node *h;
struct net_bridge_fdb_entry *fdb;
- int hash;
-
- hash = br_mac_hash(addr);
+ int hash = br_mac_hash(addr);
write_lock_bh(&br->hash_lock);
hlist_for_each(h, &br->hash[hash]) {
fdb = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
if (!fdb->is_local &&
!memcmp(fdb->addr.addr, addr, ETH_ALEN)) {
- __fdb_possibly_replace(fdb, source, is_local);
- write_unlock_bh(&br->hash_lock);
- return;
+ if (likely(!fdb->is_static || is_local)) {
+ /* move to end of age list */
+ list_del(&fdb->age_list);
+ goto update;
+ }
+ goto out;
}
-
}
fdb = kmalloc(sizeof(*fdb), GFP_ATOMIC);
- if (fdb == NULL) {
- write_unlock_bh(&br->hash_lock);
- return;
- }
+ if (fdb == NULL)
+ goto out;
memcpy(fdb->addr.addr, addr, ETH_ALEN);
atomic_set(&fdb->use_count, 1);
+ hlist_add_head(&fdb->hlist, &br->hash[hash]);
+
+ if (!timer_pending(&br->gc_timer)) {
+ br->gc_timer.expires = jiffies + hold_time(br);
+ add_timer(&br->gc_timer);
+ }
+
+ update:
fdb->dst = source;
fdb->is_local = is_local;
fdb->is_static = is_local;
fdb->ageing_timer = jiffies;
-
- hlist_add_head(&fdb->hlist, &br->hash[hash]);
-
+ list_add_tail(&fdb->age_list, &br->age_list);
+ out:
write_unlock_bh(&br->hash_lock);
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 06d7d41335ac..a46ca3eecba3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -84,8 +84,6 @@ static struct net_bridge *new_nb(const char *name)
memset(br, 0, sizeof(*br));
dev = &br->dev;
- init_timer(&br->tick);
-
strncpy(dev->name, name, IFNAMSIZ);
dev->priv = br;
dev->priv_flags = IFF_EBRIDGE;
@@ -109,12 +107,10 @@ static struct net_bridge *new_nb(const char *name)
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
br->topology_change = 0;
br->topology_change_detected = 0;
- br_timer_clear(&br->hello_timer);
- br_timer_clear(&br->tcn_timer);
- br_timer_clear(&br->topology_change_timer);
-
br->ageing_time = 300 * HZ;
- br->gc_interval = 4 * HZ;
+ INIT_LIST_HEAD(&br->age_list);
+
+ br_stp_timer_init(br);
return br;
}
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 8005aad17e74..939663847ca4 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -32,9 +32,10 @@ static inline unsigned long ticks_to_user(unsigned long tick)
}
/* Report time remaining in user HZ */
-static unsigned long timer_residue(const struct br_timer *timer)
+static unsigned long timer_residue(const struct timer_list *timer)
{
- return ticks_to_user(timer->running ? (jiffies - timer->expires) : 0);
+ return ticks_to_user(timer_pending(timer)
+ ? (timer->expires - jiffies) : 0);
}
static int br_ioctl_device(struct net_bridge *br,
@@ -87,7 +88,6 @@ static int br_ioctl_device(struct net_bridge *br,
b.root_port = br->root_port;
b.stp_enabled = br->stp_enabled;
b.ageing_time = ticks_to_user(br->ageing_time);
- b.gc_interval = ticks_to_user(br->gc_interval);
b.hello_timer_value = timer_residue(&br->hello_timer);
b.tcn_timer_value = timer_residue(&br->tcn_timer);
b.topology_change_timer_value = timer_residue(&br->topology_change_timer);
@@ -146,8 +146,7 @@ static int br_ioctl_device(struct net_bridge *br,
br->ageing_time = user_to_ticks(arg0);
return 0;
- case BRCTL_SET_GC_INTERVAL:
- br->gc_interval = user_to_ticks(arg0);
+ case BRCTL_SET_GC_INTERVAL: /* no longer used */
return 0;
case BRCTL_GET_PORT_INFO:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 5ddd034fa0fe..2e9d1a483ee1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -18,7 +18,6 @@
#include <linux/netdevice.h>
#include <linux/miscdevice.h>
#include <linux/if_bridge.h>
-#include "br_private_timer.h"
#define BR_HASH_BITS 8
#define BR_HASH_SIZE (1 << BR_HASH_BITS)
@@ -44,10 +43,11 @@ struct mac_addr
struct net_bridge_fdb_entry
{
struct hlist_node hlist;
- atomic_t use_count;
- mac_addr addr;
struct net_bridge_port *dst;
+ struct list_head age_list;
+ atomic_t use_count;
unsigned long ageing_timer;
+ mac_addr addr;
unsigned is_local:1;
unsigned is_static:1;
};
@@ -71,10 +71,9 @@ struct net_bridge_port
unsigned config_pending:1;
int priority;
- struct br_timer forward_delay_timer;
- struct br_timer hold_timer;
- struct br_timer message_age_timer;
-
+ struct timer_list forward_delay_timer;
+ struct timer_list hold_timer;
+ struct timer_list message_age_timer;
struct rcu_head rcu;
};
@@ -86,7 +85,7 @@ struct net_bridge
struct net_device_stats statistics;
rwlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
- struct timer_list tick;
+ struct list_head age_list;
/* STP */
bridge_id designated_root;
@@ -103,13 +102,12 @@ struct net_bridge
unsigned topology_change:1;
unsigned topology_change_detected:1;
- struct br_timer hello_timer;
- struct br_timer tcn_timer;
- struct br_timer topology_change_timer;
- struct br_timer gc_timer;
+ struct timer_list hello_timer;
+ struct timer_list tcn_timer;
+ struct timer_list topology_change_timer;
+ struct timer_list gc_timer;
int ageing_time;
- int gc_interval;
};
extern struct notifier_block br_device_notifier;
@@ -128,8 +126,8 @@ extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
/* br_fdb.c */
extern void br_fdb_changeaddr(struct net_bridge_port *p,
- unsigned char *newaddr);
-extern void br_fdb_cleanup(struct net_bridge *br);
+ const unsigned char *newaddr);
+extern void br_fdb_cleanup(unsigned long arg);
extern void br_fdb_delete_by_port(struct net_bridge *br,
struct net_bridge_port *p);
extern struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br,
@@ -140,9 +138,9 @@ extern int br_fdb_get_entries(struct net_bridge *br,
int maxnum,
int offset);
extern void br_fdb_insert(struct net_bridge *br,
- struct net_bridge_port *source,
- unsigned char *addr,
- int is_local);
+ struct net_bridge_port *source,
+ const unsigned char *addr,
+ int is_local);
/* br_forward.c */
extern void br_deliver(const struct net_bridge_port *to,
@@ -188,10 +186,10 @@ extern int br_netfilter_init(void);
extern void br_netfilter_fini(void);
/* br_stp.c */
+extern void br_log_state(const struct net_bridge_port *p);
extern struct net_bridge_port *br_get_port(struct net_bridge *br,
int port_no);
extern void br_init_port(struct net_bridge_port *p);
-extern port_id br_make_port_id(struct net_bridge_port *p);
extern void br_become_designated_port(struct net_bridge_port *p);
/* br_stp_if.c */
@@ -210,4 +208,8 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p,
/* br_stp_bpdu.c */
extern void br_stp_handle_bpdu(struct sk_buff *skb);
+/* br_stp_timer.c */
+extern void br_stp_timer_init(struct net_bridge *br);
+extern void br_stp_port_timer_init(struct net_bridge_port *p);
+
#endif
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 7d409ed9c0df..e29f01ac1adf 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -47,7 +47,6 @@ extern void br_configuration_update(struct net_bridge *);
extern void br_port_state_selection(struct net_bridge *);
extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu);
extern void br_received_tcn_bpdu(struct net_bridge_port *p);
-extern void br_tick(unsigned long __data);
extern void br_transmit_config(struct net_bridge_port *p);
extern void br_transmit_tcn(struct net_bridge *br);
extern void br_topology_change_detection(struct net_bridge *br);
diff --git a/net/bridge/br_private_timer.h b/net/bridge/br_private_timer.h
deleted file mode 100644
index 6655ab9f5887..000000000000
--- a/net/bridge/br_private_timer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Linux ethernet bridge
- *
- * Authors:
- * Lennert Buytenhek <buytenh@gnu.org>
- *
- * $Id: br_private_timer.h,v 1.1 2000/02/18 16:47:13 davem Exp $
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _BR_PRIVATE_TIMER_H
-#define _BR_PRIVATE_TIMER_H
-
-struct br_timer
-{
- int running;
- unsigned long expires;
-};
-
-extern __inline__ void br_timer_clear(struct br_timer *t)
-{
- t->running = 0;
-}
-
-extern __inline__ unsigned long br_timer_get_residue(struct br_timer *t)
-{
- if (t->running)
- return jiffies - t->expires;
-
- return 0;
-}
-
-extern __inline__ void br_timer_set(struct br_timer *t, unsigned long x)
-{
- t->expires = x;
- t->running = 1;
-}
-
-extern __inline__ int br_timer_is_running(struct br_timer *t)
-{
- return t->running;
-}
-
-extern __inline__ int br_timer_has_expired(struct br_timer *t, unsigned long to)
-{
- return t->running && time_after_eq(jiffies, t->expires + to);
-}
-
-
-#endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 25b214b782d4..a2ba31bc9e7a 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -12,7 +12,6 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
#include <linux/kernel.h>
#include <linux/if_bridge.h>
#include <linux/smp_lock.h>
@@ -20,6 +19,18 @@
#include "br_private.h"
#include "br_private_stp.h"
+static const char *br_port_state_names[] = {
+ "disabled", "learning", "forwarding", "blocking",
+};
+
+void br_log_state(const struct net_bridge_port *p)
+{
+ pr_info("%s: port %d(%s) entering %s state\n",
+ p->br->dev.name, p->port_no, p->dev->name,
+ br_port_state_names[p->state]);
+
+}
+
/* called under bridge lock */
struct net_bridge_port *br_get_port(struct net_bridge *br, int port_no)
{
@@ -34,7 +45,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, int port_no)
}
/* called under bridge lock */
-static int br_should_become_root_port(struct net_bridge_port *p, int root_port)
+static int br_should_become_root_port(const struct net_bridge_port *p,
+ int root_port)
{
struct net_bridge *br;
struct net_bridge_port *rp;
@@ -116,9 +128,12 @@ void br_become_root_bridge(struct net_bridge *br)
br->hello_time = br->bridge_hello_time;
br->forward_delay = br->bridge_forward_delay;
br_topology_change_detection(br);
- br_timer_clear(&br->tcn_timer);
- br_config_bpdu_generation(br);
- br_timer_set(&br->hello_timer, jiffies);
+ del_timer(&br->tcn_timer);
+
+ if (br->dev.flags & IFF_UP) {
+ br_config_bpdu_generation(br);
+ mod_timer(&br->hello_timer, jiffies + br->hello_time);
+ }
}
/* called under bridge lock */
@@ -127,7 +142,8 @@ void br_transmit_config(struct net_bridge_port *p)
struct br_config_bpdu bpdu;
struct net_bridge *br;
- if (br_timer_is_running(&p->hold_timer)) {
+
+ if (timer_pending(&p->hold_timer)) {
p->config_pending = 1;
return;
}
@@ -142,12 +158,11 @@ void br_transmit_config(struct net_bridge_port *p)
bpdu.port_id = p->port_id;
bpdu.message_age = 0;
if (!br_is_root_bridge(br)) {
- struct net_bridge_port *root;
- unsigned long age;
+ struct net_bridge_port *root
+ = br_get_port(br, br->root_port);
+ /* age = max_age minus the time left on the root port's message age timer, +1 as the old code did */
- root = br_get_port(br, br->root_port);
- age = br_timer_get_residue(&root->message_age_timer) + 1;
- bpdu.message_age = age;
+ bpdu.message_age = br->max_age - (root->message_age_timer.expires - jiffies) + 1;
}
bpdu.max_age = br->max_age;
bpdu.hello_time = br->hello_time;
@@ -157,22 +172,26 @@ void br_transmit_config(struct net_bridge_port *p)
p->topology_change_ack = 0;
p->config_pending = 0;
- br_timer_set(&p->hold_timer, jiffies);
+
+ mod_timer(&p->hold_timer, jiffies + BR_HOLD_TIME);
}
/* called under bridge lock */
-static void br_record_config_information(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
+static inline void br_record_config_information(struct net_bridge_port *p,
+ const struct br_config_bpdu *bpdu)
{
p->designated_root = bpdu->root;
p->designated_cost = bpdu->root_path_cost;
p->designated_bridge = bpdu->bridge_id;
p->designated_port = bpdu->port_id;
- br_timer_set(&p->message_age_timer, jiffies - bpdu->message_age);
+ mod_timer(&p->message_age_timer, jiffies
+ + (p->br->max_age - bpdu->message_age));
}
/* called under bridge lock */
-static void br_record_config_timeout_values(struct net_bridge *br, struct br_config_bpdu *bpdu)
+static inline void br_record_config_timeout_values(struct net_bridge *br,
+ const struct br_config_bpdu *bpdu)
{
br->max_age = bpdu->max_age;
br->hello_time = bpdu->hello_time;
@@ -187,7 +206,7 @@ void br_transmit_tcn(struct net_bridge *br)
}
/* called under bridge lock */
-static int br_should_become_designated_port(struct net_bridge_port *p)
+static int br_should_become_designated_port(const struct net_bridge_port *p)
{
struct net_bridge *br;
int t;
@@ -261,25 +280,28 @@ static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_b
}
/* called under bridge lock */
-static void br_topology_change_acknowledged(struct net_bridge *br)
+static inline void br_topology_change_acknowledged(struct net_bridge *br)
{
br->topology_change_detected = 0;
- br_timer_clear(&br->tcn_timer);
+ del_timer(&br->tcn_timer);
}
/* called under bridge lock */
void br_topology_change_detection(struct net_bridge *br)
{
- printk(KERN_INFO "%s: topology change detected", br->dev.name);
+ if (!(br->dev.flags & IFF_UP))
+ return;
+ pr_info("%s: topology change detected", br->dev.name);
if (br_is_root_bridge(br)) {
printk(", propagating");
br->topology_change = 1;
- br_timer_set(&br->topology_change_timer, jiffies);
+ mod_timer(&br->topology_change_timer, jiffies
+ + br->bridge_forward_delay + br->bridge_max_age);
} else if (!br->topology_change_detected) {
printk(", sending tcn bpdu");
br_transmit_tcn(br);
- br_timer_set(&br->tcn_timer, jiffies);
+ mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time);
}
printk("\n");
@@ -299,7 +321,7 @@ void br_config_bpdu_generation(struct net_bridge *br)
}
/* called under bridge lock */
-static void br_reply(struct net_bridge_port *p)
+static inline void br_reply(struct net_bridge_port *p)
{
br_transmit_config(p);
}
@@ -323,6 +345,7 @@ void br_become_designated_port(struct net_bridge_port *p)
p->designated_port = p->port_id;
}
+
/* called under bridge lock */
static void br_make_blocking(struct net_bridge_port *p)
{
@@ -332,11 +355,9 @@ static void br_make_blocking(struct net_bridge_port *p)
p->state == BR_STATE_LEARNING)
br_topology_change_detection(p->br);
- printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
- p->br->dev.name, p->port_no, p->dev->name, "blocking");
-
p->state = BR_STATE_BLOCKING;
- br_timer_clear(&p->forward_delay_timer);
+ br_log_state(p);
+ del_timer(&p->forward_delay_timer);
}
}
@@ -345,20 +366,12 @@ static void br_make_forwarding(struct net_bridge_port *p)
{
if (p->state == BR_STATE_BLOCKING) {
if (p->br->stp_enabled) {
- printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
- p->br->dev.name, p->port_no, p->dev->name,
- "listening");
-
p->state = BR_STATE_LISTENING;
} else {
- printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
- p->br->dev.name, p->port_no, p->dev->name,
- "learning");
-
p->state = BR_STATE_LEARNING;
}
- br_timer_set(&p->forward_delay_timer, jiffies);
- }
+ br_log_state(p);
+ mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); }
}
/* called under bridge lock */
@@ -373,7 +386,7 @@ void br_port_state_selection(struct net_bridge *br)
p->topology_change_ack = 0;
br_make_forwarding(p);
} else if (br_is_designated_port(p)) {
- br_timer_clear(&p->message_age_timer);
+ del_timer(&p->message_age_timer);
br_make_forwarding(p);
} else {
p->config_pending = 0;
@@ -381,11 +394,12 @@ void br_port_state_selection(struct net_bridge *br)
br_make_blocking(p);
}
}
+
}
}
/* called under bridge lock */
-static void br_topology_change_acknowledge(struct net_bridge_port *p)
+static inline void br_topology_change_acknowledge(struct net_bridge_port *p)
{
p->topology_change_ack = 1;
br_transmit_config(p);
@@ -396,20 +410,23 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b
{
struct net_bridge *br;
int was_root;
-
+
br = p->br;
was_root = br_is_root_bridge(br);
+
if (br_supersedes_port_info(p, bpdu)) {
br_record_config_information(p, bpdu);
br_configuration_update(br);
br_port_state_selection(br);
if (!br_is_root_bridge(br) && was_root) {
- br_timer_clear(&br->hello_timer);
+ del_timer(&br->hello_timer);
if (br->topology_change_detected) {
- br_timer_clear(&br->topology_change_timer);
+ del_timer(&br->topology_change_timer);
br_transmit_tcn(br);
- br_timer_set(&br->tcn_timer, jiffies);
+
+ mod_timer(&br->tcn_timer,
+ jiffies + br->bridge_hello_time);
}
}
@@ -428,7 +445,7 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b
void br_received_tcn_bpdu(struct net_bridge_port *p)
{
if (br_is_designated_port(p)) {
- printk(KERN_INFO "%s: received tcn bpdu on port %i(%s)\n",
+ pr_info("%s: received tcn bpdu on port %i(%s)\n",
p->br->dev.name, p->port_no, p->dev->name);
br_topology_change_detection(p->br);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 47a2c5c0b5f4..43de31e752af 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -20,7 +20,7 @@
#include "br_private.h"
#include "br_private_stp.h"
-__u16 br_make_port_id(struct net_bridge_port *p)
+static inline __u16 br_make_port_id(const struct net_bridge_port *p)
{
return (p->priority << 8) | p->port_no;
}
@@ -33,33 +33,25 @@ void br_init_port(struct net_bridge_port *p)
p->state = BR_STATE_BLOCKING;
p->topology_change_ack = 0;
p->config_pending = 0;
- br_timer_clear(&p->message_age_timer);
- br_timer_clear(&p->forward_delay_timer);
- br_timer_clear(&p->hold_timer);
+
+ br_stp_port_timer_init(p);
}
/* called under bridge lock */
void br_stp_enable_bridge(struct net_bridge *br)
{
struct net_bridge_port *p;
- struct timer_list *timer = &br->tick;
spin_lock_bh(&br->lock);
- init_timer(timer);
- timer->data = (unsigned long) br;
- timer->function = br_tick;
- timer->expires = jiffies + 1;
- add_timer(timer);
-
- br_timer_set(&br->hello_timer, jiffies);
+ br->hello_timer.expires = jiffies + br->hello_time;
+ add_timer(&br->hello_timer);
br_config_bpdu_generation(br);
list_for_each_entry(p, &br->port_list, list) {
if (p->dev->flags & IFF_UP)
br_stp_enable_port(p);
- }
- br_timer_set(&br->gc_timer, jiffies);
+ }
spin_unlock_bh(&br->lock);
}
@@ -68,22 +60,22 @@ void br_stp_disable_bridge(struct net_bridge *br)
{
struct net_bridge_port *p;
- spin_lock_bh(&br->lock);
- br->topology_change = 0;
- br->topology_change_detected = 0;
- br_timer_clear(&br->hello_timer);
- br_timer_clear(&br->topology_change_timer);
- br_timer_clear(&br->tcn_timer);
- br_timer_clear(&br->gc_timer);
- br_fdb_cleanup(br);
-
+ spin_lock_bh(&br->lock); /* _bh: the timer callbacks also take br->lock */
list_for_each_entry(p, &br->port_list, list) {
if (p->state != BR_STATE_DISABLED)
br_stp_disable_port(p);
+
}
- spin_unlock_bh(&br->lock);
- del_timer_sync(&br->tick);
+ br->topology_change = 0;
+ br->topology_change_detected = 0;
+ spin_unlock_bh(&br->lock);
+
+ del_timer_sync(&br->hello_timer);
+ del_timer_sync(&br->topology_change_timer);
+ del_timer_sync(&br->tcn_timer);
+ del_timer_sync(&br->gc_timer);
+
}
/* called under bridge lock */
@@ -108,10 +100,13 @@ void br_stp_disable_port(struct net_bridge_port *p)
p->state = BR_STATE_DISABLED;
p->topology_change_ack = 0;
p->config_pending = 0;
- br_timer_clear(&p->message_age_timer);
- br_timer_clear(&p->forward_delay_timer);
- br_timer_clear(&p->hold_timer);
+
+ del_timer(&p->message_age_timer);
+ del_timer(&p->forward_delay_timer);
+ del_timer(&p->hold_timer);
+
br_configuration_update(br);
+
br_port_state_selection(br);
if (br_is_root_bridge(br) && !wasroot)
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index be7dd0ac5e17..72af7397b047 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -20,51 +20,59 @@
#include "br_private.h"
#include "br_private_stp.h"
-static void dump_bridge_id(bridge_id *id)
-{
- printk("%.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x", id->prio[0],
- id->prio[1], id->addr[0], id->addr[1], id->addr[2], id->addr[3],
- id->addr[4], id->addr[5]);
-}
-
/* called under bridge lock */
-static int br_is_designated_for_some_port(struct net_bridge *br)
+static int br_is_designated_for_some_port(const struct net_bridge *br)
{
struct net_bridge_port *p;
list_for_each_entry(p, &br->port_list, list) {
if (p->state != BR_STATE_DISABLED &&
- !memcmp(&p->designated_bridge, &br->bridge_id, 8))
+ !memcmp(&p->designated_bridge, &br->bridge_id, 8))
return 1;
}
return 0;
}
-/* called under bridge lock */
-static void br_hello_timer_expired(struct net_bridge *br)
+static void br_hello_timer_expired(unsigned long arg)
{
- br_config_bpdu_generation(br);
- br_timer_set(&br->hello_timer, jiffies);
+ struct net_bridge *br = (struct net_bridge *)arg;
+
+ pr_debug("%s: hello timer expired\n", br->dev.name);
+ spin_lock_bh(&br->lock);
+ if (br->dev.flags & IFF_UP) {
+ br_config_bpdu_generation(br);
+
+ br->hello_timer.expires = jiffies + br->hello_time;
+ add_timer(&br->hello_timer);
+ }
+ spin_unlock_bh(&br->lock);
}
-/* called under bridge lock */
-static void br_message_age_timer_expired(struct net_bridge_port *p)
+static void br_message_age_timer_expired(unsigned long arg)
{
- struct net_bridge *br;
+ struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge *br = p->br;
+ const bridge_id *id = &p->designated_bridge;
int was_root;
- br = p->br;
- printk(KERN_INFO "%s: ", br->dev.name);
- printk("neighbour ");
- dump_bridge_id(&p->designated_bridge);
- printk(" lost on port %i(%s)\n", p->port_no, p->dev->name);
+ if (p->state == BR_STATE_DISABLED)
+ return;
+
+
+ pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n",
+ br->dev.name,
+ id->prio[0], id->prio[1],
+ id->addr[0], id->addr[1], id->addr[2],
+ id->addr[3], id->addr[4], id->addr[5],
+ p->port_no, p->dev->name);
/*
* According to the spec, the message age timer cannot be
* running when we are the root bridge. So.. this was_root
* check is redundant. I'm leaving it in for now, though.
*/
+ spin_lock_bh(&br->lock);
was_root = br_is_root_bridge(br);
br_become_designated_port(p);
@@ -72,107 +80,101 @@ static void br_message_age_timer_expired(struct net_bridge_port *p)
br_port_state_selection(br);
if (br_is_root_bridge(br) && !was_root)
br_become_root_bridge(br);
+ spin_unlock_bh(&br->lock);
}
-/* called under bridge lock */
-static void br_forward_delay_timer_expired(struct net_bridge_port *p)
+static void br_forward_delay_timer_expired(unsigned long arg)
{
- if (p->state == BR_STATE_LISTENING) {
- printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
- p->br->dev.name, p->port_no, p->dev->name, "learning");
+ struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge *br = p->br;
+ pr_debug("%s: %d(%s) forward delay timer\n",
+ br->dev.name, p->port_no, p->dev->name);
+ spin_lock_bh(&br->lock);
+ if (p->state == BR_STATE_LISTENING) {
p->state = BR_STATE_LEARNING;
- br_timer_set(&p->forward_delay_timer, jiffies);
+ p->forward_delay_timer.expires = jiffies + br->forward_delay;
+ add_timer(&p->forward_delay_timer);
} else if (p->state == BR_STATE_LEARNING) {
- printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
- p->br->dev.name, p->port_no, p->dev->name, "forwarding");
-
p->state = BR_STATE_FORWARDING;
- if (br_is_designated_for_some_port(p->br))
- br_topology_change_detection(p->br);
+ if (br_is_designated_for_some_port(br))
+ br_topology_change_detection(br);
}
+ br_log_state(p);
+ spin_unlock_bh(&br->lock);
}
-/* called under bridge lock */
-static void br_tcn_timer_expired(struct net_bridge *br)
+static void br_tcn_timer_expired(unsigned long arg)
{
- printk(KERN_INFO "%s: retransmitting tcn bpdu\n", br->dev.name);
- br_transmit_tcn(br);
- br_timer_set(&br->tcn_timer, jiffies);
+ struct net_bridge *br = (struct net_bridge *) arg;
+
+ pr_debug("%s: tcn timer expired\n", br->dev.name);
+ spin_lock_bh(&br->lock);
+ if (br->dev.flags & IFF_UP) {
+ br_transmit_tcn(br);
+
+ br->tcn_timer.expires = jiffies + br->bridge_hello_time;
+ add_timer(&br->tcn_timer);
+ }
+ spin_unlock_bh(&br->lock);
}
-/* called under bridge lock */
-static void br_topology_change_timer_expired(struct net_bridge *br)
+static void br_topology_change_timer_expired(unsigned long arg)
{
+ struct net_bridge *br = (struct net_bridge *) arg;
+
+ pr_debug("%s: topo change timer expired\n", br->dev.name);
+ spin_lock_bh(&br->lock);
br->topology_change_detected = 0;
br->topology_change = 0;
+ spin_unlock_bh(&br->lock);
}
-/* called under bridge lock */
-static void br_hold_timer_expired(struct net_bridge_port *p)
+static void br_hold_timer_expired(unsigned long arg)
{
+ struct net_bridge_port *p = (struct net_bridge_port *) arg;
+
+ pr_debug("%s: %d(%s) hold timer expired\n",
+ p->br->dev.name, p->port_no, p->dev->name);
+
+ spin_lock_bh(&p->br->lock);
if (p->config_pending)
br_transmit_config(p);
+ spin_unlock_bh(&p->br->lock);
}
-/* called under bridge lock */
-static void br_check_port_timers(struct net_bridge_port *p)
+static inline void br_timer_init(struct timer_list *timer,
+ void (*_function)(unsigned long),
+ unsigned long _data)
{
- if (br_timer_has_expired(&p->message_age_timer, p->br->max_age)) {
- br_timer_clear(&p->message_age_timer);
- br_message_age_timer_expired(p);
- }
-
- if (br_timer_has_expired(&p->forward_delay_timer, p->br->forward_delay)) {
- br_timer_clear(&p->forward_delay_timer);
- br_forward_delay_timer_expired(p);
- }
-
- if (br_timer_has_expired(&p->hold_timer, BR_HOLD_TIME)) {
- br_timer_clear(&p->hold_timer);
- br_hold_timer_expired(p);
- }
+ init_timer(timer);
+ timer->function = _function;
+ timer->data = _data;
}
-/* called under bridge lock */
-static void br_check_timers(struct net_bridge *br)
+void br_stp_timer_init(struct net_bridge *br)
{
- struct net_bridge_port *p;
-
- if (br_timer_has_expired(&br->gc_timer, br->gc_interval)) {
- br_timer_set(&br->gc_timer, jiffies);
- br_fdb_cleanup(br);
- }
+ br_timer_init(&br->hello_timer, br_hello_timer_expired,
+ (unsigned long) br);
- if (br_timer_has_expired(&br->hello_timer, br->hello_time)) {
- br_timer_clear(&br->hello_timer);
- br_hello_timer_expired(br);
- }
+ br_timer_init(&br->tcn_timer, br_tcn_timer_expired,
+ (unsigned long) br);
- if (br_timer_has_expired(&br->tcn_timer, br->bridge_hello_time)) {
- br_timer_clear(&br->tcn_timer);
- br_tcn_timer_expired(br);
- }
+ br_timer_init(&br->topology_change_timer,
+ br_topology_change_timer_expired,
+ (unsigned long) br);
- if (br_timer_has_expired(&br->topology_change_timer, br->bridge_forward_delay + br->bridge_max_age)) {
- br_timer_clear(&br->topology_change_timer);
- br_topology_change_timer_expired(br);
- }
-
- list_for_each_entry(p, &br->port_list, list) {
- if (p->state != BR_STATE_DISABLED)
- br_check_port_timers(p);
- }
+ br_timer_init(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
}
-void br_tick(unsigned long __data)
+void br_stp_port_timer_init(struct net_bridge_port *p)
{
- struct net_bridge *br = (struct net_bridge *)__data;
-
- if (spin_trylock_bh(&br->lock)) {
- br_check_timers(br);
- spin_unlock_bh(&br->lock);
- }
- br->tick.expires = jiffies + 1;
- add_timer(&br->tick);
-}
+ br_timer_init(&p->message_age_timer, br_message_age_timer_expired,
+ (unsigned long) p);
+
+ br_timer_init(&p->forward_delay_timer, br_forward_delay_timer_expired,
+ (unsigned long) p);
+
+ br_timer_init(&p->hold_timer, br_hold_timer_expired,
+ (unsigned long) p);
+}
diff --git a/net/core/dev.c b/net/core/dev.c
index 6f9ce1949224..82bc8f0c47cb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -90,7 +90,6 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
-#include <linux/brlock.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
@@ -170,8 +169,9 @@ const char *if_port_text[] = {
* 86DD IPv6
*/
-static struct packet_type *ptype_base[16]; /* 16 way hashed list */
-static struct packet_type *ptype_all; /* Taps */
+static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
+static struct list_head ptype_base[16]; /* 16 way hashed list */
+static struct list_head ptype_all; /* Taps */
#ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy);
@@ -239,14 +239,17 @@ int netdev_nit;
* Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists.
+ *
+ * This call does not sleep, therefore it cannot guarantee that
+ * all CPUs that are in the middle of receiving packets will see
+ * the new packet type (until the next received packet).
*/
void dev_add_pack(struct packet_type *pt)
{
int hash;
- br_write_lock_bh(BR_NETPROTO_LOCK);
-
+ spin_lock_bh(&ptype_lock);
#ifdef CONFIG_NET_FASTROUTE
/* Hack to detect packet socket */
if (pt->data && (long)(pt->data) != 1) {
@@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt)
#endif
if (pt->type == htons(ETH_P_ALL)) {
netdev_nit++;
- pt->next = ptype_all;
- ptype_all = pt;
+ list_add_rcu(&pt->list, &ptype_all);
} else {
hash = ntohs(pt->type) & 15;
- pt->next = ptype_base[hash];
- ptype_base[hash] = pt;
+ list_add_rcu(&pt->list, &ptype_base[hash]);
}
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_unlock_bh(&ptype_lock);
}
extern void linkwatch_run_queue(void);
+
+
/**
- * dev_remove_pack - remove packet handler
+ * __dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
- * returns.
+ * returns.
+ *
+ * Unlike dev_remove_pack(), this does not wait for receivers: the
+ * packet type might still be in use and must not be freed until
+ * after all the CPUs have gone through a quiescent state.
*/
-void dev_remove_pack(struct packet_type *pt)
+void __dev_remove_pack(struct packet_type *pt)
{
- struct packet_type **pt1;
+ struct list_head *head;
+ struct packet_type *pt1;
- br_write_lock_bh(BR_NETPROTO_LOCK);
+ spin_lock_bh(&ptype_lock);
if (pt->type == htons(ETH_P_ALL)) {
netdev_nit--;
- pt1 = &ptype_all;
+ head = &ptype_all;
} else
- pt1 = &ptype_base[ntohs(pt->type) & 15];
+ head = &ptype_base[ntohs(pt->type) & 15];
- for (; *pt1; pt1 = &((*pt1)->next)) {
- if (pt == *pt1) {
- *pt1 = pt->next;
+ list_for_each_entry(pt1, head, list) {
+ if (pt == pt1) {
#ifdef CONFIG_NET_FASTROUTE
if (pt->data)
netdev_fastroute_obstacles--;
#endif
+ list_del_rcu(&pt->list);
goto out;
}
}
+
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ spin_unlock_bh(&ptype_lock);
+}
+/**
+ * dev_remove_pack - remove packet handler
+ * @pt: packet type declaration
+ *
+ * Remove a protocol handler that was previously added to the kernel
+ * protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ * from the kernel lists and can be freed or reused once this function
+ * returns.
+ *
+ * This call sleeps to guarantee that no CPU is looking at the packet
+ * type after return.
+ */
+void dev_remove_pack(struct packet_type *pt)
+{
+ __dev_remove_pack(pt);
+
+ synchronize_net();
}
/******************************************************************************
@@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
struct packet_type *ptype;
do_gettimeofday(&skb->stamp);
- br_read_lock(BR_NETPROTO_LOCK);
- for (ptype = ptype_all; ptype; ptype = ptype->next) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
@@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
ptype->func(skb2, skb->dev, ptype);
}
}
- br_read_unlock(BR_NETPROTO_LOCK);
+ rcu_read_unlock();
}
/* Calculate csum in the case, when packet is misrouted.
@@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *skb)
skb->h.raw = skb->nh.raw = skb->data;
pt_prev = NULL;
- for (ptype = ptype_all; ptype; ptype = ptype->next) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) {
if (!pt_prev->data) {
@@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *skb)
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (skb->dev->br_port) {
- int ret;
-
ret = handle_bridge(skb, pt_prev);
if (br_handle_frame_hook(skb) == 0)
- return ret;
+ goto out;
pt_prev = NULL;
}
#endif
- for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) {
+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) {
@@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *skb)
ret = NET_RX_DROP;
}
+ out:
+ rcu_read_unlock();
return ret;
}
@@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq_action *h)
unsigned long start_time = jiffies;
int budget = netdev_max_backlog;
- br_read_lock(BR_NETPROTO_LOCK);
+
+ preempt_disable();
local_irq_disable();
while (!list_empty(&queue->poll_list)) {
@@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq_action *h)
}
out:
local_irq_enable();
- br_read_unlock(BR_NETPROTO_LOCK);
+ preempt_enable();
return;
softnet_break:
@@ -1997,9 +2026,9 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
dev_hold(master);
}
- br_write_lock_bh(BR_NETPROTO_LOCK);
slave->master = master;
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+ synchronize_net();
if (old)
dev_put(old);
@@ -2663,8 +2692,8 @@ int netdev_finish_unregister(struct net_device *dev)
/* Synchronize with packet receive processing. */
void synchronize_net(void)
{
- br_write_lock_bh(BR_NETPROTO_LOCK);
- br_write_unlock_bh(BR_NETPROTO_LOCK);
+ might_sleep();
+ synchronize_kernel();
}
/**
@@ -2848,6 +2877,10 @@ static int __init net_dev_init(void)
subsystem_register(&net_subsys);
+ INIT_LIST_HEAD(&ptype_all);
+ for (i = 0; i < 16; i++)
+ INIT_LIST_HEAD(&ptype_base[i]);
+
#ifdef CONFIG_NET_DIVERT
dv_init();
#endif /* CONFIG_NET_DIVERT */
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 8456a4aeaf7b..7d0060ae7785 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -53,6 +53,7 @@
/***************************** INCLUDES *****************************/
#include <linux/config.h> /* Not needed ??? */
+#include <linux/module.h>
#include <linux/types.h> /* off_t */
#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */
#include <linux/proc_fs.h>
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2ccf51403636..a32ef17e78cb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1013,7 +1013,7 @@ void inet_register_protosw(struct inet_protosw *p)
out:
spin_unlock_bh(&inetsw_lock);
- synchronize_kernel();
+ synchronize_net();
return;
@@ -1040,7 +1040,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
list_del_rcu(&p->list);
spin_unlock_bh(&inetsw_lock);
- synchronize_kernel();
+ synchronize_net();
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 302eac537f30..6ee23d0f62de 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -685,16 +685,6 @@ skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
return 0;
}
-static void
-skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
-{
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- frag->page = page;
- frag->page_offset = off;
- frag->size = size;
- skb_shinfo(skb)->nr_frags = i+1;
-}
-
static inline unsigned int
csum_page(struct page *page, int offset, int copy)
{
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5bad8bf0452d..91b9826e30c4 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -269,6 +269,67 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
xfrm_state_put(x);
}
+/* We always hold one tunnel user reference to indicate a tunnel */
+static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
+{
+ struct xfrm_state *t;
+
+ t = xfrm_state_alloc();
+ if (t == NULL)
+ goto out;
+
+ t->id.proto = IPPROTO_IPIP;
+ t->id.spi = x->props.saddr.a4;
+ t->id.daddr.a4 = x->id.daddr.a4;
+ memcpy(&t->sel, &x->sel, sizeof(t->sel));
+ t->props.family = AF_INET;
+ t->props.mode = 1;
+ t->props.saddr.a4 = x->props.saddr.a4;
+
+ t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family);
+ if (t->type == NULL)
+ goto error;
+
+ if (t->type->init_state(t, NULL))
+ goto error;
+
+ t->km.state = XFRM_STATE_VALID;
+ atomic_set(&t->tunnel_users, 1);
+out:
+ return t;
+
+error:
+ xfrm_state_put(t);
+ t = NULL;
+ goto out;
+}
+
+/*
+ * Must be protected by xfrm_cfg_sem. State and tunnel user references are
+ * always incremented on success.
+ */
+static int ipcomp_tunnel_attach(struct xfrm_state *x)
+{
+ int err = 0;
+ struct xfrm_state *t;
+
+ t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
+ x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
+ if (!t) {
+ t = ipcomp_tunnel_create(x);
+ if (!t) {
+ err = -EINVAL;
+ goto out;
+ }
+ xfrm_state_insert(t);
+ xfrm_state_hold(t);
+ }
+ x->tunnel = t;
+ atomic_inc(&t->tunnel_users);
+out:
+ return err;
+}
+
static void ipcomp_free_data(struct ipcomp_data *ipcd)
{
if (ipcd->tfm)
@@ -308,6 +369,12 @@ static int ipcomp_init_state(struct xfrm_state *x, void *args)
if (!ipcd->tfm)
goto error;
+ if (x->props.mode) {
+ err = ipcomp_tunnel_attach(x);
+ if (err)
+ goto error;
+ }
+
calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
BUG_ON(!calg_desc);
ipcd->threshold = calg_desc->uinfo.comp.threshold;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 48aea8927526..37ced15b419d 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1188,12 +1188,9 @@ ip_ct_gather_frags(struct sk_buff *skb)
local_bh_enable();
if (!skb) {
- if (sk) sock_put(sk);
+ if (sk)
+ sock_put(sk);
return skb;
- } else if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
- kfree_skb(skb);
- if (sk) sock_put(sk);
- return NULL;
}
if (sk) {
diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c
index 9767f2f8fc0d..d03c9a6d1340 100644
--- a/net/ipv4/netfilter/ip_fw_compat.c
+++ b/net/ipv4/netfilter/ip_fw_compat.c
@@ -15,34 +15,10 @@ struct notifier_block;
#include <linux/netfilter_ipv4/compat_firewall.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include "ip_fw_compat.h"
static struct firewall_ops *fwops;
-/* From ip_fw_compat_redir.c */
-extern unsigned int
-do_redirect(struct sk_buff *skb,
- const struct net_device *dev,
- u_int16_t redirpt);
-
-extern void
-check_for_redirect(struct sk_buff *skb);
-
-extern void
-check_for_unredirect(struct sk_buff *skb);
-
-/* From ip_fw_compat_masq.c */
-extern unsigned int
-do_masquerade(struct sk_buff **pskb, const struct net_device *dev);
-
-extern unsigned int
-check_for_masq_error(struct sk_buff **pskb);
-
-extern unsigned int
-check_for_demasq(struct sk_buff **pskb);
-
-extern int __init masq_init(void);
-extern void masq_cleanup(void);
-
/* They call these; we do what they want. */
int register_firewall(int pf, struct firewall_ops *fw)
{
diff --git a/net/ipv4/netfilter/ip_fw_compat.h b/net/ipv4/netfilter/ip_fw_compat.h
new file mode 100644
index 000000000000..b46951afeff5
--- /dev/null
+++ b/net/ipv4/netfilter/ip_fw_compat.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_IP_FW_COMPAT_H
+#define _LINUX_IP_FW_COMPAT_H
+
+/* From ip_fw_compat_redir.c */
+extern unsigned int
+do_redirect(struct sk_buff *skb,
+ const struct net_device *dev,
+ u_int16_t redirpt);
+
+extern void
+check_for_redirect(struct sk_buff *skb);
+
+extern void
+check_for_unredirect(struct sk_buff *skb);
+
+/* From ip_fw_compat_masq.c */
+extern unsigned int
+do_masquerade(struct sk_buff **pskb, const struct net_device *dev);
+
+extern void check_for_masq_error(struct sk_buff **pskb);
+
+extern unsigned int
+check_for_demasq(struct sk_buff **pskb);
+
+extern int __init masq_init(void);
+extern void masq_cleanup(void);
+
+#endif /* _LINUX_IP_FW_COMPAT_H */
diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
index 318e4ea04a20..a9500f66888b 100644
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
@@ -25,6 +25,7 @@
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
+#include "ip_fw_compat.h"
#if 0
#define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_fw_compat_redir.c b/net/ipv4/netfilter/ip_fw_compat_redir.c
index 0540d87e1134..de4454744cbf 100644
--- a/net/ipv4/netfilter/ip_fw_compat_redir.c
+++ b/net/ipv4/netfilter/ip_fw_compat_redir.c
@@ -28,6 +28,7 @@ static DECLARE_LOCK(redir_lock);
#define ASSERT_WRITE_LOCK(x) MUST_BE_LOCKED(&redir_lock)
#include <linux/netfilter_ipv4/listhelp.h>
+#include "ip_fw_compat.h"
#if 0
#define DEBUGP printk
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index d6bfc1e061bd..fcf0c78f823e 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -163,36 +163,32 @@ static int ipip_rcv(struct sk_buff *skb)
skb->nh.iph->saddr,
IPPROTO_IPIP, AF_INET);
- if (x) {
- spin_lock(&x->lock);
+ if (!x)
+ goto drop;
- if (unlikely(x->km.state != XFRM_STATE_VALID))
- goto drop_unlock;
- }
+ spin_lock(&x->lock);
+
+ if (unlikely(x->km.state != XFRM_STATE_VALID))
+ goto drop_unlock;
err = ipip_xfrm_rcv(x, NULL, skb);
if (err)
goto drop_unlock;
- if (x) {
- x->curlft.bytes += skb->len;
- x->curlft.packets++;
-
- spin_unlock(&x->lock);
-
- xfrm_state_put(x);
- }
-
- return 0;
+ x->curlft.bytes += skb->len;
+ x->curlft.packets++;
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+out:
+ return err;
drop_unlock:
- if (x) {
- spin_unlock(&x->lock);
- xfrm_state_put(x);
- }
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+drop:
+ err = NET_RX_DROP;
kfree_skb(skb);
-out:
- return 0;
+ goto out;
}
static void ipip_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ec1a43b38b4f..ee0e31af4cc8 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -637,7 +637,7 @@ inet6_unregister_protosw(struct inet_protosw *p)
list_del_rcu(&p->list);
spin_unlock_bh(&inetsw6_lock);
- synchronize_kernel();
+ synchronize_net();
}
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8a415c312de0..c76ae27175ab 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -28,6 +28,7 @@
* YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
* Randy Dunlap and
* YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
+ * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
*/
#include <linux/module.h>
@@ -104,42 +105,6 @@ static __inline__ void icmpv6_xmit_unlock(void)
spin_unlock_bh(&icmpv6_socket->sk->lock.slock);
}
-
-
-/*
- * getfrag callback
- */
-
-static int icmpv6_getfrag(const void *data, struct in6_addr *saddr,
- char *buff, unsigned int offset, unsigned int len)
-{
- struct icmpv6_msg *msg = (struct icmpv6_msg *) data;
- struct icmp6hdr *icmph;
- __u32 csum;
-
- if (offset) {
- csum = skb_copy_and_csum_bits(msg->skb, msg->offset +
- (offset - sizeof(struct icmp6hdr)),
- buff, len, msg->csum);
- msg->csum = csum;
- return 0;
- }
-
- csum = csum_partial_copy_nocheck((void *) &msg->icmph, buff,
- sizeof(struct icmp6hdr), msg->csum);
-
- csum = skb_copy_and_csum_bits(msg->skb, msg->offset,
- buff + sizeof(struct icmp6hdr),
- len - sizeof(struct icmp6hdr), csum);
-
- icmph = (struct icmp6hdr *) buff;
-
- icmph->icmp6_cksum = csum_ipv6_magic(saddr, msg->daddr, msg->len,
- IPPROTO_ICMPV6, csum);
- return 0;
-}
-
-
/*
* Slightly more convenient version of icmpv6_send.
*/
@@ -242,22 +207,74 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
return (optval&0xC0) == 0x80;
}
+/*
+ * Finish the ICMPv6 message queued on sk->write_queue and send it.
+ *
+ * The ICMPv6 header template @thdr is copied to the transport header of
+ * the first queued buffer, the checksum over the header and every queued
+ * buffer is folded together with the IPv6 pseudo-header taken from @fl,
+ * and the queue is handed to ip6_push_pending_frames().
+ *
+ * @len is the ICMPv6 length (header plus payload) used in the
+ * pseudo-header.
+ *
+ * Returns 0 when nothing is queued, otherwise the result of
+ * ip6_push_pending_frames() (previously that result was dropped and 0
+ * was always returned).
+ */
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
+{
+	struct sk_buff *skb;
+	struct icmp6hdr *icmp6h;
+	int err = 0;
+
+	if ((skb = skb_peek(&sk->write_queue)) == NULL)
+		goto out;
+
+	icmp6h = (struct icmp6hdr*) skb->h.raw;
+	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
+	icmp6h->icmp6_cksum = 0;
+
+	if (skb_queue_len(&sk->write_queue) == 1) {
+		/* Single buffer: its csum already covers the whole payload. */
+		skb->csum = csum_partial((char *)icmp6h,
+					 sizeof(struct icmp6hdr), skb->csum);
+		icmp6h->icmp6_cksum = csum_ipv6_magic(fl->fl6_src,
+						      fl->fl6_dst,
+						      len, fl->proto, skb->csum);
+	} else {
+		u32 tmp_csum = 0;
+
+		/* Several buffers: sum the per-buffer partial checksums. */
+		skb_queue_walk(&sk->write_queue, skb) {
+			tmp_csum = csum_add(tmp_csum, skb->csum);
+		}
+
+		tmp_csum = csum_partial((char *)icmp6h,
+					sizeof(struct icmp6hdr), tmp_csum);
+		tmp_csum = csum_ipv6_magic(fl->fl6_src,
+					   fl->fl6_dst,
+					   len, fl->proto, tmp_csum);
+		icmp6h->icmp6_cksum = tmp_csum;
+	}
+	if (icmp6h->icmp6_cksum == 0)
+		icmp6h->icmp6_cksum = -1;
+	/* Propagate the transmit result instead of silently discarding it. */
+	err = ip6_push_pending_frames(sk);
+out:
+	return err;
+}
+
+/*
+ * getfrag callback for ip6_append_data(): @from is the offending packet.
+ * Copies @len bytes starting at @offset of that packet into @to and
+ * accumulates their checksum into skb->csum, using @odd as the block
+ * position passed to csum_block_add().  Always returns 0.
+ */
+static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+	struct sk_buff *src = from;
+	__u32 partial;
+
+	partial = skb_copy_and_csum_bits(src, offset, to, len, 0);
+	skb->csum = csum_block_add(skb->csum, partial, odd);
+
+	return 0;
+}
+
/*
* Send an ICMP message in response to a packet in error
*/
-
void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
struct net_device *dev)
{
struct inet6_dev *idev;
struct ipv6hdr *hdr = skb->nh.ipv6h;
struct sock *sk = icmpv6_socket->sk;
- struct in6_addr *saddr = NULL;
- int iif = 0;
- struct icmpv6_msg msg;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *saddr = NULL, *tmp_saddr = NULL;
+ struct dst_entry *dst;
+ struct icmp6hdr tmp_hdr;
struct flowi fl;
+ int iif = 0;
int addr_type = 0;
- int len;
+ int len, plen;
+ int hlimit = -1;
+ int err = 0;
if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
return;
@@ -328,36 +345,48 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
if (!icmpv6_xrlim_allow(sk, type, &fl))
goto out;
- /*
- * ok. kick it. checksum will be provided by the
- * getfrag_t callback.
- */
+ tmp_hdr.icmp6_type = type;
+ tmp_hdr.icmp6_code = code;
+ tmp_hdr.icmp6_cksum = 0;
+ tmp_hdr.icmp6_pointer = htonl(info);
- msg.icmph.icmp6_type = type;
- msg.icmph.icmp6_code = code;
- msg.icmph.icmp6_cksum = 0;
- msg.icmph.icmp6_pointer = htonl(info);
+ if (!fl.oif && ipv6_addr_is_multicast(fl.fl6_dst))
+ fl.oif = np->mcast_oif;
- msg.skb = skb;
- msg.offset = skb->nh.raw - skb->data;
- msg.csum = 0;
- msg.daddr = &hdr->saddr;
+ err = ip6_dst_lookup(sk, &dst, &fl, &tmp_saddr);
+ if (err) goto out;
- len = skb->len - msg.offset + sizeof(struct icmp6hdr);
- len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr));
+ if (hlimit < 0) {
+ if (ipv6_addr_is_multicast(fl.fl6_dst))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+ }
+ plen = skb->nh.raw - skb->data;
+ __skb_pull(skb, plen);
+ len = skb->len;
+ len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
if (len < 0) {
if (net_ratelimit())
printk(KERN_DEBUG "icmp: len problem\n");
+ __skb_push(skb, plen);
goto out;
}
- msg.len = len;
-
idev = in6_dev_get(skb->dev);
-
- ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1,
- MSG_DONTWAIT);
+
+ err = ip6_append_data(sk, icmpv6_getfrag, skb, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr),
+ hlimit, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT);
+ if (err) {
+ ip6_flush_pending_frames(sk);
+ goto out;
+ }
+ err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
+ __skb_push(skb, plen);
+
if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
ICMP6_INC_STATS_OFFSET_BH(idev, Icmp6OutDestUnreachs, type - ICMPV6_DEST_UNREACH);
ICMP6_INC_STATS_BH(idev, Icmp6OutMsgs);
@@ -365,6 +394,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
if (likely(idev != NULL))
in6_dev_put(idev);
out:
+ if (tmp_saddr) kfree(tmp_saddr);
icmpv6_xmit_unlock();
}
@@ -372,10 +402,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
{
struct sock *sk = icmpv6_socket->sk;
struct inet6_dev *idev;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *saddr = NULL, *tmp_saddr = NULL;
struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
- struct in6_addr *saddr;
- struct icmpv6_msg msg;
+ struct icmp6hdr tmp_hdr;
struct flowi fl;
+ struct dst_entry *dst;
+ int err = 0;
+ int hlimit = -1;
saddr = &skb->nh.ipv6h->daddr;
@@ -383,39 +417,55 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ipv6_chk_acast_addr(0, saddr))
saddr = NULL;
- msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY;
- msg.icmph.icmp6_code = 0;
- msg.icmph.icmp6_cksum = 0;
- msg.icmph.icmp6_identifier = icmph->icmp6_identifier;
- msg.icmph.icmp6_sequence = icmph->icmp6_sequence;
-
- msg.skb = skb;
- msg.offset = 0;
- msg.csum = 0;
- msg.len = skb->len + sizeof(struct icmp6hdr);
- msg.daddr = &skb->nh.ipv6h->saddr;
+ memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
+ tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
fl.proto = IPPROTO_ICMPV6;
- fl.fl6_dst = msg.daddr;
+ fl.fl6_dst = &skb->nh.ipv6h->saddr;
fl.fl6_src = saddr;
fl.oif = skb->dev->ifindex;
fl.fl6_flowlabel = 0;
fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
fl.fl_icmp_code = 0;
+ icmpv6_xmit_lock();
+
+ if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr))
+ fl.oif = np->mcast_oif;
+
+ err = ip6_dst_lookup(sk, &dst, &fl, &tmp_saddr);
+
+ if (err) goto out;
+
+ if (hlimit < 0) {
+ if (ipv6_addr_is_multicast(fl.fl6_dst))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+ }
+
idev = in6_dev_get(skb->dev);
- icmpv6_xmit_lock();
+ err = ip6_append_data(sk, icmpv6_getfrag, skb, skb->len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr), hlimit, NULL, &fl,
+ (struct rt6_info*)dst, MSG_DONTWAIT);
- ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, msg.len, NULL, -1,
- MSG_DONTWAIT);
- ICMP6_INC_STATS_BH(idev, Icmp6OutEchoReplies);
- ICMP6_INC_STATS_BH(idev, Icmp6OutMsgs);
+ if (err) {
+ ip6_flush_pending_frames(sk);
+ goto out;
+ }
+ err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
- icmpv6_xmit_unlock();
+ ICMP6_INC_STATS_BH(idev, Icmp6OutEchoReplies);
+ ICMP6_INC_STATS_BH(idev, Icmp6OutMsgs);
if (likely(idev != NULL))
in6_dev_put(idev);
+out:
+ if (tmp_saddr) kfree(tmp_saddr);
+ icmpv6_xmit_unlock();
}
static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9705f52c5655..6ce5838ea4d0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -23,6 +23,9 @@
*
* H. von Brand : Added missing #include <linux/string.h>
* Imran Patel : frag id should be in NBO
+ * Kazunori MIYAZAWA @USAGI
+ * : add ip6_append_data and related functions
+ * for datagram xmit
*/
#include <linux/config.h>
@@ -52,6 +55,8 @@
#include <net/icmp.h>
#include <net/xfrm.h>
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
+
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
static u32 ipv6_fragmentation_id = 1;
@@ -98,7 +103,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
}
-int ip6_output(struct sk_buff *skb)
+int ip6_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct net_device *dev = dst->dev;
@@ -134,6 +139,13 @@ int ip6_output(struct sk_buff *skb)
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
}
+/*
+ * Output entry point: packets longer than the device MTU, or carrying a
+ * frag_list, go through ip6_fragment(); everything else is passed
+ * straight to ip6_output2().
+ */
+int ip6_output(struct sk_buff *skb)
+{
+	int must_fragment = skb->len > skb->dst->dev->mtu ||
+			    skb_shinfo(skb)->frag_list;
+
+	if (!must_fragment)
+		return ip6_output2(skb);
+
+	return ip6_fragment(skb, ip6_output2);
+}
#ifdef CONFIG_NETFILTER
int ip6_route_me_harder(struct sk_buff *skb)
@@ -847,3 +859,658 @@ drop:
kfree_skb(skb);
return -EINVAL;
}
+
+/*
+ * Give fragment @to the same per-packet metadata as the packet @from it
+ * was cut from.  References are taken on the route and, when netfilter
+ * is configured, on the conntrack and bridge state shared with @from.
+ */
+static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+{
+	/* Device and route (the fragment gets its own dst reference). */
+	to->dev = from->dev;
+	to->dst = dst_clone(from->dst);
+
+	/* Classification fields. */
+	to->protocol = from->protocol;
+	to->pkt_type = from->pkt_type;
+	to->priority = from->priority;
+	to->security = from->security;
+
+#ifdef CONFIG_NET_SCHED
+	to->tc_index = from->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+	to->nfmark = from->nfmark;
+	/* Connection association is same as pre-frag packet */
+	to->nfct = from->nfct;
+	nf_conntrack_get(to->nfct);
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+	to->nf_bridge = from->nf_bridge;
+	nf_bridge_get(to->nf_bridge);
+#endif
+#ifdef CONFIG_NETFILTER_DEBUG
+	to->nf_debug = from->nf_debug;
+#endif
+#endif
+}
+
+/*
+ * Walk the extension headers that follow the IPv6 header and find where
+ * a fragment header has to be inserted.
+ *
+ * Hop-by-hop, routing and destination-option headers are skipped; the
+ * walk stops at the first other header type, or at a destination-option
+ * header that comes after a routing header.
+ *
+ * Returns the offset of that point from skb->nh.raw and stores in
+ * *nexthdr a pointer to the "next header" byte that refers to it.
+ */
+static int ip6_found_nexthdr(struct sk_buff *skb, u8 **nexthdr)
+{
+	struct ipv6_opt_hdr *ext = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	u16 offset = sizeof(struct ipv6hdr);
+	int seen_rthdr = 0;
+
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+		u8 type = **nexthdr;
+
+		if (type != NEXTHDR_HOP &&
+		    type != NEXTHDR_ROUTING &&
+		    type != NEXTHDR_DEST)
+			break;
+
+		if (type == NEXTHDR_ROUTING)
+			seen_rthdr = 1;
+		else if (type == NEXTHDR_DEST && seen_rthdr)
+			break;
+
+		/* Step over this extension header. */
+		offset += ipv6_optlen(ext);
+		*nexthdr = &ext->nexthdr;
+		ext = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	}
+
+	return offset;
+}
+
+/*
+ * Fragment @skb so that every piece fits the path MTU of its route and
+ * pass each fragment to @output.
+ *
+ * Fast path: when @skb carries a frag_list whose members already have
+ * fragment geometry (size, 8-byte alignment, enough headroom, owned by a
+ * socket, not shared), the headers are pushed in place and the list
+ * members are sent as they are.
+ *
+ * Slow path: otherwise a new buffer is allocated per fragment and the
+ * data is copied out of @skb.
+ *
+ * @skb is always consumed.  Returns 0 on success or a negative errno /
+ * the error reported by @output.
+ *
+ * Fixes relative to the previous version:
+ *  - the slow path tested "if (frag_id)" and therefore never selected an
+ *    identification; every fragment was sent with id 0;
+ *  - first_len started at 0, so the unsigned "first_len - hlen > mtu"
+ *    test was always true and the fast path was never taken;
+ *  - the fast path OR-ed the "more fragments" bit into the running
+ *    offset, which corrupted later offsets and left the bit set on the
+ *    last fragment;
+ *  - the fast-path headroom check ignored the fragment header that is
+ *    pushed together with the copied headers;
+ *  - a failed header allocation leaked @skb and its detached frag list.
+ */
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+{
+	struct net_device *dev;
+	struct rt6_info *rt = (struct rt6_info*)skb->dst;
+	struct sk_buff *frag;
+	struct ipv6hdr *tmp_hdr;
+	struct frag_hdr *fh;
+	unsigned int mtu, hlen, left, len;
+	u32 frag_id = 0;
+	int ptr, offset = 0, err=0;
+	u8 *prevhdr, nexthdr = 0;
+
+	dev = rt->u.dst.dev;
+	hlen = ip6_found_nexthdr(skb, &prevhdr);
+	nexthdr = *prevhdr;
+
+	/* Payload room per fragment once headers and frag header are in. */
+	mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
+
+	if (skb_shinfo(skb)->frag_list) {
+		int first_len = skb_pagelen(skb);
+
+		if (first_len - hlen > mtu ||
+		    ((first_len - hlen) & 7) ||
+		    skb_cloned(skb))
+			goto slow_path;
+
+		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+			/* Correct geometry. */
+			if (frag->len > mtu ||
+			    ((frag->len & 7) && frag->next) ||
+			    skb_headroom(frag) < hlen + sizeof(struct frag_hdr))
+				goto slow_path;
+
+			/* Correct socket ownership. */
+			if (frag->sk == NULL)
+				goto slow_path;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		err = 0;
+		offset = 0;
+
+		/*
+		 * Allocate the header copy while the frag list is still
+		 * attached, so that a failure can free everything at once.
+		 */
+		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
+		if (!tmp_hdr) {
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		frag = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = NULL;
+		/* BUILD HEADER */
+
+		*prevhdr = NEXTHDR_FRAGMENT;
+		memcpy(tmp_hdr, skb->nh.raw, hlen);
+		__skb_pull(skb, hlen);
+		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
+		skb->nh.raw = __skb_push(skb, hlen);
+		memcpy(skb->nh.raw, tmp_hdr, hlen);
+
+		ipv6_select_ident(skb, fh);
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		fh->frag_off = htons(0x0001);
+		frag_id = fh->identification;
+
+		first_len = skb_pagelen(skb);
+		skb->data_len = first_len - skb_headlen(skb);
+		skb->len = first_len;
+		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down. */
+			if (frag) {
+				frag->h.raw = frag->data;
+				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
+				frag->nh.raw = __skb_push(frag, hlen);
+				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				offset += skb->len - hlen - sizeof(struct frag_hdr);
+				fh->nexthdr = nexthdr;
+				fh->reserved = 0;
+				/* Keep the M flag out of the running offset. */
+				fh->frag_off = htons(offset);
+				if (frag->next != NULL)
+					fh->frag_off |= htons(0x0001);
+				fh->identification = frag_id;
+				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+				ip6_copy_metadata(frag, skb);
+			}
+			err = output(skb);
+
+			if (err || !frag)
+				break;
+
+			skb = frag;
+			frag = skb->next;
+			skb->next = NULL;
+		}
+
+		kfree(tmp_hdr);
+
+		if (err == 0) {
+			IP6_INC_STATS(Ip6FragOKs);
+			return 0;
+		}
+
+		/* Drop whatever was not handed to output(). */
+		while (frag) {
+			skb = frag->next;
+			kfree_skb(frag);
+			frag = skb;
+		}
+
+		IP6_INC_STATS(Ip6FragFails);
+		return err;
+	}
+
+slow_path:
+	left = skb->len - hlen;		/* Space per frame */
+	ptr = hlen;			/* Where to start from */
+
+	/*
+	 *	Fragment the datagram.
+	 */
+
+	*prevhdr = NEXTHDR_FRAGMENT;
+
+	/*
+	 *	Keep copying data until we run out.
+	 */
+	while(left > 0)	{
+		len = left;
+		/* IF: it doesn't fit, use 'mtu' - the data space left */
+		if (len > mtu)
+			len = mtu;
+		/* IF: we are not sending upto and including the packet end
+		   then align the next start on an eight byte boundary */
+		if (len < left)	{
+			len &= ~7;
+		}
+		/*
+		 *	Allocate buffer.
+		 */
+
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(dev), GFP_ATOMIC)) == NULL) {
+			NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		/*
+		 *	Set up data on packet
+		 */
+
+		ip6_copy_metadata(frag, skb);
+		skb_reserve(frag, LL_RESERVED_SPACE(dev));
+		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
+		frag->nh.raw = frag->data;
+		fh = (struct frag_hdr*)(frag->data + hlen);
+		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+
+		/*
+		 *	Charge the memory for the fragment to any owner
+		 *	it might possess
+		 */
+		if (skb->sk)
+			skb_set_owner_w(frag, skb->sk);
+
+		/*
+		 *	Copy the packet header into the new buffer.
+		 */
+		memcpy(frag->nh.raw, skb->data, hlen);
+
+		/*
+		 *	Build fragment header.
+		 */
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		/* Pick an identification once, then reuse it for the rest. */
+		if (!frag_id) {
+			ipv6_select_ident(skb, fh);
+			frag_id = fh->identification;
+		} else
+			fh->identification = frag_id;
+
+		/*
+		 *	Copy a block of the IP datagram.
+		 */
+		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+			BUG();
+		left -= len;
+
+		fh->frag_off = htons( left > 0 ? (offset | 0x0001) : offset);
+		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+		ptr += len;
+		offset += len;
+
+		/*
+		 *	Put this fragment into the sending queue.
+		 */
+
+		IP6_INC_STATS(Ip6FragCreates);
+
+		err = output(frag);
+		if (err)
+			goto fail;
+	}
+	kfree_skb(skb);
+	IP6_INC_STATS(Ip6FragOKs);
+	return err;
+
+fail:
+	kfree_skb(skb);
+	IP6_INC_STATS(Ip6FragFails);
+	return err;
+}
+
+/*
+ * Find the route for flow @fl on behalf of socket @sk.
+ *
+ * The socket's cached route is reused when it still matches the flow's
+ * destination and output interface; otherwise ip6_route_output() is
+ * asked for a new one.  When the flow has no source address, one is
+ * selected into a freshly allocated in6_addr that is returned through
+ * @saddr and installed as fl->fl6_src; the caller must kfree() it, also
+ * when this function fails after the allocation.
+ *
+ * On success 0 is returned and *dst holds a referenced route.  On
+ * failure a negative errno is returned and the route reference taken
+ * here has been dropped.
+ *
+ * Fixes relative to the previous version: the source address allocation
+ * is now checked before use, and the route reference is released when
+ * no source address can be selected.
+ */
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl, struct in6_addr **saddr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int err = 0;
+
+	*dst = __sk_dst_check(sk, np->dst_cookie);
+	if (*dst) {
+		struct rt6_info *rt = (struct rt6_info*)*dst;
+
+		/* Yes, checking route validity in not connected
+		   case is not very simple. Take into account,
+		   that we do not support routing by source, TOS,
+		   and MSG_DONTROUTE --ANK (980726)
+
+		   1. If route was host route, check that
+		      cached destination is current.
+		      If it is network route, we still may
+		      check its validity using saved pointer
+		      to the last used address: daddr_cache.
+		      We do not want to save whole address now,
+		      (because main consumer of this service
+		      is tcp, which has not this problem),
+		      so that the last trick works only on connected
+		      sockets.
+		   2. oif also should be the same.
+		 */
+
+		if (((rt->rt6i_dst.plen != 128 ||
+		      ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
+		     && (np->daddr_cache == NULL ||
+			 ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
+		    || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
+			*dst = NULL;
+		} else
+			dst_hold(*dst);
+	}
+
+	if (*dst == NULL)
+		*dst = ip6_route_output(sk, fl);
+
+	if ((*dst)->error) {
+		IP6_INC_STATS(Ip6OutNoRoutes);
+		dst_release(*dst);
+		return -ENETUNREACH;
+	}
+
+	if (fl->fl6_src == NULL) {
+		*saddr = kmalloc(sizeof(struct in6_addr), GFP_ATOMIC);
+		if (*saddr == NULL) {
+			/* Nothing to select the address into. */
+			dst_release(*dst);
+			return -ENOMEM;
+		}
+		err = ipv6_get_saddr(*dst, fl->fl6_dst, *saddr);
+
+		if (err) {
+#if IP6_DEBUG >= 2
+			printk(KERN_DEBUG "ip6_build_xmit: "
+			       "no availiable source address\n");
+#endif
+			/* Drop the route like the other failure paths do. */
+			dst_release(*dst);
+			return err;
+		}
+		fl->fl6_src = *saddr;
+	}
+
+	if (*dst) {
+		if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) {
+			dst_release(*dst);
+			return -ENETUNREACH;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Append @length bytes, produced by the @getfrag callback from @from, to
+ * the datagram being built on sk->write_queue.
+ *
+ * The first call on an empty queue sets up the cork state (options,
+ * route, flow, fragment size); later calls reuse it and ignore @opt,
+ * @rt and @transhdrlen.  Data is laid out in buffers of at most one
+ * fragment each, leaving room for a fragment header in front, so that
+ * ip6_push_pending_frames() can send them.
+ *
+ * @transhdrlen is the space to leave for the transport header in the
+ * first buffer.  @hlimit is accepted but not used by this function.
+ *
+ * Returns 0 on success or a negative errno; the caller is expected to
+ * discard the queue with ip6_flush_pending_frames() on failure.
+ *
+ * Fixes relative to the previous version: exthdrlen is initialised
+ * before it is accumulated into, and a failed allocation of the corked
+ * option copy is reported instead of being written through.
+ */
+int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
+		    void *from, int length, int transhdrlen,
+		    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
+		    unsigned int flags)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	unsigned int maxfraglen, fragheaderlen;
+	int exthdrlen = 0;
+	int hh_len;
+	int mtu;
+	int copy = 0;
+	int err;
+	int offset = 0;
+	int csummode = CHECKSUM_NONE;
+
+	if (flags&MSG_PROBE)
+		return 0;
+	if (skb_queue_empty(&sk->write_queue)) {
+		/*
+		 * setup for corking
+		 */
+		if (opt) {
+			if (np->cork.opt == NULL) {
+				np->cork.opt = kmalloc(opt->tot_len, sk->allocation);
+				if (np->cork.opt == NULL)
+					return -ENOBUFS;
+			}
+			memcpy(np->cork.opt, opt, opt->tot_len);
+			inet->cork.flags |= IPCORK_OPT;
+			/* need source address above miyazawa*/
+			/*
+			 * NOTE(review): this contribution is overwritten by
+			 * the header_len assignment below, so opt_flen is
+			 * effectively not accounted for -- confirm intent.
+			 */
+			exthdrlen += opt->opt_flen ? opt->opt_flen : 0;
+		}
+		dst_hold(&rt->u.dst);
+		np->cork.rt = rt;
+		np->cork.fl = fl;
+		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
+		inet->cork.length = 0;
+		inet->sndmsg_page = NULL;
+		inet->sndmsg_off = 0;
+		if ((exthdrlen = rt->u.dst.header_len) != 0) {
+			length += exthdrlen;
+			transhdrlen += exthdrlen;
+		}
+	} else {
+		rt = np->cork.rt;
+		if (inet->cork.flags & IPCORK_OPT)
+			opt = np->cork.opt;
+		transhdrlen = 0;
+		exthdrlen = 0;
+		mtu = inet->cork.fragsize;
+	}
+
+	hh_len = (rt->u.dst.dev->hard_header_len&~15) + 16;
+
+	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
+
+	if (mtu < 65576) {
+		if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
+			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+			return -EMSGSIZE;
+		}
+	}
+
+	inet->cork.length += length;
+
+	if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
+		goto alloc_new_skb;
+
+	while (length > 0) {
+		if ((copy = maxfraglen - skb->len) <= 0) {
+			char *data;
+			unsigned int datalen;
+			unsigned int fraglen;
+			unsigned int alloclen;
+			BUG_TRAP(copy == 0);
+alloc_new_skb:
+			datalen = maxfraglen - fragheaderlen;
+			if (datalen > length)
+				datalen = length;
+			fraglen = datalen + fragheaderlen;
+			if ((flags & MSG_MORE) &&
+			    !(rt->u.dst.dev->features&NETIF_F_SG))
+				alloclen = maxfraglen;
+			else
+				alloclen = fraglen;
+			alloclen += sizeof(struct frag_hdr);
+			if (transhdrlen) {
+				skb = sock_alloc_send_skb(sk,
+						alloclen + hh_len + 15,
+						(flags & MSG_DONTWAIT), &err);
+			} else {
+				skb = NULL;
+				if (atomic_read(&sk->wmem_alloc) <= 2*sk->sndbuf)
+					skb = sock_wmalloc(sk,
+							   alloclen + hh_len + 15, 1,
+							   sk->allocation);
+				if (unlikely(skb == NULL))
+					err = -ENOBUFS;
+			}
+			if (skb == NULL)
+				goto error;
+			/*
+			 *	Fill in the control structures
+			 */
+			skb->ip_summed = csummode;
+			skb->csum = 0;
+			/* reserve 8 byte for fragmentation */
+			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
+
+			/*
+			 *	Find where to start putting bytes
+			 */
+			data = skb_put(skb, fraglen);
+			skb->nh.raw = data + exthdrlen;
+			data += fragheaderlen;
+			skb->h.raw = data + exthdrlen;
+			copy = datalen - transhdrlen;
+			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
+				err = -EFAULT;
+				kfree_skb(skb);
+				goto error;
+			}
+
+			offset += copy;
+			length -= datalen;
+			transhdrlen = 0;
+			exthdrlen = 0;
+			csummode = CHECKSUM_NONE;
+
+			/*
+			 * Put the packet on the pending queue
+			 */
+			__skb_queue_tail(&sk->write_queue, skb);
+			continue;
+		}
+
+		if (copy > length)
+			copy = length;
+
+		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+			unsigned int off;
+
+			/* No scatter-gather: extend the linear area. */
+			off = skb->len;
+			if (getfrag(from, skb_put(skb, copy),
+						offset, copy, off, skb) < 0) {
+				__skb_trim(skb, off);
+				err = -EFAULT;
+				goto error;
+			}
+		} else {
+			int i = skb_shinfo(skb)->nr_frags;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+			struct page *page = inet->sndmsg_page;
+			int off = inet->sndmsg_off;
+			unsigned int left;
+
+			/* Scatter-gather: fill the current page or a new one. */
+			if (page && (left = PAGE_SIZE - off) > 0) {
+				if (copy >= left)
+					copy = left;
+				if (page != frag->page) {
+					if (i == MAX_SKB_FRAGS) {
+						err = -EMSGSIZE;
+						goto error;
+					}
+					get_page(page);
+					skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+					frag = &skb_shinfo(skb)->frags[i];
+				}
+			} else if(i < MAX_SKB_FRAGS) {
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				page = alloc_pages(sk->allocation, 0);
+				if (page == NULL) {
+					err = -ENOMEM;
+					goto error;
+				}
+				inet->sndmsg_page = page;
+				inet->sndmsg_off = 0;
+
+				skb_fill_page_desc(skb, i, page, 0, 0);
+				frag = &skb_shinfo(skb)->frags[i];
+				skb->truesize += PAGE_SIZE;
+				atomic_add(PAGE_SIZE, &sk->wmem_alloc);
+			} else {
+				err = -EMSGSIZE;
+				goto error;
+			}
+			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+				err = -EFAULT;
+				goto error;
+			}
+			inet->sndmsg_off += copy;
+			frag->size += copy;
+			skb->len += copy;
+			skb->data_len += copy;
+		}
+		offset += copy;
+		length -= copy;
+	}
+	return 0;
+error:
+	/* Un-account the bytes that were not queued. */
+	inet->cork.length -= length;
+	IP6_INC_STATS(Ip6OutDiscards);
+	return err;
+}
+
+/*
+ * Turn the buffers queued by ip6_append_data() into one packet and send
+ * it.
+ *
+ * The first queued buffer becomes the head; the remaining ones are
+ * chained on its frag_list.  Extension headers from the corked options
+ * and the IPv6 header are pushed in front, and the packet is passed
+ * through the NF_IP6_LOCAL_OUT hook to dst_output().  The cork state is
+ * cleared afterwards, whether or not sending succeeded.
+ *
+ * Returns 0 on success (or when nothing was queued), a negative errno,
+ * or a translated transmit error when the socket has recverr set.
+ *
+ * Fixes relative to the previous version: the route reference taken by
+ * dst_hold() in ip6_append_data() is now dropped when the cork is
+ * cleared, and the corked flow is only dereferenced once a buffer is
+ * known to be queued.
+ *
+ * NOTE(review): the hop limit is taken from np->hop_limit, not from the
+ * hlimit argument given to ip6_append_data() -- confirm this is intended.
+ */
+int ip6_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *tmp_skb;
+	struct sk_buff **tail_skb;
+	struct in6_addr *final_dst = NULL;
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *hdr;
+	struct ipv6_txoptions *opt = np->cork.opt;
+	struct rt6_info *rt = np->cork.rt;
+	struct flowi *fl = np->cork.fl;
+	unsigned char proto;
+	int err = 0;
+
+	if ((skb = __skb_dequeue(&sk->write_queue)) == NULL)
+		goto out;
+	proto = fl->proto;
+	tail_skb = &(skb_shinfo(skb)->frag_list);
+
+	/* move skb->data to ip header from ext header */
+	if (skb->data < skb->nh.raw)
+		__skb_pull(skb, skb->nh.raw - skb->data);
+	while ((tmp_skb = __skb_dequeue(&sk->write_queue)) != NULL) {
+		/* Strip the header room; only the head carries headers. */
+		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		*tail_skb = tmp_skb;
+		tail_skb = &(tmp_skb->next);
+		skb->len += tmp_skb->len;
+		skb->data_len += tmp_skb->len;
+#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
+		skb->truesize += tmp_skb->truesize;
+		__sock_put(tmp_skb->sk);
+		tmp_skb->destructor = NULL;
+		tmp_skb->sk = NULL;
+#endif
+	}
+
+	final_dst = fl->fl6_dst;
+	__skb_pull(skb, skb->h.raw - skb->nh.raw);
+	if (opt && opt->opt_flen)
+		ipv6_push_frag_opts(skb, opt, &proto);
+	if (opt && opt->opt_nflen)
+		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
+
+	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+
+	*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
+
+	if (skb->len < 65536)
+		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	else
+		hdr->payload_len = 0;
+	hdr->hop_limit = np->hop_limit;
+	hdr->nexthdr = proto;
+	memcpy(&hdr->saddr, fl->fl6_src, sizeof(struct in6_addr));
+	memcpy(&hdr->daddr, final_dst, sizeof(struct in6_addr));
+
+	skb->dst = dst_clone(&rt->u.dst);
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+	/* Positive values are only reported to sockets that asked for them. */
+	if (err > 0)
+		err = inet->recverr ? net_xmit_errno(err) : 0;
+
+out:
+	inet->cork.flags &= ~IPCORK_OPT;
+	if (np->cork.opt) {
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+	if (np->cork.rt) {
+		/* Balance the dst_hold() done when the cork was set up. */
+		dst_release(&np->cork.rt->u.dst);
+		np->cork.rt = NULL;
+	}
+	np->cork.fl = NULL;
+	return err;
+}
+
+/*
+ * Throw away everything queued by ip6_append_data() and clear the cork
+ * state: the option copy is freed, and the route reference taken by
+ * dst_hold() in ip6_append_data() is dropped (it used to be leaked).
+ */
+void ip6_flush_pending_frames(struct sock *sk)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue_tail(&sk->write_queue)) != NULL)
+		kfree_skb(skb);
+
+	inet->cork.flags &= ~IPCORK_OPT;
+
+	if (np->cork.opt) {
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+	if (np->cork.rt) {
+		/* Balance the dst_hold() done when the cork was set up. */
+		dst_release(&np->cork.rt->u.dst);
+		np->cork.rt = NULL;
+	}
+	np->cork.fl = NULL;
+}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dbec69833323..2da2d8293d40 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -12,6 +12,7 @@
* Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support
* YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
+ * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -29,6 +30,8 @@
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -438,87 +441,115 @@ csum_copy_err:
goto out_free;
}
-/*
- * Sending...
- */
+static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_opt *opt, int len)
+{
+ struct sk_buff *skb;
+ int err = 0;
+ u16 *csum;
-struct rawv6_fakehdr {
- struct iovec *iov;
- struct sock *sk;
- __u32 len;
- __u32 cksum;
- __u32 proto;
- struct in6_addr *daddr;
-};
+ if ((skb = skb_peek(&sk->write_queue)) == NULL)
+ goto out;
-static int rawv6_getfrag(const void *data, struct in6_addr *saddr,
- char *buff, unsigned int offset, unsigned int len)
-{
- struct iovec *iov = (struct iovec *) data;
+ if (opt->offset + 1 < len)
+ csum = (u16 *)(skb->h.raw + opt->offset);
+ else {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (skb_queue_len(&sk->write_queue) == 1) {
+ /*
+ * Only one fragment on the socket.
+ */
+ /* should be check HW csum miyazawa */
+ *csum = csum_ipv6_magic(fl->fl6_src,
+ fl->fl6_dst,
+ len, fl->proto, skb->csum);
+ } else {
+ u32 tmp_csum = 0;
+
+ skb_queue_walk(&sk->write_queue, skb) {
+ tmp_csum = csum_add(tmp_csum, skb->csum);
+ }
- return memcpy_fromiovecend(buff, iov, offset, len);
+ tmp_csum = csum_ipv6_magic(fl->fl6_src,
+ fl->fl6_dst,
+ len, fl->proto, tmp_csum);
+ *csum = tmp_csum;
+ }
+ if (*csum == 0)
+ *csum = -1;
+ ip6_push_pending_frames(sk);
+out:
+ return err;
}
-static int rawv6_frag_cksum(const void *data, struct in6_addr *addr,
- char *buff, unsigned int offset,
- unsigned int len)
+static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
+ struct flowi *fl, struct rt6_info *rt,
+ unsigned int flags)
{
- struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data;
-
- if (csum_partial_copy_fromiovecend(buff, hdr->iov, offset,
- len, &hdr->cksum))
- return -EFAULT;
-
- if (offset == 0) {
- struct sock *sk;
- struct raw6_opt *opt;
- struct in6_addr *daddr;
-
- sk = hdr->sk;
- opt = raw6_sk(sk);
+ struct inet_opt *inet = inet_sk(sk);
+ struct ipv6hdr *iph;
+ struct sk_buff *skb;
+ unsigned int hh_len;
+ int err;
- if (hdr->daddr)
- daddr = hdr->daddr;
- else
- daddr = addr + 1;
-
- hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len,
- hdr->proto, hdr->cksum);
-
- if (opt->offset + 1 < len) {
- __u16 *csum;
-
- csum = (__u16 *) (buff + opt->offset);
- if (*csum) {
- /* in case cksum was not initialized */
- __u32 sum = hdr->cksum;
- sum += *csum;
- *csum = hdr->cksum = (sum + (sum>>16));
- } else {
- *csum = hdr->cksum;
- }
- } else {
- if (net_ratelimit())
- printk(KERN_DEBUG "icmp: cksum offset too big\n");
- return -EINVAL;
- }
- }
- return 0;
-}
+ if (length > rt->u.dst.dev->mtu) {
+ ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+ return -EMSGSIZE;
+ }
+ if (flags&MSG_PROBE)
+ goto out;
+
+ hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+ skb = sock_alloc_send_skb(sk, length+hh_len+15,
+ flags&MSG_DONTWAIT, &err);
+ if (skb == NULL)
+ goto error;
+ skb_reserve(skb, hh_len);
+
+ skb->priority = sk->priority;
+ skb->dst = dst_clone(&rt->u.dst);
+
+ skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb->h.raw = skb->nh.raw;
+ err = memcpy_fromiovecend((void *)iph, from, 0, length);
+ if (err)
+ goto error_fault;
+ err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+ dst_output);
+ if (err > 0)
+ err = inet->recverr ? net_xmit_errno(err) : 0;
+ if (err)
+ goto error;
+out:
+ return 0;
+error_fault:
+ err = -EFAULT;
+ kfree_skb(skb);
+error:
+ IP6_INC_STATS(Ip6OutDiscards);
+ return err;
+}
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int len)
{
struct ipv6_txoptions opt_space;
struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ struct in6_addr *daddr, *saddr = NULL;
struct inet_opt *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct raw6_opt *raw_opt = raw6_sk(sk);
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
+ struct dst_entry *dst = NULL;
struct flowi fl;
int addr_len = msg->msg_namelen;
- struct in6_addr *daddr;
- struct raw6_opt *raw_opt;
int hlimit = -1;
u16 proto;
int err;
@@ -552,6 +583,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (!proto)
proto = inet->num;
+ else if (proto != inet->num)
+ return(-EINVAL);
if (proto > 255)
return(-EINVAL);
@@ -590,6 +623,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
* unspecfied destination address
* treated as error... is this correct ?
*/
+ fl6_sock_release(flowlabel);
return(-EINVAL);
}
@@ -619,39 +653,71 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
- raw_opt = raw6_sk(sk);
-
fl.proto = proto;
fl.fl6_dst = daddr;
if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
fl.fl6_src = &np->saddr;
fl.fl_icmp_type = 0;
fl.fl_icmp_code = 0;
-
- if (raw_opt->checksum) {
- struct rawv6_fakehdr hdr;
-
- hdr.iov = msg->msg_iov;
- hdr.sk = sk;
- hdr.len = len;
- hdr.cksum = 0;
- hdr.proto = proto;
-
- if (opt && opt->srcrt)
- hdr.daddr = daddr;
+
+ /* merge ip6_build_xmit from ip6_output */
+ if (opt && opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+ fl.fl6_dst = rt0->addr;
+ }
+
+ if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr))
+ fl.oif = np->mcast_oif;
+
+ err = ip6_dst_lookup(sk, &dst, &fl, &saddr);
+ if (err) goto out;
+
+ if (hlimit < 0) {
+ if (ipv6_addr_is_multicast(fl.fl6_dst))
+ hlimit = np->mcast_hops;
else
- hdr.daddr = NULL;
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+ }
- err = ip6_build_xmit(sk, rawv6_frag_cksum, &hdr, &fl, len,
- opt, hlimit, msg->msg_flags);
+ if (msg->msg_flags&MSG_CONFIRM)
+ goto do_confirm;
+
+back_from_confirm:
+ if (inet->hdrincl) {
+ err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
} else {
- err = ip6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, &fl, len,
- opt, hlimit, msg->msg_flags);
+ lock_sock(sk);
+ err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
+ hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags);
+
+ if (err)
+ ip6_flush_pending_frames(sk);
+ else if (!(msg->msg_flags & MSG_MORE)) {
+ if (raw_opt->checksum) {
+ err = rawv6_push_pending_frames(sk, &fl, raw_opt, len);
+ } else {
+ err = ip6_push_pending_frames(sk);
+ }
+ }
}
+done:
+ ip6_dst_store(sk, dst, fl.nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
+ if (err > 0)
+ err = np->recverr ? net_xmit_errno(err) : 0;
+ release_sock(sk);
+out:
fl6_sock_release(flowlabel);
-
+ if (saddr) kfree(saddr);
return err<0?err:len;
+do_confirm:
+ dst_confirm(dst);
+ if (!(msg->msg_flags & MSG_PROBE) || len)
+ goto back_from_confirm;
+ err = 0;
+ goto done;
}
static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f935dd95041..1efddca3af68 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -14,6 +14,7 @@
* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
* a single port at the same time.
+ * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -738,96 +739,117 @@ discard:
kfree_skb(skb);
return(0);
}
-
/*
- * Sending
+ * Throw away all pending data and cancel the corking. Socket is locked.
*/
-
-struct udpv6fakehdr
+static void udp_v6_flush_pending_frames(struct sock *sk)
{
- struct udphdr uh;
- struct iovec *iov;
- __u32 wcheck;
- __u32 pl_len;
- struct in6_addr *daddr;
-};
+ struct udp_opt *up = udp_sk(sk);
+
+ if (up->pending) { /* no-op unless a corked datagram is currently being built */
+ up->pending = 0; /* clear the cork marker first, then drop what was queued */
+ ip6_flush_pending_frames(sk); /* the discard itself is delegated to the IPv6 output helper */
+ }
+}
/*
- * with checksum
+ * Sending
*/
-static int udpv6_getfrag(const void *data, struct in6_addr *addr,
- char *buff, unsigned int offset, unsigned int len)
+static int udp_v6_push_pending_frames(struct sock *sk, struct udp_opt *up)
{
- struct udpv6fakehdr *udh = (struct udpv6fakehdr *) data;
- char *dst;
- int final = 0;
- int clen = len;
+ struct sk_buff *skb;
+ struct udphdr *uh;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi *fl = np->cork.fl; /* ports and addresses come from the flow saved in the cork state */
+ int err = 0;
- dst = buff;
+ /* Grab the skbuff where UDP header space exists. */
+ if ((skb = skb_peek(&sk->write_queue)) == NULL)
+ goto out; /* empty write queue: nothing to send, only reset the cork state and return 0 */
- if (offset) {
- offset -= sizeof(struct udphdr);
- } else {
- dst += sizeof(struct udphdr);
- final = 1;
- clen -= sizeof(struct udphdr);
+ /*
+ * Create a UDP header
+ */
+ uh = skb->h.uh;
+ uh->source = fl->fl_ip_sport;
+ uh->dest = fl->fl_ip_dport;
+ uh->len = htons(up->len); /* up->len is the running total that udpv6_sendmsg adds to on each append */
+ uh->check = 0; /* must be zero while the checksum below is computed over the header */
+
+ if (sk->no_check == UDP_CSUM_NOXMIT) { /* skip checksum computation; uh->check stays 0 */
+ skb->ip_summed = CHECKSUM_NONE;
+ goto send;
}
- if (csum_partial_copy_fromiovecend(dst, udh->iov, offset,
- clen, &udh->wcheck))
- return -EFAULT;
-
- if (final) {
- struct in6_addr *daddr;
-
- udh->wcheck = csum_partial((char *)udh, sizeof(struct udphdr),
- udh->wcheck);
+ if (skb_queue_len(&sk->write_queue) == 1) { /* one skb queued: fold the header into that skb's own csum */
+ skb->csum = csum_partial((char *)uh,
+ sizeof(struct udphdr), skb->csum);
+ uh->check = csum_ipv6_magic(fl->fl6_src,
+ fl->fl6_dst,
+ up->len, fl->proto, skb->csum);
+ } else { /* several skbs queued: sum every skb's csum before adding the header */
+ u32 tmp_csum = 0;
- if (udh->daddr) {
- daddr = udh->daddr;
- } else {
- /*
- * use packet destination address
- * this should improve cache locality
- */
- daddr = addr + 1;
+ skb_queue_walk(&sk->write_queue, skb) {
+ tmp_csum = csum_add(tmp_csum, skb->csum);
}
- udh->uh.check = csum_ipv6_magic(addr, daddr,
- udh->pl_len, IPPROTO_UDP,
- udh->wcheck);
- if (udh->uh.check == 0)
- udh->uh.check = -1;
+ tmp_csum = csum_partial((char *)uh,
+ sizeof(struct udphdr), tmp_csum);
+ tmp_csum = csum_ipv6_magic(fl->fl6_src,
+ fl->fl6_dst,
+ up->len, fl->proto, tmp_csum);
+ uh->check = tmp_csum;
- memcpy(buff, udh, sizeof(struct udphdr));
}
- return 0;
+ if (uh->check == 0) /* a computed checksum of 0 is transmitted as all-ones instead */
+ uh->check = -1;
+
+send:
+ err = ip6_push_pending_frames(sk); /* hands the queued skbs to the IPv6 output helper; its result is returned */
+out:
+ up->len = 0; /* reached on every path: cork state is reset whether or not the push succeeded */
+ up->pending = 0;
+ return err;
}
-static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int ulen)
+static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int len)
{
struct ipv6_txoptions opt_space;
- struct udpv6fakehdr udh;
+ struct udp_opt *up = udp_sk(sk);
struct inet_opt *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ struct in6_addr *daddr, *saddr = NULL;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi fl;
+ struct dst_entry *dst;
int addr_len = msg->msg_namelen;
- struct in6_addr *daddr;
- int len = ulen + sizeof(struct udphdr);
+ int ulen = len;
int addr_type;
int hlimit = -1;
-
+ int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err;
/* Rough check on arithmetic overflow,
better check is made in ip6_build_xmit
*/
- if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr))
+ if (len < 0 || len > INT_MAX - sizeof(struct udphdr))
return -EMSGSIZE;
+ if (up->pending) {
+ /*
+ * There are pending frames.
+ * The socket lock must be held while it's corked.
+ */
+ lock_sock(sk);
+ if (likely(up->pending))
+ goto do_append_data;
+ release_sock(sk);
+ }
+ ulen += sizeof(struct udphdr);
+
fl.fl6_flowlabel = 0;
fl.oif = 0;
@@ -835,7 +857,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (sin6->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk))
return -ENETUNREACH;
- return udp_sendmsg(iocb, sk, msg, ulen);
+ return udp_sendmsg(iocb, sk, msg, len);
}
if (addr_len < SIN6_LEN_RFC2133)
@@ -847,7 +869,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (sin6->sin6_port == 0)
return -EINVAL;
- udh.uh.dest = sin6->sin6_port;
+ up->dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
if (np->sndflow) {
@@ -873,7 +895,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (sk->state != TCP_ESTABLISHED)
return -ENOTCONN;
- udh.uh.dest = inet->dport;
+ up->dport = inet->dport;
daddr = &np->daddr;
fl.fl6_flowlabel = np->flow_label;
}
@@ -888,15 +910,14 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = daddr->s6_addr32[3];
- sin.sin_port = udh.uh.dest;
+ sin.sin_port = up->dport;
msg->msg_name = (struct sockaddr *)(&sin);
msg->msg_namelen = sizeof(sin);
fl6_sock_release(flowlabel);
- return udp_sendmsg(iocb, sk, msg, ulen);
+ return udp_sendmsg(iocb, sk, msg, len);
}
- udh.daddr = NULL;
if (!fl.oif)
fl.oif = sk->bound_dev_if;
fl.fl6_src = NULL;
@@ -922,33 +943,172 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
opt = np->opt;
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
- if (opt && opt->srcrt)
- udh.daddr = daddr;
-
- udh.uh.source = inet->sport;
- udh.uh.len = len < 0x10000 ? htons(len) : 0;
- udh.uh.check = 0;
- udh.iov = msg->msg_iov;
- udh.wcheck = 0;
- udh.pl_len = len;
fl.proto = IPPROTO_UDP;
fl.fl6_dst = daddr;
if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
fl.fl6_src = &np->saddr;
- fl.fl_ip_dport = udh.uh.dest;
- fl.fl_ip_sport = udh.uh.source;
+ fl.fl_ip_dport = up->dport;
+ fl.fl_ip_sport = inet->sport;
+
+ /* merge ip6_build_xmit from ip6_output */
+ if (opt && opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+ fl.fl6_dst = rt0->addr;
+ }
+
+ if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr))
+ fl.oif = np->mcast_oif;
+
+ err = ip6_dst_lookup(sk, &dst, &fl, &saddr);
+ if (err) goto out;
+
+ if (hlimit < 0) {
+ if (ipv6_addr_is_multicast(fl.fl6_dst))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+ }
+
+ if (msg->msg_flags&MSG_CONFIRM)
+ goto do_confirm;
+back_from_confirm:
+
+ lock_sock(sk);
+ if (unlikely(up->pending)) {
+ /* The socket is already corked while preparing it. */
+ /* ... which is an evident application bug. --ANK */
+ release_sock(sk);
- err = ip6_build_xmit(sk, udpv6_getfrag, &udh, &fl, len, opt, hlimit,
- msg->msg_flags);
+ NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
+ err = -EINVAL;
+ goto out;
+ }
+ up->pending = 1;
+
+do_append_data:
+ up->len += ulen;
+ err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr),
+ hlimit, opt, &fl, (struct rt6_info*)dst,
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+ if (err)
+ udp_v6_flush_pending_frames(sk);
+ else if (!corkreq)
+ err = udp_v6_push_pending_frames(sk, up);
+
+ ip6_dst_store(sk, dst, fl.nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
+ if (err > 0)
+ err = np->recverr ? net_xmit_errno(err) : 0;
+ release_sock(sk);
+out:
fl6_sock_release(flowlabel);
+ if (saddr) kfree(saddr);
+ if (!err) {
+ UDP6_INC_STATS_USER(UdpOutDatagrams);
+ return len;
+ }
+ return err;
- if (err < 0)
- return err;
+do_confirm:
+ dst_confirm(dst);
+ if (!(msg->msg_flags&MSG_PROBE) || len)
+ goto back_from_confirm;
+ err = 0;
+ goto out;
+}
+
+static int udpv6_destroy_sock(struct sock *sk) /* installed as udpv6_prot.destroy in place of inet6_destroy_sock */
+{
+ lock_sock(sk); /* the flush helper is documented as requiring the socket to be locked */
+ udp_v6_flush_pending_frames(sk); /* drop any datagram still corked on this socket */
+ release_sock(sk);
+
+ inet6_destroy_sock(sk); /* then run the destroy handler this function replaced */
+
+ return 0; /* always reports success */
+}
+
+/*
+ * Socket option code for UDP
+ */
+static int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
+{
+ struct udp_opt *up = udp_sk(sk);
+ int val;
+ int err = 0;
+
+ if (level != SOL_UDP) /* every other level is delegated to the generic IPv6 handler */
+ return ipv6_setsockopt(sk, level, optname, optval, optlen);
+
+ if(optlen<sizeof(int)) /* NOTE(review): int vs. size_t comparison, so a negative optlen is not rejected here */
+ return -EINVAL;
+
+ if (get_user(val, (int *)optval)) /* only the first int of the user buffer is read */
+ return -EFAULT;
+
+ switch(optname) {
+ case UDP_CORK:
+ if (val != 0) {
+ up->corkflag = 1; /* udpv6_sendmsg reads this flag to decide whether to keep data queued */
+ } else {
+ up->corkflag = 0;
+ lock_sock(sk);
+ udp_v6_push_pending_frames(sk, up); /* uncorking sends whatever is queued; the result is ignored */
+ release_sock(sk);
+ }
+ break;
+
+ case UDP_ENCAP:
+ up->encap_type = val; /* stored as given, no validation of the value */
+ break;
+
+ default:
+ err = -ENOPROTOOPT;
+ break;
+ };
+
+ return err; /* 0 on success, -ENOPROTOOPT for an unknown SOL_UDP option */
+}
+
+static int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char *optval, int *optlen)
+{
+ struct udp_opt *up = udp_sk(sk);
+ int val, len;
+
+ if (level != SOL_UDP) /* every other level is delegated to the generic IPv6 handler */
+ return ipv6_getsockopt(sk, level, optname, optval, optlen);
+
+ if(get_user(len,optlen))
+ return -EFAULT;
+
+ if(len < 0) /* must precede the unsigned clamp below, which would turn a negative length into sizeof(int) */
+ return -EINVAL;
+
+ len = min_t(unsigned int, len, sizeof(int)); /* never copy out more than one int */
+
+ switch(optname) {
+ case UDP_CORK:
+ val = up->corkflag;
+ break;
+
+ case UDP_ENCAP:
+ val = up->encap_type;
+ break;
- UDP6_INC_STATS_USER(UdpOutDatagrams);
- return ulen;
+ default:
+ return -ENOPROTOOPT;
+ };
+
+ if(put_user(len, optlen)) /* report back how many bytes are actually written to optval */
+ return -EFAULT;
+ if(copy_to_user(optval, &val,len))
+ return -EFAULT;
+ return 0;
}
static struct inet6_protocol udpv6_protocol = {
@@ -1038,9 +1198,9 @@ struct proto udpv6_prot = {
.connect = udpv6_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
- .destroy = inet6_destroy_sock,
- .setsockopt = ipv6_setsockopt,
- .getsockopt = ipv6_getsockopt,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
.backlog_rcv = udpv6_queue_rcv_skb,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index bfa7f57f1539..1d4222b652c6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1241,7 +1241,8 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
}
}
- if (x1 && x1->id.spi && hdr->sadb_msg_type == SADB_ADD) {
+ if (x1 && ((x1->id.spi && hdr->sadb_msg_type == SADB_ADD) ||
+ (hdr->sadb_msg_type == SADB_UPDATE && xfrm_state_kern(x1)))) {
x->km.state = XFRM_STATE_DEAD;
xfrm_state_put(x);
xfrm_state_put(x1);
@@ -1286,6 +1287,11 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
if (x == NULL)
return -ESRCH;
+ if (xfrm_state_kern(x)) {
+ xfrm_state_put(x);
+ return -EPERM;
+ }
+
xfrm_state_delete(x);
xfrm_state_put(x);
diff --git a/net/netsyms.c b/net/netsyms.c
index e4ac4ea2fd26..62e9dd88033a 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -304,6 +304,7 @@ EXPORT_SYMBOL(xfrm_state_register_afinfo);
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
EXPORT_SYMBOL(xfrm_state_get_afinfo);
EXPORT_SYMBOL(xfrm_state_put_afinfo);
+EXPORT_SYMBOL(xfrm_state_delete_tunnel);
EXPORT_SYMBOL(xfrm_replay_check);
EXPORT_SYMBOL(xfrm_replay_advance);
EXPORT_SYMBOL(xfrm_check_selectors);
@@ -466,6 +467,8 @@ EXPORT_SYMBOL(sysctl_tcp_tw_recycle);
EXPORT_SYMBOL(sysctl_max_syn_backlog);
#endif
+EXPORT_SYMBOL(ip_generic_getfrag);
+
#endif
EXPORT_SYMBOL(tcp_read_sock);
@@ -570,6 +573,7 @@ EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_remove_pack);
+EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_get);
EXPORT_SYMBOL(dev_alloc);
EXPORT_SYMBOL(dev_alloc_name);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4e5c32189076..a83a22015c04 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -774,6 +774,7 @@ static int packet_release(struct socket *sock)
*/
dev_remove_pack(&po->prot_hook);
po->running = 0;
+ po->num = 0;
__sock_put(sk);
}
@@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
spin_lock(&po->bind_lock);
if (po->running) {
- dev_remove_pack(&po->prot_hook);
__sock_put(sk);
po->running = 0;
+ po->num = 0;
+ spin_unlock(&po->bind_lock);
+ dev_remove_pack(&po->prot_hook);
+ spin_lock(&po->bind_lock);
}
po->num = protocol;
@@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
if (dev->ifindex == po->ifindex) {
spin_lock(&po->bind_lock);
if (po->running) {
- dev_remove_pack(&po->prot_hook);
+ __dev_remove_pack(&po->prot_hook);
__sock_put(sk);
po->running = 0;
sk->err = ENETDOWN;
@@ -1618,9 +1622,14 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
/* Detach socket from network */
spin_lock(&po->bind_lock);
- if (po->running)
- dev_remove_pack(&po->prot_hook);
+ if (po->running) {
+ __dev_remove_pack(&po->prot_hook);
+ po->num = 0;
+ po->running = 0;
+ }
spin_unlock(&po->bind_lock);
+
+ synchronize_net();
err = -EBUSY;
if (closing || atomic_read(&po->mapped) == 0) {
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 134b7c870583..4b801a1b69b4 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -222,11 +222,6 @@ used on the egress (might slow things for an iota)
*/
if (dev->qdisc_ingress) {
- /* FIXME: Push down to ->enqueue functions --RR */
- if (skb_is_nonlinear(*pskb)
- && skb_linearize(*pskb, GFP_ATOMIC) != 0)
- return NF_DROP;
-
spin_lock(&dev->queue_lock);
if ((q = dev->qdisc_ingress) != NULL)
fwres = q->enqueue(skb, q);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5dc85bd43efd..1e8ce6b27b69 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -172,6 +172,7 @@ struct xfrm_state *xfrm_state_alloc(void)
if (x) {
memset(x, 0, sizeof(struct xfrm_state));
atomic_set(&x->refcnt, 1);
+ atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->bydst);
INIT_LIST_HEAD(&x->byspi);
init_timer(&x->timer);
@@ -234,6 +235,7 @@ static void __xfrm_state_delete(struct xfrm_state *x)
void xfrm_state_delete(struct xfrm_state *x)
{
+ xfrm_state_delete_tunnel(x);
spin_lock_bh(&x->lock);
__xfrm_state_delete(x);
spin_unlock_bh(&x->lock);
@@ -248,7 +250,8 @@ void xfrm_state_flush(u8 proto)
for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart:
list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
- if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) {
+ if (!xfrm_state_kern(x) &&
+ (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
xfrm_state_hold(x);
spin_unlock_bh(&xfrm_state_lock);
@@ -790,6 +793,20 @@ void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
read_unlock(&afinfo->lock);
}
+/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
+void xfrm_state_delete_tunnel(struct xfrm_state *x) /* detach x from its tunnel state, if it has one; called first thing by xfrm_state_delete() */
+{
+ if (x->tunnel) { /* nothing to do for a state with no attached tunnel */
+ struct xfrm_state *t = x->tunnel;
+
+ if (atomic_read(&t->tunnel_users) == 2) /* NOTE(review): presumably 2 means x is the last remaining user -- confirm where tunnel_users is incremented */
+ xfrm_state_delete(t); /* re-enters this function for t via xfrm_state_delete() */
+ atomic_dec(&t->tunnel_users);
+ xfrm_state_put(t); /* drop a reference on t; presumably the one held through x->tunnel -- confirm */
+ x->tunnel = NULL; /* makes a repeated call on x a no-op */
+ }
+}
+
void __init xfrm_state_init(void)
{
int i;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 28f01f4213c1..291b51b616e8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -281,6 +281,11 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
if (x == NULL)
return -ESRCH;
+ if (xfrm_state_kern(x)) {
+ xfrm_state_put(x);
+ return -EPERM;
+ }
+
xfrm_state_delete(x);
xfrm_state_put(x);