summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@ppc970.osdl.org> 2004-08-23 00:29:47 -0700
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2004-08-23 00:29:47 -0700
commit 9c9dada1d5cfd06f06b00ef84ef01839b9ac65ed (patch)
tree cd290f9c5c159122223c050da7db4fe54f5932b2
parent 27b5be2921179dacff74114a80aae95087ad90ab (diff)
parent ad9b7f31e119e43028061beaaca8664e57202670 (diff)
Merge bk://kernel.bkbits.net/davem/net-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
-rw-r--r--crypto/Kconfig2
-rw-r--r--crypto/aes.c2
-rw-r--r--crypto/arc4.c4
-rw-r--r--crypto/blowfish.c6
-rw-r--r--crypto/scatterwalk.c2
-rw-r--r--crypto/tcrypt.h2
-rw-r--r--crypto/twofish.c4
-rw-r--r--drivers/net/sungem.c6
-rw-r--r--drivers/net/wan/syncppp.c8
-rw-r--r--drivers/net/wireless/strip.c23
-rw-r--r--drivers/s390/net/lcs.c4
-rw-r--r--drivers/s390/net/qeth_main.c17
-rw-r--r--include/linux/atalk.h29
-rw-r--r--include/linux/if_vlan.h2
-rw-r--r--include/linux/inetdevice.h20
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack.h44
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_core.h20
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_protocol.h21
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_sctp.h25
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_tcp.h37
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_tuple.h6
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_helper.h5
-rw-r--r--include/linux/netfilter_ipv4/ip_tables.h1
-rw-r--r--include/linux/netfilter_ipv4/ipt_sctp.h107
-rw-r--r--include/linux/netfilter_ipv6/ip6_tables.h4
-rw-r--r--include/linux/netlink.h1
-rw-r--r--include/linux/rbtree.h1
-rw-r--r--include/linux/skbuff.h16
-rw-r--r--include/linux/sysctl.h12
-rw-r--r--include/net/if_inet6.h2
-rw-r--r--include/net/ip6_fib.h1
-rw-r--r--include/net/ip6_route.h9
-rw-r--r--include/net/ip_fib.h1
-rw-r--r--include/net/neighbour.h3
-rw-r--r--include/net/route.h5
-rw-r--r--include/net/xfrm.h94
-rw-r--r--include/rxrpc/rxrpc.h7
-rw-r--r--lib/rbtree.c13
-rw-r--r--net/Kconfig13
-rw-r--r--net/appletalk/Makefile3
-rw-r--r--net/appletalk/atalk_proc.c13
-rw-r--r--net/appletalk/ddp.c10
-rw-r--r--net/appletalk/sysctl_net_atalk.c18
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/ipcommon.h3
-rw-r--r--net/bridge/netfilter/ebt_among.c45
-rw-r--r--net/bridge/netfilter/ebt_arp.c51
-rw-r--r--net/bridge/netfilter/ebt_arpreply.c32
-rw-r--r--net/bridge/netfilter/ebt_ip.c31
-rw-r--r--net/bridge/netfilter/ebt_log.c54
-rw-r--r--net/bridge/netfilter/ebt_stp.c22
-rw-r--r--net/bridge/netfilter/ebt_vlan.c14
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/filter.c21
-rw-r--r--net/core/netfilter.c9
-rw-r--r--net/core/netpoll.c7
-rw-r--r--net/core/pktgen.c8
-rw-r--r--net/core/utils.c104
-rw-r--r--net/decnet/dn_dev.c122
-rw-r--r--net/econet/af_econet.c10
-rw-r--r--net/ipv4/Kconfig22
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/ah4.c17
-rw-r--r--net/ipv4/devinet.c105
-rw-r--r--net/ipv4/esp4.c55
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/fib_rules.c23
-rw-r--r--net/ipv4/fib_semantics.c22
-rw-r--r--net/ipv4/icmp.c45
-rw-r--r--net/ipv4/igmp.c92
-rw-r--r--net/ipv4/ip_fragment.c37
-rw-r--r--net/ipv4/ip_nat_dumb.c166
-rw-r--r--net/ipv4/ipcomp.c25
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c104
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c42
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c9
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c9
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_icmp.c35
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c44
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c42
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c9
-rw-r--r--net/ipv4/netfilter/Kconfig12
-rw-r--r--net/ipv4/netfilter/Makefile4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c286
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c43
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c40
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c20
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c166
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c650
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c1038
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c79
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c433
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c20
-rw-r--r--net/ipv4/netfilter/ip_fw_compat_masq.c7
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c11
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c10
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c13
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c11
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c3
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c6
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c10
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c35
-rw-r--r--net/ipv4/netfilter/ip_tables.c55
-rw-r--r--net/ipv4/netfilter/ipchains_core.c42
-rw-r--r--net/ipv4/netfilter/ipfwadm_core.c42
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c29
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c170
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c4
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c15
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c35
-rw-r--r--net/ipv4/netfilter/ipt_ah.c11
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c17
-rw-r--r--net/ipv4/netfilter/ipt_esp.c11
-rw-r--r--net/ipv4/netfilter/ipt_multiport.c19
-rw-r--r--net/ipv4/netfilter/ipt_recent.c11
-rw-r--r--net/ipv4/netfilter/ipt_sctp.c201
-rw-r--r--net/ipv4/netfilter/ipt_tcpmss.c30
-rw-r--r--net/ipv4/netfilter/iptable_filter.c3
-rw-r--r--net/ipv4/raw.c48
-rw-r--r--net/ipv4/route.c69
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/xfrm4_output.c3
-rw-r--r--net/ipv4/xfrm4_policy.c21
-rw-r--r--net/ipv4/xfrm4_tunnel.c14
-rw-r--r--net/ipv6/Kconfig13
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c49
-rw-r--r--net/ipv6/ah6.c29
-rw-r--r--net/ipv6/anycast.c40
-rw-r--r--net/ipv6/esp6.c52
-rw-r--r--net/ipv6/exthdrs_core.c27
-rw-r--r--net/ipv6/icmp.c17
-rw-r--r--net/ipv6/ip6_fib.c23
-rw-r--r--net/ipv6/ip6_flowlabel.c3
-rw-r--r--net/ipv6/ipcomp6.c29
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c3
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c3
-rw-r--r--net/ipv6/raw.c48
-rw-r--r--net/ipv6/reassembly.c37
-rw-r--r--net/ipv6/route.c104
-rw-r--r--net/ipv6/xfrm6_input.c15
-rw-r--r--net/ipv6/xfrm6_output.c3
-rw-r--r--net/ipv6/xfrm6_policy.c26
-rw-r--r--net/ipv6/xfrm6_tunnel.c72
-rw-r--r--net/irda/irlan/irlan_eth.c10
-rw-r--r--net/key/af_key.c9
-rw-r--r--net/rxrpc/call.c57
-rw-r--r--net/sched/sch_api.c42
-rw-r--r--net/sched/sch_atm.c1
-rw-r--r--net/sched/sch_cbq.c14
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sched/sch_hfsc.c315
-rw-r--r--net/sched/sch_htb.c1
-rw-r--r--net/sched/sch_ingress.c14
-rw-r--r--net/sctp/protocol.c8
-rw-r--r--net/sunrpc/xprt.c8
-rw-r--r--net/xfrm/xfrm_export.c2
-rw-r--r--net/xfrm/xfrm_user.c32
-rw-r--r--security/selinux/hooks.c72
163 files changed, 4810 insertions, 2216 deletions
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 12429ef28b39..f5bee997bee9 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -182,7 +182,7 @@ config CRYPTO_TEA
many rounds for security. It is very fast and uses
little memory.
- Xtendend Tiny Encryption Algorithm is a modifcation to
+ Extended Tiny Encryption Algorithm is a modification to
the TEA algorithm to address a potential key weakness
in the TEA algorithm.
diff --git a/crypto/aes.c b/crypto/aes.c
index 13b6daa37089..3a26f9c99aee 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -160,7 +160,7 @@ gen_tabs (void)
u8 p, q;
/* log and power tables for GF(2**8) finite field with
- 0x011b as modular polynomial - the simplest prmitive
+ 0x011b as modular polynomial - the simplest primitive
root is 0x03, used here to generate the tables */
for (i = 0, p = 1; i < 256; ++i) {
diff --git a/crypto/arc4.c b/crypto/arc4.c
index b66eb1f0d001..9efbcaae88a1 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -3,7 +3,7 @@
*
* ARC4 Cipher Algorithm
*
- * Jon Oberheide <jon@focalhost.com>
+ * Jon Oberheide <jon@oberheide.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -100,4 +100,4 @@ module_exit(arc4_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("ARC4 Cipher Algorithm");
-MODULE_AUTHOR("Jon Oberheide <jon@focalhost.com>");
+MODULE_AUTHOR("Jon Oberheide <jon@oberheide.org>");
diff --git a/crypto/blowfish.c b/crypto/blowfish.c
index 8fd18dcdb679..b6bea821c05f 100644
--- a/crypto/blowfish.c
+++ b/crypto/blowfish.c
@@ -3,9 +3,9 @@
*
* Blowfish Cipher Algorithm, by Bruce Schneier.
* http://www.counterpane.com/blowfish.html
- *
- * Adapated from Kerneli implementation.
- *
+ *
+ * Adapted from Kerneli implementation.
+ *
* Copyright (c) Herbert Valerio Riedel <hvr@hvrlab.org>
* Copyright (c) Kyle McMartin <kyle@debian.org>
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 540dd9ac1504..f6a5c9e5b2e0 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -70,7 +70,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
{
/* walk->data may be pointing the first byte of the next page;
however, we know we transfered at least one byte. So,
- walk->data - 1 will be a virutual address in the mapped page. */
+ walk->data - 1 will be a virtual address in the mapped page. */
if (out)
flush_dcache_page(walk->page);
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 56a825d6539b..81791c1c0142 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -1186,7 +1186,7 @@ struct cipher_testvec tf_cbc_dec_tv_template[] = {
/*
* Serpent test vectors. These are backwards because Serpent writes
- * octect sequences in right-to-left mode.
+ * octet sequences in right-to-left mode.
*/
#define SERPENT_ENC_TEST_VECTORS 4
#define SERPENT_DEC_TEST_VECTORS 4
diff --git a/crypto/twofish.c b/crypto/twofish.c
index 5d6d02c1a95d..4efff8cf9958 100644
--- a/crypto/twofish.c
+++ b/crypto/twofish.c
@@ -1,7 +1,7 @@
/*
* Twofish for CryptoAPI
*
- * Originaly Twofish for GPG
+ * Originally Twofish for GPG
* By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
* 256-bit key length added March 20, 1999
* Some modifications to reduce the text size by Werner Koch, April, 1998
@@ -514,7 +514,7 @@ static const u8 calc_sb_tbl[512] = {
* preprocessed through q0 and q1 respectively; for longer keys they are the
* output of previous stages. j is the index of the first key byte to use.
* CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
- * twice, doing the Psuedo-Hadamard Transform, and doing the necessary
+ * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
* rotations. Its parameters are: a, the array to write the results into,
* j, the index of the first output entry, k and l, the preprocessed indices
* for index 2i, and m and n, the preprocessed indices for index 2i+1.
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index e48ec3421328..72138612d2fb 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -808,16 +808,16 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id, struct pt_regs *regs)
if (gem_status & GREG_STAT_ABNORMAL) {
if (gem_abnormal_irq(dev, gp, gem_status))
- goto out;
+ goto out_unlock;
}
if (gem_status & (GREG_STAT_TXALL | GREG_STAT_TXINTME))
gem_tx(dev, gp, gem_status);
if (gem_status & GREG_STAT_RXDONE)
gem_rx(gp);
-out:
+out_unlock:
spin_unlock(&gp->lock);
-
+out:
return IRQ_HANDLED;
}
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c
index f7442d52dabe..2329c23af83e 100644
--- a/drivers/net/wan/syncppp.c
+++ b/drivers/net/wan/syncppp.c
@@ -50,6 +50,7 @@
#include <linux/random.h>
#include <linux/pkt_sched.h>
#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
#include <net/syncppp.h>
@@ -767,9 +768,9 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb)
struct in_ifaddr *ifa;
u32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */
#ifdef CONFIG_INET
- if ((in_dev=in_dev_get(dev)) != NULL)
+ rcu_read_lock();
+ if ((in_dev = __in_dev_get(dev)) != NULL)
{
- read_lock(&in_dev->lock);
for (ifa=in_dev->ifa_list; ifa != NULL;
ifa=ifa->ifa_next) {
if (strcmp(dev->name, ifa->ifa_label) == 0)
@@ -779,9 +780,8 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb)
break;
}
}
- read_unlock(&in_dev->lock);
- in_dev_put(in_dev);
}
+ rcu_read_unlock();
#endif
/* I hope both addr and mask are in the net order */
sppp_cisco_send (sp, CISCO_ADDR_REPLY, addr, mask);
diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c
index 98cee21f7d84..c9331f589645 100644
--- a/drivers/net/wireless/strip.c
+++ b/drivers/net/wireless/strip.c
@@ -106,6 +106,7 @@ static const char StripVersion[] = "1.3A-STUART.CHESHIRE";
#include <linux/seq_file.h>
#include <linux/serial.h>
#include <linux/serialP.h>
+#include <linux/rcupdate.h>
#include <net/arp.h>
#include <linux/ip.h>
@@ -1348,14 +1349,17 @@ static unsigned char *strip_make_packet(unsigned char *buffer,
*/
if (haddr.c[0] == 0xFF) {
u32 brd = 0;
- struct in_device *in_dev = in_dev_get(strip_info->dev);
- if (in_dev == NULL)
+ struct in_device *in_dev;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get(strip_info->dev);
+ if (in_dev == NULL) {
+ rcu_read_unlock();
return NULL;
- read_lock(&in_dev->lock);
+ }
if (in_dev->ifa_list)
brd = in_dev->ifa_list->ifa_broadcast;
- read_unlock(&in_dev->lock);
- in_dev_put(in_dev);
+ rcu_read_unlock();
/* arp_query returns 1 if it succeeds in looking up the address, 0 if it fails */
if (!arp_query(haddr.c, brd, strip_info->dev)) {
@@ -1500,17 +1504,18 @@ static void strip_send(struct strip *strip_info, struct sk_buff *skb)
}
if (1) {
- struct in_device *in_dev = in_dev_get(strip_info->dev);
+ struct in_device *in_dev;
+
brd = addr = 0;
+ rcu_read_lock();
+ in_dev = __in_dev_get(strip_info->dev);
if (in_dev) {
- read_lock(&in_dev->lock);
if (in_dev->ifa_list) {
brd = in_dev->ifa_list->ifa_broadcast;
addr = in_dev->ifa_list->ifa_local;
}
- read_unlock(&in_dev->lock);
- in_dev_put(in_dev);
}
+ rcu_read_unlock();
}
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index cac57951ae28..e4b7bbbeff17 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1002,7 +1002,7 @@ lcs_register_mc_addresses(void *data)
in4_dev = in_dev_get(card->dev);
if (in4_dev == NULL)
return 0;
- read_lock(&in4_dev->lock);
+ read_lock(&in4_dev->mc_list_lock);
spin_lock(&card->ipm_lock);
/* Check for multicast addresses to be removed. */
list_for_each(l, &card->ipm_list) {
@@ -1046,7 +1046,7 @@ lcs_register_mc_addresses(void *data)
list_add(&ipm->list, &card->ipm_list);
}
spin_unlock(&card->ipm_lock);
- read_unlock(&in4_dev->lock);
+ read_unlock(&in4_dev->mc_list_lock);
in_dev_put(in4_dev);
lcs_fix_multicast_list(card);
return 0;
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 17f0f67d8e55..378906eba48f 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -73,6 +73,7 @@ qeth_eyecatcher(void)
#include <linux/reboot.h>
#include <asm/qeth.h>
#include <linux/mii.h>
+#include <linux/rcupdate.h>
#include "qeth.h"
#include "qeth_mpc.h"
@@ -4733,9 +4734,10 @@ qeth_free_vlan_addresses4(struct qeth_card *card, unsigned short vid)
QETH_DBF_TEXT(trace, 4, "frvaddr4");
if (!card->vlangrp)
return;
- in_dev = in_dev_get(card->vlangrp->vlan_devices[vid]);
+ rcu_read_lock();
+ in_dev = __in_dev_get(card->vlangrp->vlan_devices[vid]);
if (!in_dev)
- return;
+ goto out;
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next){
addr = qeth_get_addr_buffer(QETH_PROT_IPV4);
if (addr){
@@ -4746,7 +4748,8 @@ qeth_free_vlan_addresses4(struct qeth_card *card, unsigned short vid)
kfree(addr);
}
}
- in_dev_put(in_dev);
+out:
+ rcu_read_unlock();
}
static void
@@ -4918,9 +4921,9 @@ qeth_add_vlan_mc(struct qeth_card *card)
in_dev = in_dev_get(vg->vlan_devices[i]);
if (!in_dev)
continue;
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
qeth_add_mc(card,in_dev);
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
in_dev_put(in_dev);
}
#endif
@@ -4935,10 +4938,10 @@ qeth_add_multicast_ipv4(struct qeth_card *card)
in4_dev = in_dev_get(card->dev);
if (in4_dev == NULL)
return;
- read_lock(&in4_dev->lock);
+ read_lock(&in4_dev->mc_list_lock);
qeth_add_mc(card, in4_dev);
qeth_add_vlan_mc(card);
- read_unlock(&in4_dev->lock);
+ read_unlock(&in4_dev->mc_list_lock);
in_dev_put(in4_dev);
}
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index c4d2a0949177..2a9b82002591 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -191,10 +191,13 @@ extern int aarp_send_ddp(struct net_device *dev,
extern void aarp_send_probe(struct net_device *dev,
struct atalk_addr *addr);
extern void aarp_device_down(struct net_device *dev);
+extern void aarp_probe_network(struct atalk_iface *atif);
+extern int aarp_proxy_probe_network(struct atalk_iface *atif,
+ struct atalk_addr *sa);
+extern void aarp_proxy_remove(struct net_device *dev,
+ struct atalk_addr *sa);
-#ifdef MODULE
-extern void aarp_cleanup_module(void);
-#endif /* MODULE */
+extern void aarp_cleanup_module(void);
#define at_sk(__sk) ((struct atalk_sock *)(__sk)->sk_protinfo)
@@ -209,8 +212,28 @@ extern rwlock_t atalk_interfaces_lock;
extern struct atalk_route atrtr_default;
+extern struct file_operations atalk_seq_arp_fops;
+
+extern int sysctl_aarp_expiry_time;
+extern int sysctl_aarp_tick_time;
+extern int sysctl_aarp_retransmit_limit;
+extern int sysctl_aarp_resolve_time;
+
+#ifdef CONFIG_SYSCTL
+extern void atalk_register_sysctl(void);
+extern void atalk_unregister_sysctl(void);
+#else
+#define atalk_register_sysctl() do { } while(0)
+#define atalk_unregister_sysctl() do { } while(0)
+#endif
+
+#ifdef CONFIG_PROC_FS
extern int atalk_proc_init(void);
extern void atalk_proc_exit(void);
+#else
+#define atalk_proc_init() 0
+#define atalk_proc_exit() do { } while(0)
+#endif /* CONFIG_PROC_FS */
#endif /* __KERNEL__ */
#endif /* __LINUX_ATALK_H__ */
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 420767fcb3c9..529c401a9a86 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -152,7 +152,7 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
skb->real_dev = skb->dev;
skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK];
if (skb->dev == NULL) {
- kfree_skb(skb);
+ dev_kfree_skb_any(skb);
/* Not NET_RX_DROP, this is not being dropped
* due to congestion.
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 27a5d0a97dbc..ec751e9fb1c2 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -3,6 +3,8 @@
#ifdef __KERNEL__
+#include <linux/rcupdate.h>
+
struct ipv4_devconf
{
int accept_redirects;
@@ -31,13 +33,13 @@ extern struct ipv4_devconf ipv4_devconf;
struct in_device
{
- struct net_device *dev;
+ struct net_device *dev;
atomic_t refcnt;
- rwlock_t lock;
int dead;
struct in_ifaddr *ifa_list; /* IP ifaddr chain */
+ rwlock_t mc_list_lock;
struct ip_mc_list *mc_list; /* IP multicast filter chain */
- rwlock_t mc_lock; /* for mc_tomb */
+ spinlock_t mc_tomb_lock;
struct ip_mc_list *mc_tomb;
unsigned long mr_v1_seen;
unsigned long mr_v2_seen;
@@ -50,6 +52,7 @@ struct in_device
struct neigh_parms *arp_parms;
struct ipv4_devconf cnf;
+ struct rcu_head rcu_head;
};
#define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding)
@@ -80,6 +83,7 @@ struct in_ifaddr
{
struct in_ifaddr *ifa_next;
struct in_device *ifa_dev;
+ struct rcu_head rcu_head;
u32 ifa_local;
u32 ifa_address;
u32 ifa_mask;
@@ -133,19 +137,16 @@ static __inline__ int bad_mask(u32 mask, u32 addr)
#define endfor_ifa(in_dev) }
-extern rwlock_t inetdev_lock;
-
-
static __inline__ struct in_device *
in_dev_get(const struct net_device *dev)
{
struct in_device *in_dev;
- read_lock(&inetdev_lock);
+ rcu_read_lock();
in_dev = dev->ip_ptr;
if (in_dev)
atomic_inc(&in_dev->refcnt);
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
return in_dev;
}
@@ -157,8 +158,7 @@ __in_dev_get(const struct net_device *dev)
extern void in_dev_finish_destroy(struct in_device *idev);
-static __inline__ void
-in_dev_put(struct in_device *idev)
+static inline void in_dev_put(struct in_device *idev)
{
if (atomic_dec_and_test(&idev->refcnt))
in_dev_finish_destroy(idev);
diff --git a/include/linux/net.h b/include/linux/net.h
index cec1482f28e2..80e7fec727e3 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -169,6 +169,7 @@ extern struct socket *sockfd_lookup(int fd, int *err);
extern int net_ratelimit(void);
extern unsigned long net_random(void);
extern void net_srandom(unsigned long);
+extern void net_random_init(void);
extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t len);
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index 1974f162f5a0..28b61a71bce9 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -51,10 +51,12 @@ enum ip_conntrack_status {
#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
#include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_sctp.h>
/* per conntrack: protocol private data */
union ip_conntrack_proto {
/* insert conntrack proto private data here */
+ struct ip_ct_sctp sctp;
struct ip_ct_tcp tcp;
struct ip_ct_icmp icmp;
};
@@ -156,6 +158,12 @@ struct ip_conntrack_expect
union ip_conntrack_expect_help help;
};
+struct ip_conntrack_counter
+{
+ u_int64_t packets;
+ u_int64_t bytes;
+};
+
struct ip_conntrack_helper;
struct ip_conntrack
@@ -173,6 +181,11 @@ struct ip_conntrack
/* Timer function; drops refcnt when it goes off. */
struct timer_list timeout;
+#ifdef CONFIG_IP_NF_CT_ACCT
+ /* Accounting Information (same cache line as other written members) */
+ struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
+#endif
+
/* If we're expecting another related connection, this will be
in expected linked list */
struct list_head sibling_list;
@@ -245,10 +258,17 @@ extern int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig);
/* Refresh conntrack for this many jiffies */
-extern void ip_ct_refresh(struct ip_conntrack *ct,
- unsigned long extra_jiffies);
+extern void ip_ct_refresh_acct(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb,
+ unsigned long extra_jiffies);
/* These are for NAT. Icky. */
+/* Update TCP window tracking data when NAT mangles the packet */
+extern int ip_conntrack_tcp_update(struct sk_buff *skb,
+ struct ip_conntrack *conntrack,
+ int dir);
+
/* Call me when a conntrack is destroyed. */
extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
@@ -271,6 +291,26 @@ static inline int is_confirmed(struct ip_conntrack *ct)
}
extern unsigned int ip_conntrack_htable_size;
+
+struct ip_conntrack_stat
+{
+ unsigned int searched;
+ unsigned int found;
+ unsigned int new;
+ unsigned int invalid;
+ unsigned int ignore;
+ unsigned int delete;
+ unsigned int delete_list;
+ unsigned int insert;
+ unsigned int insert_failed;
+ unsigned int drop;
+ unsigned int early_drop;
+ unsigned int icmp_error;
+ unsigned int expect_new;
+ unsigned int expect_create;
+ unsigned int expect_delete;
+};
+
/* eg. PROVIDES_CONNTRACK(ftp); */
#define PROVIDES_CONNTRACK(name) \
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index 4c8b5d189089..9a31e96b7ab7 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -21,15 +21,17 @@ extern struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol);
extern struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol);
extern struct list_head protocol_list;
-/* Returns conntrack if it dealt with ICMP, and filled in skb->nfct */
-extern struct ip_conntrack *icmp_error_track(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum);
-extern int get_tuple(const struct iphdr *iph,
- const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_protocol *protocol);
+extern int
+ip_ct_get_tuple(const struct iphdr *iph,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_protocol *protocol);
+
+extern int
+ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
+ const struct ip_conntrack_tuple *orig,
+ const struct ip_conntrack_protocol *protocol);
/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
index 56e37ef255b7..55d57404acb8 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
@@ -3,6 +3,11 @@
#define _IP_CONNTRACK_PROTOCOL_H
#include <linux/netfilter_ipv4/ip_conntrack.h>
+/* length of buffer to which print_tuple/print_conntrack members are
+ * writing */
+
+#define IP_CT_PRINT_BUFLEN 100
+
struct ip_conntrack_protocol
{
/* Next pointer. */
@@ -50,6 +55,9 @@ struct ip_conntrack_protocol
int (*exp_matches_pkt)(struct ip_conntrack_expect *exp,
const struct sk_buff *skb);
+ int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum);
+
/* Module (if any) which this is connected to. */
struct module *me;
};
@@ -63,4 +71,17 @@ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp;
extern int ip_conntrack_protocol_tcp_init(void);
+
+/* Log invalid packets */
+extern unsigned int ip_ct_log_invalid;
+
+#ifdef DEBUG_INVALID_PACKETS
+#define LOG_INVALID(proto) \
+ (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
+#else
+#define LOG_INVALID(proto) \
+ ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
+ && net_ratelimit())
+#endif
+
#endif /*_IP_CONNTRACK_PROTOCOL_H*/
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sctp.h b/include/linux/netfilter_ipv4/ip_conntrack_sctp.h
new file mode 100644
index 000000000000..7a8d869321f7
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ip_conntrack_sctp.h
@@ -0,0 +1,25 @@
+#ifndef _IP_CONNTRACK_SCTP_H
+#define _IP_CONNTRACK_SCTP_H
+/* SCTP tracking. */
+
+enum sctp_conntrack {
+ SCTP_CONNTRACK_NONE,
+ SCTP_CONNTRACK_CLOSED,
+ SCTP_CONNTRACK_COOKIE_WAIT,
+ SCTP_CONNTRACK_COOKIE_ECHOED,
+ SCTP_CONNTRACK_ESTABLISHED,
+ SCTP_CONNTRACK_SHUTDOWN_SENT,
+ SCTP_CONNTRACK_SHUTDOWN_RECD,
+ SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
+ SCTP_CONNTRACK_MAX
+};
+
+struct ip_ct_sctp
+{
+ enum sctp_conntrack state; /* one of the SCTP_CONNTRACK_* values */
+
+ u_int32_t vtag[IP_CT_DIR_MAX]; /* one slot per direction; presumably the SCTP verification tag -- confirm */
+ u_int32_t ttag[IP_CT_DIR_MAX]; /* one slot per direction; meaning not evident from this header -- TODO document */
+};
+
+#endif /* _IP_CONNTRACK_SCTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h b/include/linux/netfilter_ipv4/ip_conntrack_tcp.h
index d6698c911e11..0ab4590a0b16 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_tcp.h
@@ -4,25 +4,44 @@
enum tcp_conntrack {
TCP_CONNTRACK_NONE,
- TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_SYN_SENT,
TCP_CONNTRACK_SYN_RECV,
+ TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_FIN_WAIT,
- TCP_CONNTRACK_TIME_WAIT,
- TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_CLOSE_WAIT,
TCP_CONNTRACK_LAST_ACK,
+ TCP_CONNTRACK_TIME_WAIT,
+ TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_LISTEN,
- TCP_CONNTRACK_MAX
+ TCP_CONNTRACK_MAX,
+ TCP_CONNTRACK_IGNORE
+};
+
+/* Window scaling is advertised by the sender */
+#define IP_CT_TCP_STATE_FLAG_WINDOW_SCALE 0x01
+
+/* SACK is permitted by the sender */
+#define IP_CT_TCP_FLAG_SACK_PERM 0x02
+
+struct ip_ct_tcp_state {
+ u_int32_t td_end; /* max of seq + len */
+ u_int32_t td_maxend; /* max of ack + max(win, 1) */
+ u_int32_t td_maxwin; /* max(win) */
+ u_int8_t td_scale; /* window scale factor */
+ u_int8_t loose; /* used when connection picked up from the middle */
+ u_int8_t flags; /* per direction state flags */
};
struct ip_ct_tcp
{
- enum tcp_conntrack state;
-
- /* Poor man's window tracking: sequence number of valid ACK
- handshake completion packet */
- u_int32_t handshake_ack;
+ struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
+ u_int8_t state; /* state of the connection (enum tcp_conntrack) */
+ /* For detecting stale connections */
+ u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */
+ u_int8_t retrans; /* Number of retransmitted packets */
+ u_int8_t last_index; /* Index of the last packet */
+ u_int32_t last_seq; /* Last sequence number seen in dir */
+ u_int32_t last_end; /* Last seq + len */
};
#endif /* _IP_CONNTRACK_TCP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
index 1e7691189c67..3a71176e2060 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
@@ -25,6 +25,9 @@ union ip_conntrack_manip_proto
struct {
u_int16_t id;
} icmp;
+ struct {
+ u_int16_t port;
+ } sctp;
};
/* The manipulable part of the tuple. */
@@ -55,6 +58,9 @@ struct ip_conntrack_tuple
struct {
u_int8_t type, code;
} icmp;
+ struct {
+ u_int16_t port;
+ } sctp;
} u;
/* The protocol. */
diff --git a/include/linux/netfilter_ipv4/ip_nat_helper.h b/include/linux/netfilter_ipv4/ip_nat_helper.h
index 185a24a6a047..be6bb082d0ba 100644
--- a/include/linux/netfilter_ipv4/ip_nat_helper.h
+++ b/include/linux/netfilter_ipv4/ip_nat_helper.h
@@ -38,11 +38,12 @@ struct ip_nat_helper
struct ip_nat_info *info);
};
-extern struct list_head helpers;
-
extern int ip_nat_helper_register(struct ip_nat_helper *me);
extern void ip_nat_helper_unregister(struct ip_nat_helper *me);
+extern struct ip_nat_helper *
+ip_nat_find_helper(const struct ip_conntrack_tuple *tuple);
+
/* These return true or false. */
extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
struct ip_conntrack *ct,
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index b4c2b2b381c1..02a006f17ac4 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -336,7 +336,6 @@ ipt_get_target(struct ipt_entry *e)
* Main firewall chains definitions and global var's definitions.
*/
#ifdef __KERNEL__
-static DECLARE_MUTEX(ipt_mutex);
#include <linux/init.h>
extern void ipt_init(void) __init;
diff --git a/include/linux/netfilter_ipv4/ipt_sctp.h b/include/linux/netfilter_ipv4/ipt_sctp.h
new file mode 100644
index 000000000000..e93a9ec99fc2
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_sctp.h
@@ -0,0 +1,107 @@
+#ifndef _IPT_SCTP_H_
+#define _IPT_SCTP_H_
+
+#define IPT_SCTP_SRC_PORTS 0x01
+#define IPT_SCTP_DEST_PORTS 0x02
+#define IPT_SCTP_CHUNK_TYPES 0x04
+
+#define IPT_SCTP_VALID_FLAGS 0x07
+
+#define ELEMCOUNT(x) (sizeof(x)/sizeof(x[0]))
+
+
+struct ipt_sctp_flag_info {
+ u_int8_t chunktype;
+ u_int8_t flag;
+ u_int8_t flag_mask;
+};
+
+#define IPT_NUM_SCTP_FLAGS 4
+
+struct ipt_sctp_info {
+ u_int16_t dpts[2]; /* Min, Max (presumably the destination port range -- confirm) */
+ u_int16_t spts[2]; /* Min, Max (presumably the source port range -- confirm) */
+
+ u_int32_t chunkmap[256 / sizeof (u_int32_t)]; /* Bit mask of chunks to be matched according to RFC 2960. NOTE(review): the divisor is a byte count, not a bit count, so this holds more words than 256 bits need; confirm before resizing, since that changes the struct layout */
+
+#define SCTP_CHUNK_MATCH_ANY 0x01 /* Match if any of the chunk types are present */
+#define SCTP_CHUNK_MATCH_ALL 0x02 /* Match if all of the chunk types are present */
+#define SCTP_CHUNK_MATCH_ONLY 0x04 /* Match if these are the only chunk types present */
+
+ u_int32_t chunk_match_type; /* presumably one of SCTP_CHUNK_MATCH_* above -- confirm */
+ struct ipt_sctp_flag_info flag_info[IPT_NUM_SCTP_FLAGS];
+ int flag_count; /* presumably the number of flag_info[] entries in use -- confirm */
+
+ u_int32_t flags; /* presumably IPT_SCTP_* bits -- confirm */
+ u_int32_t invflags; /* presumably IPT_SCTP_* bits whose sense is inverted -- confirm */
+};
+
+#define bytes(type) (sizeof(type) * 8)
+
+/*
+ * Set the bit for chunk @type in @chunkmap, an array of u_int32_t words.
+ * bytes(u_int32_t) is the bit width of one word (despite the macro's name).
+ * The mask is built from an unsigned 1 so that selecting bit 31 does not
+ * shift into the sign bit of an int.  @type is evaluated twice.
+ */
+#define SCTP_CHUNKMAP_SET(chunkmap, type)			\
+	do {							\
+		(chunkmap)[(type) / bytes(u_int32_t)] |=	\
+			1U << ((type) % bytes(u_int32_t));	\
+	} while (0)
+
+/*
+ * Clear the bit for chunk @type in @chunkmap, an array of u_int32_t words.
+ * bytes(u_int32_t) is the bit width of one word (despite the macro's name).
+ * The mask is built from an unsigned 1 so that selecting bit 31 does not
+ * shift into the sign bit of an int.  @type is evaluated twice.
+ */
+#define SCTP_CHUNKMAP_CLEAR(chunkmap, type)			\
+	do {							\
+		(chunkmap)[(type) / bytes(u_int32_t)] &=	\
+			~(1U << ((type) % bytes(u_int32_t)));	\
+	} while (0)
+
+/*
+ * Evaluate to 1 if the bit for chunk @type is set in @chunkmap, else 0.
+ * bytes(u_int32_t) is the bit width of one word (despite the macro's name).
+ * The mask is built from an unsigned 1 so that testing bit 31 does not
+ * shift into the sign bit of an int.  @type is evaluated twice.
+ */
+#define SCTP_CHUNKMAP_IS_SET(chunkmap, type)			\
+({								\
+	((chunkmap)[(type) / bytes (u_int32_t)] &		\
+		(1U << ((type) % bytes (u_int32_t)))) ? 1: 0;	\
+})
+
+/*
+ * Zero every word of @chunkmap.  The element count comes from ELEMCOUNT(),
+ * so @chunkmap has to be an actual array, not a pointer.
+ */
+#define SCTP_CHUNKMAP_RESET(chunkmap)				\
+	do {							\
+		int i = 0;					\
+		while (i < ELEMCOUNT(chunkmap)) {		\
+			chunkmap[i] = 0;			\
+			i++;					\
+		}						\
+	} while (0)
+
+/*
+ * Store ~0 in every word of @chunkmap.  The element count comes from
+ * ELEMCOUNT(), so @chunkmap has to be an actual array, not a pointer.
+ */
+#define SCTP_CHUNKMAP_SET_ALL(chunkmap)				\
+	do {							\
+		int i = 0;					\
+		while (i < ELEMCOUNT(chunkmap)) {		\
+			chunkmap[i] = ~0;			\
+			i++;					\
+		}						\
+	} while (0)
+
+/*
+ * Copy @srcmap into @destmap one word at a time.  The loop bound is taken
+ * from @srcmap (which must be an actual array, not a pointer); it used to
+ * name "chunkmap", which is not a parameter of this macro and so only
+ * compiled when the caller happened to have such a variable in scope.
+ */
+#define SCTP_CHUNKMAP_COPY(destmap, srcmap)			\
+	do {							\
+		int i;						\
+		for (i = 0; i < ELEMCOUNT(srcmap); i++)		\
+			(destmap)[i] = (srcmap)[i];		\
+	} while (0)
+
+/*
+ * Evaluate to 1 if every word of @chunkmap is zero, else 0.  Scanning stops
+ * at the first non-zero word.  @chunkmap must be an actual array.
+ */
+#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap)			\
+({								\
+	int i;							\
+	int flag = 1;						\
+	for (i = 0; flag && i < ELEMCOUNT(chunkmap); i++)	\
+		flag = (chunkmap[i] == 0);			\
+	flag;							\
+})
+
+/*
+ * Evaluate to 1 if every word of @chunkmap equals ~0, else 0.  Scanning
+ * stops at the first word that differs.  @chunkmap must be an actual array.
+ */
+#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap)			\
+({								\
+	int i;							\
+	int flag = 1;						\
+	for (i = 0; flag && i < ELEMCOUNT(chunkmap); i++)	\
+		flag = (chunkmap[i] == ~0);			\
+	flag;							\
+})
+
+#endif /* _IPT_SCTP_H_ */
+
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index f9983d16cc1c..6f70cf3df39a 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -107,10 +107,6 @@ struct ip6t_counters
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
-#ifdef __KERNEL__
-static DECLARE_MUTEX(ip6t_mutex);
-#endif
-
/* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */
#define IP6T_F_PROTO 0x01 /* Set if rule cares about upper
protocols */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 670558170bbd..ee61b0f31174 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -99,6 +99,7 @@ enum {
#ifdef __KERNEL__
#include <linux/capability.h>
+#include <linux/skbuff.h>
struct netlink_skb_parms
{
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index fb2088f0bd4a..3ae0c6e140af 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -123,6 +123,7 @@ extern void rb_erase(struct rb_node *, struct rb_root *);
extern struct rb_node *rb_next(struct rb_node *);
extern struct rb_node *rb_prev(struct rb_node *);
extern struct rb_node *rb_first(struct rb_root *);
+extern struct rb_node *rb_last(struct rb_root *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 724d6841d0ae..256c05c11298 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -89,6 +89,8 @@
#define NET_CALLER(arg) __builtin_return_address(0)
#endif
+struct net_device;
+
#ifdef CONFIG_NETFILTER
struct nf_conntrack {
atomic_t use;
@@ -1105,6 +1107,20 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
extern void skb_split(struct sk_buff *skb,
struct sk_buff *skb1, const u32 len);
+/*
+ * skb_header_pointer - get a pointer to @len bytes at @offset in @skb
+ *
+ * If the requested range lies within the first skb_headlen(skb) bytes, a
+ * pointer straight into skb->data is returned and @buffer is not touched.
+ * Otherwise the bytes are copied into @buffer with skb_copy_bits() and
+ * @buffer is returned; NULL is returned if that copy reports an error.
+ * @buffer is handed to skb_copy_bits() with @len, so it presumably needs
+ * room for @len bytes.
+ */
+static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
+				       int len, void *buffer)
+{
+	int hlen = skb_headlen(skb);
+
+	/* Written as a subtraction so that offset + len cannot overflow. */
+	if (hlen - offset >= len)
+		return skb->data + offset;
+
+	if (skb_copy_bits(skb, offset, buffer, len) < 0)
+		return NULL;
+
+	return buffer;
+}
+
extern void skb_init(void);
extern void skb_add_mtu(int mtu);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 2ae5058a051d..afd89be7c193 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -411,6 +411,18 @@ enum
NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12,
NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13,
NET_IPV4_NF_CONNTRACK_BUCKETS=14,
+ NET_IPV4_NF_CONNTRACK_LOG_INVALID=15,
+ NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
+ NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17,
+ NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18,
+ NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19,
+ NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
+ NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
+ NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
+ NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
+ NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
+ NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
+ NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
};
/* /proc/sys/net/ipv6 */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 48280b138cb9..76ce5f8b6c1e 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -51,6 +51,7 @@ struct inet6_ifaddr
struct timer_list timer;
struct inet6_dev *idev;
+ struct rt6_info *rt;
struct inet6_ifaddr *lst_next; /* next addr in addr_lst */
struct inet6_ifaddr *if_next; /* next addr in inet6_dev */
@@ -133,6 +134,7 @@ struct ifacaddr6
{
struct in6_addr aca_addr;
struct inet6_dev *aca_idev;
+ struct rt6_info *aca_rt;
struct ifacaddr6 *aca_next;
int aca_users;
atomic_t aca_refcnt;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 14d41c4baa99..319904518194 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -20,6 +20,7 @@
#include <net/dst.h>
#include <net/flow.h>
#include <linux/rtnetlink.h>
+#include <linux/spinlock.h>
struct rt6_info;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 75d503dc1b5e..dbfe1d6923fd 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -11,8 +11,10 @@
#include <net/flow.h>
#include <net/ip6_fib.h>
+#include <net/sock.h>
#include <linux/tcp.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
struct pol_chain {
int type;
@@ -40,6 +42,9 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg);
extern int ip6_route_add(struct in6_rtmsg *rtmsg,
struct nlmsghdr *,
void *rtattr);
+extern int ip6_ins_rt(struct rt6_info *,
+ struct nlmsghdr *,
+ void *rtattr);
extern int ip6_del_rt(struct rt6_info *,
struct nlmsghdr *,
void *rtattr);
@@ -69,6 +74,10 @@ extern struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
extern int ndisc_dst_gc(int *more);
extern void fib6_force_start_gc(void);
+extern struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
+ const struct in6_addr *addr,
+ int anycast);
+
/*
* support functions for ND
*
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f14edafeabbd..59825c399e15 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -242,7 +242,6 @@ extern u32 fib_rules_map_destination(u32 daddr, struct fib_result *res);
#ifdef CONFIG_NET_CLS_ROUTE
extern u32 fib_rules_tclass(struct fib_result *res);
#endif
-extern u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags);
extern void fib_rules_init(void);
#endif
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 464203b2abac..2f1c3783f7ba 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -45,6 +45,7 @@
#include <asm/atomic.h>
#include <linux/skbuff.h>
+#include <linux/netdevice.h>
#include <linux/err.h>
#include <linux/sysctl.h>
@@ -53,6 +54,8 @@
#define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
#define NUD_CONNECTED (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE)
+struct neighbour;
+
struct neigh_parms
{
struct neigh_parms *next;
diff --git a/include/net/route.h b/include/net/route.h
index a5e9c575ea3e..5e0100185d95 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -73,11 +73,6 @@ struct rtable
/* Miscellaneous cached information */
__u32 rt_spec_dst; /* RFC1122 specific destination */
struct inet_peer *peer; /* long-living peer info */
-
-#ifdef CONFIG_IP_ROUTE_NAT
- __u32 rt_src_map;
- __u32 rt_dst_map;
-#endif
};
struct ip_rt_acct
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index aaf74999a1f3..756c2016e4a1 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -216,7 +216,7 @@ struct xfrm_type
void (*destructor)(struct xfrm_state *);
int (*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb);
int (*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb);
- int (*output)(struct sk_buff **pskb);
+ int (*output)(struct sk_buff *pskb);
/* Estimate maximal size of result of transformation of a dgram */
u32 (*get_max_size)(struct xfrm_state *, int size);
};
@@ -304,47 +304,6 @@ extern int xfrm_register_km(struct xfrm_mgr *km);
extern int xfrm_unregister_km(struct xfrm_mgr *km);
-#define XFRM_FLOWCACHE_HASH_SIZE 1024
-
-static inline u32 __flow_hash4(struct flowi *fl)
-{
- u32 hash = fl->fl4_src ^ fl->fl_ip_sport;
-
- hash = ((hash & 0xF0F0F0F0) >> 4) | ((hash & 0x0F0F0F0F) << 4);
-
- hash ^= fl->fl4_dst ^ fl->fl_ip_dport;
- hash ^= (hash >> 10);
- hash ^= (hash >> 20);
- return hash & (XFRM_FLOWCACHE_HASH_SIZE-1);
-}
-
-static inline u32 __flow_hash6(struct flowi *fl)
-{
- u32 hash = fl->fl6_src.s6_addr32[2] ^
- fl->fl6_src.s6_addr32[3] ^
- fl->fl_ip_sport;
-
- hash = ((hash & 0xF0F0F0F0) >> 4) | ((hash & 0x0F0F0F0F) << 4);
-
- hash ^= fl->fl6_dst.s6_addr32[2] ^
- fl->fl6_dst.s6_addr32[3] ^
- fl->fl_ip_dport;
- hash ^= (hash >> 10);
- hash ^= (hash >> 20);
- return hash & (XFRM_FLOWCACHE_HASH_SIZE-1);
-}
-
-static inline u32 flow_hash(struct flowi *fl, unsigned short family)
-{
- switch (family) {
- case AF_INET:
- return __flow_hash4(fl);
- case AF_INET6:
- return __flow_hash6(fl);
- }
- return 0; /*XXX*/
-}
-
extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
static inline void xfrm_pol_hold(struct xfrm_policy *policy)
@@ -462,13 +421,51 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen)
return 1;
}
+/*
+ * Value compared against a selector's source port for flow @fl:
+ * the flow's source port for TCP and UDP, htons() of the ICMP type for
+ * ICMP and ICMPv6, and 0 for every other protocol.
+ */
+static __inline__
+u16 xfrm_flowi_sport(struct flowi *fl)
+{
+	switch (fl->proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		return fl->fl_ip_sport;
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		return htons(fl->fl_icmp_type);
+	default:
+		return 0;	/*XXX*/
+	}
+}
+
+/*
+ * Value compared against a selector's destination port for flow @fl:
+ * the flow's destination port for TCP and UDP, htons() of the ICMP code
+ * for ICMP and ICMPv6, and 0 for every other protocol.
+ */
+static __inline__
+u16 xfrm_flowi_dport(struct flowi *fl)
+{
+	switch (fl->proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		return fl->fl_ip_dport;
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		return htons(fl->fl_icmp_code);
+	default:
+		return 0;	/*XXX*/
+	}
+}
+
static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
- !((fl->fl_ip_dport^sel->dport)&sel->dport_mask) &&
- !((fl->fl_ip_sport^sel->sport)&sel->sport_mask) &&
+ !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
+ !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
(fl->proto == sel->proto || !sel->proto) &&
(fl->oif == sel->ifindex || !sel->ifindex);
}
@@ -478,8 +475,8 @@ __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
- !((fl->fl_ip_dport^sel->dport)&sel->dport_mask) &&
- !((fl->fl_ip_sport^sel->sport)&sel->sport_mask) &&
+ !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
+ !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
(fl->proto == sel->proto || !sel->proto) &&
(fl->oif == sel->ifindex || !sel->ifindex);
}
@@ -795,8 +792,6 @@ extern void xfrm4_state_init(void);
extern void xfrm4_state_fini(void);
extern void xfrm6_state_init(void);
extern void xfrm6_state_fini(void);
-extern void xfrm6_tunnel_init(void);
-extern void xfrm6_tunnel_fini(void);
extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *);
extern struct xfrm_state *xfrm_state_alloc(void);
@@ -821,6 +816,7 @@ extern int xfrm4_rcv(struct sk_buff *skb);
extern int xfrm4_output(struct sk_buff **pskb);
extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
+extern int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi);
extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler);
extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler);
@@ -852,8 +848,6 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig
#endif
void xfrm_policy_init(void);
-void xfrm4_policy_init(void);
-void xfrm6_policy_init(void);
struct xfrm_policy *xfrm_policy_alloc(int gfp);
extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
diff --git a/include/rxrpc/rxrpc.h b/include/rxrpc/rxrpc.h
index df6595c32c37..e9c690964cea 100644
--- a/include/rxrpc/rxrpc.h
+++ b/include/rxrpc/rxrpc.h
@@ -16,10 +16,17 @@
extern uint32_t rxrpc_epoch;
+#ifdef CONFIG_SYSCTL
extern int rxrpc_ktrace;
extern int rxrpc_kdebug;
extern int rxrpc_kproto;
extern int rxrpc_knet;
+#else
+#define rxrpc_ktrace 0
+#define rxrpc_kdebug 0
+#define rxrpc_kproto 0
+#define rxrpc_knet 0
+#endif
extern int rxrpc_sysctl_init(void);
extern void rxrpc_sysctl_cleanup(void);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 621552c344e7..14b791ac5089 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -312,6 +312,19 @@ struct rb_node *rb_first(struct rb_root *root)
}
EXPORT_SYMBOL(rb_first);
+/*
+ * Return the rightmost node of the tree at @root (reached by following
+ * rb_right links from the root), or NULL if the tree has no nodes.
+ */
+struct rb_node *rb_last(struct rb_root *root)
+{
+	struct rb_node *n = root->rb_node;
+
+	if (n != NULL) {
+		while (n->rb_right != NULL)
+			n = n->rb_right;
+	}
+	return n;
+}
+EXPORT_SYMBOL(rb_last);
+
struct rb_node *rb_next(struct rb_node *node)
{
/* If we have a right-hand child, go down and then left as far
diff --git a/net/Kconfig b/net/Kconfig
index 37ee31e7d2a9..cae135013a82 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -324,6 +324,19 @@ config BRIDGE
config VLAN_8021Q
tristate "802.1Q VLAN Support"
+ ---help---
+ Select this and you will be able to create 802.1Q VLAN interfaces
+ on your ethernet interfaces. 802.1Q VLAN supports almost
+ everything a regular ethernet interface does, including
+ firewalling, bridging, and of course IP traffic. You will need
+ the 'vconfig' tool from the VLAN project in order to effectively
+ use VLANs. See the VLAN web page for more information:
+ <http://www.candelatech.com/~greear/vlan.html>
+
+ To compile this code as a module, choose M here: the module
+ will be called 8021q.
+
+ If unsure, say N.
config DECNET
tristate "DECnet Support"
diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile
index cbe8a6a0bef0..d179728ad522 100644
--- a/net/appletalk/Makefile
+++ b/net/appletalk/Makefile
@@ -4,5 +4,6 @@
obj-$(CONFIG_ATALK) += appletalk.o
-appletalk-y := aarp.o ddp.o atalk_proc.o
+appletalk-y := aarp.o ddp.o
+appletalk-$(CONFIG_PROC_FS) += atalk_proc.o
appletalk-$(CONFIG_SYSCTL) += sysctl_net_atalk.o
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 1e00a582277d..4d20501fad77 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -15,8 +15,6 @@
#include <net/sock.h>
#include <linux/atalk.h>
-#ifdef CONFIG_PROC_FS
-extern struct file_operations atalk_seq_arp_fops;
static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
{
@@ -321,14 +319,3 @@ void __exit atalk_proc_exit(void)
remove_proc_entry("arp", atalk_proc_dir);
remove_proc_entry("atalk", proc_net);
}
-
-#else /* CONFIG_PROC_FS */
-int __init atalk_proc_init(void)
-{
- return 0;
-}
-
-void __exit atalk_proc_exit(void)
-{
-}
-#endif /* CONFIG_PROC_FS */
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 4185d7b8ed02..588cbe1ec16f 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -61,16 +61,6 @@
#include <net/route.h>
#include <linux/atalk.h>
-extern void aarp_cleanup_module(void);
-
-extern void aarp_probe_network(struct atalk_iface *atif);
-extern int aarp_proxy_probe_network(struct atalk_iface *atif,
- struct atalk_addr *sa);
-extern void aarp_proxy_remove(struct net_device *dev, struct atalk_addr *sa);
-
-extern void atalk_register_sysctl(void);
-extern void atalk_unregister_sysctl(void);
-
struct datalink_proto *ddp_dl, *aarp_dl;
static struct proto_ops atalk_dgram_ops;
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index 25b33f670499..af7f0604395d 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -7,13 +7,9 @@
*/
#include <linux/config.h>
-
-#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
-extern int sysctl_aarp_expiry_time;
-extern int sysctl_aarp_tick_time;
-extern int sysctl_aarp_retransmit_limit;
-extern int sysctl_aarp_resolve_time;
+#include <net/sock.h>
+#include <linux/atalk.h>
static struct ctl_table atalk_table[] = {
{
@@ -85,13 +81,3 @@ void atalk_unregister_sysctl(void)
{
unregister_sysctl_table(atalk_table_header);
}
-
-#else /* CONFIG_PROC_FS */
-void atalk_register_sysctl(void)
-{
-}
-
-void atalk_unregister_sysctl(void)
-{
-}
-#endif /* CONFIG_PROC_FS */
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 4417df3fafa6..5de7c1fd73b5 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -47,8 +47,8 @@
#endif
-struct net_device *clip_devs = NULL;
-struct atm_vcc *atmarpd = NULL;
+static struct net_device *clip_devs;
+static struct atm_vcc *atmarpd;
static struct neigh_table clip_tbl;
static struct timer_list idle_timer;
static int start_timer = 1;
diff --git a/net/atm/ipcommon.h b/net/atm/ipcommon.h
index bc1675eca081..d72165f60939 100644
--- a/net/atm/ipcommon.h
+++ b/net/atm/ipcommon.h
@@ -12,9 +12,6 @@
#include <linux/netdevice.h>
#include <linux/atmdev.h>
-
-extern struct net_device *clip_devs;
-
/*
* Appends all skbs from "from" to "to". The operation is atomic with respect
* to all other skb operations on "from" or "to".
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 727311dfd884..78aa491ceb90 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -73,20 +73,27 @@ static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash
static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr)
{
if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_IP)) {
- struct iphdr iph;
+ struct iphdr _iph, *ih;
- if (skb_copy_bits(skb, 0, &iph, sizeof(iph)))
+ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (ih == NULL)
return -1;
- *addr = iph.daddr;
+ *addr = ih->daddr;
} else if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_ARP)) {
- struct arphdr arph;
+ struct arphdr _arph, *ah;
+ uint32_t buf, *bp;
- if (skb_copy_bits(skb, 0, &arph, sizeof(arph)) ||
- arph.ar_pln != sizeof(uint32_t) || arph.ar_hln != ETH_ALEN)
+ ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ if (ah == NULL ||
+ ah->ar_pln != sizeof(uint32_t) ||
+ ah->ar_hln != ETH_ALEN)
return -1;
- if (skb_copy_bits(skb, sizeof(struct arphdr) +
- 2 * ETH_ALEN + sizeof(uint32_t), addr, sizeof(uint32_t)))
+ bp = skb_header_pointer(skb, sizeof(struct arphdr) +
+ 2 * ETH_ALEN + sizeof(uint32_t),
+ sizeof(uint32_t), &buf);
+ if (bp == NULL)
return -1;
+ *addr = *bp;
}
return 0;
}
@@ -94,20 +101,26 @@ static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr)
static int get_ip_src(const struct sk_buff *skb, uint32_t *addr)
{
if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_IP)) {
- struct iphdr iph;
+ struct iphdr _iph, *ih;
- if (skb_copy_bits(skb, 0, &iph, sizeof(iph)))
+ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (ih == NULL)
return -1;
- *addr = iph.saddr;
+ *addr = ih->saddr;
} else if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_ARP)) {
- struct arphdr arph;
+ struct arphdr _arph, *ah;
+ uint32_t buf, *bp;
- if (skb_copy_bits(skb, 0, &arph, sizeof(arph)) ||
- arph.ar_pln != sizeof(uint32_t) || arph.ar_hln != ETH_ALEN)
+ ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ if (ah == NULL ||
+ ah->ar_pln != sizeof(uint32_t) ||
+ ah->ar_hln != ETH_ALEN)
return -1;
- if (skb_copy_bits(skb, sizeof(struct arphdr) +
- ETH_ALEN, addr, sizeof(uint32_t)))
+ bp = skb_header_pointer(skb, sizeof(struct arphdr) +
+ ETH_ALEN, sizeof(uint32_t), &buf);
+ if (bp == NULL)
return -1;
+ *addr = *bp;
}
return 0;
}
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index eb675848fbc3..e913cac50066 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -19,72 +19,79 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
const struct net_device *out, const void *data, unsigned int datalen)
{
struct ebt_arp_info *info = (struct ebt_arp_info *)data;
- struct arphdr arph;
+ struct arphdr _arph, *ah;
- if (skb_copy_bits(skb, 0, &arph, sizeof(arph)))
+ ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ if (ah == NULL)
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode !=
- arph.ar_op, EBT_ARP_OPCODE))
+ ah->ar_op, EBT_ARP_OPCODE))
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype !=
- arph.ar_hrd, EBT_ARP_HTYPE))
+ ah->ar_hrd, EBT_ARP_HTYPE))
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype !=
- arph.ar_pro, EBT_ARP_PTYPE))
+ ah->ar_pro, EBT_ARP_PTYPE))
return EBT_NOMATCH;
if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
- uint32_t addr;
+ uint32_t _addr, *ap;
/* IPv4 addresses are always 4 bytes */
- if (arph.ar_pln != sizeof(uint32_t))
+ if (ah->ar_pln != sizeof(uint32_t))
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_SRC_IP) {
- if (skb_copy_bits(skb, sizeof(struct arphdr) +
- arph.ar_hln, &addr, sizeof(addr)))
+ ap = skb_header_pointer(skb, sizeof(struct arphdr) +
+ ah->ar_hln, sizeof(_addr),
+ &_addr);
+ if (ap == NULL)
return EBT_NOMATCH;
- if (FWINV(info->saddr != (addr & info->smsk),
+ if (FWINV(info->saddr != (*ap & info->smsk),
EBT_ARP_SRC_IP))
return EBT_NOMATCH;
}
if (info->bitmask & EBT_ARP_DST_IP) {
- if (skb_copy_bits(skb, sizeof(struct arphdr) +
- 2*arph.ar_hln + sizeof(uint32_t), &addr,
- sizeof(addr)))
+ ap = skb_header_pointer(skb, sizeof(struct arphdr) +
+ 2*ah->ar_hln+sizeof(uint32_t),
+ sizeof(_addr), &_addr);
+ if (ap == NULL)
return EBT_NOMATCH;
- if (FWINV(info->daddr != (addr & info->dmsk),
+ if (FWINV(info->daddr != (*ap & info->dmsk),
EBT_ARP_DST_IP))
return EBT_NOMATCH;
}
}
if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
- unsigned char mac[ETH_ALEN];
+ unsigned char _mac[ETH_ALEN], *mp;
uint8_t verdict, i;
/* MAC addresses are 6 bytes */
- if (arph.ar_hln != ETH_ALEN)
+ if (ah->ar_hln != ETH_ALEN)
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_SRC_MAC) {
- if (skb_copy_bits(skb, sizeof(struct arphdr), &mac,
- ETH_ALEN))
+ mp = skb_header_pointer(skb, sizeof(struct arphdr),
+ sizeof(_mac), &_mac);
+ if (mp == NULL)
return EBT_NOMATCH;
verdict = 0;
for (i = 0; i < 6; i++)
- verdict |= (mac[i] ^ info->smaddr[i]) &
+ verdict |= (mp[i] ^ info->smaddr[i]) &
info->smmsk[i];
if (FWINV(verdict != 0, EBT_ARP_SRC_MAC))
return EBT_NOMATCH;
}
if (info->bitmask & EBT_ARP_DST_MAC) {
- if (skb_copy_bits(skb, sizeof(struct arphdr) +
- arph.ar_hln + arph.ar_pln, &mac, ETH_ALEN))
+ mp = skb_header_pointer(skb, sizeof(struct arphdr) +
+ ah->ar_hln + ah->ar_pln,
+ sizeof(_mac), &_mac);
+ if (mp == NULL)
return EBT_NOMATCH;
verdict = 0;
for (i = 0; i < 6; i++)
- verdict |= (mac[i] ^ info->dmaddr[i]) &
+ verdict |= (mp[i] ^ info->dmaddr[i]) &
info->dmmsk[i];
if (FWINV(verdict != 0, EBT_ARP_DST_MAC))
return EBT_NOMATCH;
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 87ba30dd090f..95189f02fcc0 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -20,30 +20,38 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
const void *data, unsigned int datalen)
{
struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data;
- u32 sip, dip;
- struct arphdr ah;
- unsigned char sha[ETH_ALEN];
+ u32 _sip, *siptr, _dip, *diptr;
+ struct arphdr _ah, *ap;
+ unsigned char _sha[ETH_ALEN], *shp;
struct sk_buff *skb = *pskb;
- if (skb_copy_bits(skb, 0, &ah, sizeof(ah)))
+ ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
+ if (ap == NULL)
return EBT_DROP;
- if (ah.ar_op != __constant_htons(ARPOP_REQUEST) || ah.ar_hln != ETH_ALEN
- || ah.ar_pro != __constant_htons(ETH_P_IP) || ah.ar_pln != 4)
+ if (ap->ar_op != __constant_htons(ARPOP_REQUEST) ||
+ ap->ar_hln != ETH_ALEN ||
+ ap->ar_pro != __constant_htons(ETH_P_IP) ||
+ ap->ar_pln != 4)
return EBT_CONTINUE;
- if (skb_copy_bits(skb, sizeof(ah), &sha, ETH_ALEN))
+ shp = skb_header_pointer(skb, sizeof(_ah), ETH_ALEN, &_sha);
+ if (shp == NULL)
return EBT_DROP;
- if (skb_copy_bits(skb, sizeof(ah) + ETH_ALEN, &sip, sizeof(sip)))
+ siptr = skb_header_pointer(skb, sizeof(_ah) + ETH_ALEN,
+ sizeof(_sip), &_sip);
+ if (siptr == NULL)
return EBT_DROP;
- if (skb_copy_bits(skb, sizeof(ah) + 2 * ETH_ALEN + sizeof(sip),
- &dip, sizeof(dip)))
+ diptr = skb_header_pointer(skb,
+ sizeof(_ah) + 2 * ETH_ALEN + sizeof(_sip),
+ sizeof(_dip), &_dip);
+ if (diptr == NULL)
return EBT_DROP;
- arp_send(ARPOP_REPLY, ETH_P_ARP, sip, (struct net_device *)in,
- dip, sha, info->mac, sha);
+ arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in,
+ *diptr, shp, info->mac, shp);
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 7bab7d065bd3..0b2f19943dac 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -28,41 +28,44 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
unsigned int datalen)
{
struct ebt_ip_info *info = (struct ebt_ip_info *)data;
- union {struct iphdr iph; struct tcpudphdr ports;} u;
+ struct iphdr _iph, *ih;
+ struct tcpudphdr _ports, *pptr;
- if (skb_copy_bits(skb, 0, &u.iph, sizeof(u.iph)))
+ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (ih == NULL)
return EBT_NOMATCH;
if (info->bitmask & EBT_IP_TOS &&
- FWINV(info->tos != u.iph.tos, EBT_IP_TOS))
+ FWINV(info->tos != ih->tos, EBT_IP_TOS))
return EBT_NOMATCH;
if (info->bitmask & EBT_IP_SOURCE &&
- FWINV((u.iph.saddr & info->smsk) !=
+ FWINV((ih->saddr & info->smsk) !=
info->saddr, EBT_IP_SOURCE))
return EBT_NOMATCH;
if ((info->bitmask & EBT_IP_DEST) &&
- FWINV((u.iph.daddr & info->dmsk) !=
+ FWINV((ih->daddr & info->dmsk) !=
info->daddr, EBT_IP_DEST))
return EBT_NOMATCH;
if (info->bitmask & EBT_IP_PROTO) {
- if (FWINV(info->protocol != u.iph.protocol, EBT_IP_PROTO))
+ if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO))
return EBT_NOMATCH;
if (!(info->bitmask & EBT_IP_DPORT) &&
!(info->bitmask & EBT_IP_SPORT))
return EBT_MATCH;
- if (skb_copy_bits(skb, u.iph.ihl*4, &u.ports,
- sizeof(u.ports)))
+ pptr = skb_header_pointer(skb, ih->ihl*4,
+ sizeof(_ports), &_ports);
+ if (pptr == NULL)
return EBT_NOMATCH;
if (info->bitmask & EBT_IP_DPORT) {
- u.ports.dst = ntohs(u.ports.dst);
- if (FWINV(u.ports.dst < info->dport[0] ||
- u.ports.dst > info->dport[1],
+ u32 dst = ntohs(pptr->dst);
+ if (FWINV(dst < info->dport[0] ||
+ dst > info->dport[1],
EBT_IP_DPORT))
return EBT_NOMATCH;
}
if (info->bitmask & EBT_IP_SPORT) {
- u.ports.src = ntohs(u.ports.src);
- if (FWINV(u.ports.src < info->sport[0] ||
- u.ports.src > info->sport[1],
+ u32 src = ntohs(pptr->src);
+ if (FWINV(src < info->sport[0] ||
+ src > info->sport[1],
EBT_IP_SPORT))
return EBT_NOMATCH;
}
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 2da7c682744d..407dfdbaf688 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -78,23 +78,29 @@ static void ebt_log(const struct sk_buff *skb, const struct net_device *in,
if ((info->bitmask & EBT_LOG_IP) && skb->mac.ethernet->h_proto ==
htons(ETH_P_IP)){
- if (skb_copy_bits(skb, 0, &u.iph, sizeof(u.iph))) {
+ struct iphdr _iph, *ih;
+
+ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (ih == NULL) {
printk(" INCOMPLETE IP header");
goto out;
}
printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,",
- NIPQUAD(u.iph.saddr), NIPQUAD(u.iph.daddr));
+ NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
-		printk(" IP tos=0x%02X, IP proto=%d", u.iph.tos,
-		   u.iph.protocol);
+		/* Read tos through ih as well: this hunk no longer copies
+		 * the IP header into u.iph. */
+		printk(" IP tos=0x%02X, IP proto=%d", ih->tos,
+		       ih->protocol);
- u.iph.protocol == IPPROTO_UDP) {
- if (skb_copy_bits(skb, u.iph.ihl*4, &u.ports,
- sizeof(u.ports))) {
+ ih->protocol);
+ if (ih->protocol == IPPROTO_TCP ||
+ ih->protocol == IPPROTO_UDP) {
+ struct tcpudphdr _ports, *pptr;
+
+ pptr = skb_header_pointer(skb, ih->ihl*4,
+ sizeof(_ports), &_ports);
+ if (pptr == NULL) {
printk(" INCOMPLETE TCP/UDP header");
goto out;
}
- printk(" SPT=%u DPT=%u", ntohs(u.ports.src),
- ntohs(u.ports.dst));
+ printk(" SPT=%u DPT=%u", ntohs(pptr->src),
+ ntohs(pptr->dst));
}
goto out;
}
@@ -102,32 +108,38 @@ static void ebt_log(const struct sk_buff *skb, const struct net_device *in,
if ((info->bitmask & EBT_LOG_ARP) &&
((skb->mac.ethernet->h_proto == __constant_htons(ETH_P_ARP)) ||
(skb->mac.ethernet->h_proto == __constant_htons(ETH_P_RARP)))) {
- if (skb_copy_bits(skb, 0, &u.arph, sizeof(u.arph))) {
+ struct arphdr _arph, *ah;
+
+ ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ if (ah == NULL) {
printk(" INCOMPLETE ARP header");
goto out;
}
printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
- ntohs(u.arph.ar_hrd), ntohs(u.arph.ar_pro),
- ntohs(u.arph.ar_op));
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
+ ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
* then log the ARP payload */
- if (u.arph.ar_hrd == __constant_htons(1) &&
- u.arph.ar_hln == ETH_ALEN &&
- u.arph.ar_pln == sizeof(uint32_t)) {
- if (skb_copy_bits(skb, sizeof(u.arph), &u.arpp,
- sizeof(u.arpp))) {
+ if (ah->ar_hrd == __constant_htons(1) &&
+ ah->ar_hln == ETH_ALEN &&
+ ah->ar_pln == sizeof(uint32_t)) {
+ struct arppayload _arpp, *ap;
+
+ ap = skb_header_pointer(skb, sizeof(u.arph),
+ sizeof(_arpp), &_arpp);
+ if (ap == NULL) {
printk(" INCOMPLETE ARP payload");
goto out;
}
printk(" ARP MAC SRC=");
- print_MAC(u.arpp.mac_src);
+ print_MAC(ap->mac_src);
printk(" ARP IP SRC=%u.%u.%u.%u",
- myNIPQUAD(u.arpp.ip_src));
+ myNIPQUAD(ap->ip_src));
printk(" ARP MAC DST=");
- print_MAC(u.arpp.mac_dst);
+ print_MAC(ap->mac_dst);
printk(" ARP IP DST=%u.%u.%u.%u",
- myNIPQUAD(u.arpp.ip_dst));
+ myNIPQUAD(ap->ip_dst));
}
}
out:
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index d0299efa1001..f8a8cdec16ee 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -122,26 +122,30 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in
const struct net_device *out, const void *data, unsigned int datalen)
{
struct ebt_stp_info *info = (struct ebt_stp_info *)data;
- struct stp_header stph;
+ struct stp_header _stph, *sp;
uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
- if (skb_copy_bits(skb, 0, &stph, sizeof(stph)))
+
+ sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph);
+ if (sp == NULL)
return EBT_NOMATCH;
/* The stp code only considers these */
- if (memcmp(&stph, header, sizeof(header)))
+ if (memcmp(sp, header, sizeof(header)))
return EBT_NOMATCH;
if (info->bitmask & EBT_STP_TYPE
- && FWINV(info->type != stph.type, EBT_STP_TYPE))
+ && FWINV(info->type != sp->type, EBT_STP_TYPE))
return EBT_NOMATCH;
- if (stph.type == BPDU_TYPE_CONFIG &&
+ if (sp->type == BPDU_TYPE_CONFIG &&
info->bitmask & EBT_STP_CONFIG_MASK) {
- struct stp_config_pdu stpc;
+ struct stp_config_pdu _stpc, *st;
- if (skb_copy_bits(skb, sizeof(stph), &stpc, sizeof(stpc)))
- return EBT_NOMATCH;
- return ebt_filter_config(info, &stpc);
+ st = skb_header_pointer(skb, sizeof(_stph),
+ sizeof(_stpc), &_stpc);
+ if (st == NULL)
+ return EBT_NOMATCH;
+ return ebt_filter_config(info, st);
}
return EBT_MATCH;
}
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index ec111772bbe9..da30941db2ef 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -21,13 +21,14 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_vlan.h>
-static unsigned char debug;
+static int debug;
#define MODULE_VERS "0.6"
-MODULE_PARM(debug, "0-1b");
+module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages");
MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>");
MODULE_DESCRIPTION("802.1Q match module (ebtables extension), v"
@@ -48,7 +49,7 @@ ebt_filter_vlan(const struct sk_buff *skb,
const void *data, unsigned int datalen)
{
struct ebt_vlan_info *info = (struct ebt_vlan_info *) data;
- struct vlan_hdr frame;
+ struct vlan_hdr _frame, *fp;
unsigned short TCI; /* Whole TCI, given from parsed frame */
unsigned short id; /* VLAN ID, given from frame TCI */
@@ -56,7 +57,8 @@ ebt_filter_vlan(const struct sk_buff *skb,
/* VLAN encapsulated Type/Length field, given from orig frame */
unsigned short encap;
- if (skb_copy_bits(skb, 0, &frame, sizeof(frame)))
+ fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
+ if (fp == NULL)
return EBT_NOMATCH;
/* Tag Control Information (TCI) consists of the following elements:
@@ -66,10 +68,10 @@ ebt_filter_vlan(const struct sk_buff *skb,
* (CFI) is a single bit flag value. Currently ignored.
* - VLAN Identifier (VID). The VID is encoded as
* an unsigned binary number. */
- TCI = ntohs(frame.h_vlan_TCI);
+ TCI = ntohs(fp->h_vlan_TCI);
id = TCI & VLAN_VID_MASK;
prio = (TCI >> 13) & 0x7;
- encap = frame.h_vlan_encapsulated_proto;
+ encap = fp->h_vlan_encapsulated_proto;
/* Checking VLAN Identifier (VID) */
if (GET_BITMASK(EBT_VLAN_ID))
diff --git a/net/core/dev.c b/net/core/dev.c
index a67e65a0f267..da7fabc7aa26 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3279,6 +3279,8 @@ static int __init net_dev_init(void)
BUG_ON(!dev_boot_phase);
+ net_random_init();
+
if (dev_proc_init())
goto out;
diff --git a/net/core/filter.c b/net/core/filter.c
index 9c2a95080768..f3b88205ace2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -183,9 +183,10 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
continue;
}
} else {
- u32 tmp;
- if (!skb_copy_bits(skb, k, &tmp, 4)) {
- A = ntohl(tmp);
+ u32 _tmp, *p;
+ p = skb_header_pointer(skb, k, 4, &_tmp);
+ if (p != NULL) {
+ A = ntohl(*p);
continue;
}
}
@@ -208,9 +209,10 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
continue;
}
} else {
- u16 tmp;
- if (!skb_copy_bits(skb, k, &tmp, 2)) {
- A = ntohs(tmp);
+ u16 _tmp, *p;
+ p = skb_header_pointer(skb, k, 2, &_tmp);
+ if (p != NULL) {
+ A = ntohs(*p);
continue;
}
}
@@ -233,9 +235,10 @@ load_b:
continue;
}
} else {
- u8 tmp;
- if (!skb_copy_bits(skb, k, &tmp, 1)) {
- A = tmp;
+ u8 _tmp, *p;
+ p = skb_header_pointer(skb, k, 1, &_tmp);
+ if (p != NULL) {
+ A = *p;
continue;
}
}
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
index 09d10722632b..a81816a57827 100644
--- a/net/core/netfilter.c
+++ b/net/core/netfilter.c
@@ -695,11 +695,12 @@ int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
/* DaveM says protocol headers are also modifiable. */
switch ((*pskb)->nh.iph->protocol) {
case IPPROTO_TCP: {
- struct tcphdr hdr;
- if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
- &hdr, sizeof(hdr)) != 0)
+ struct tcphdr _hdr, *hp;
+ hp = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+ sizeof(_hdr), &_hdr);
+ if (hp == NULL)
goto copy_skb;
- if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
+ if (writable_len <= (*pskb)->nh.iph->ihl*4 + hp->doff*4)
goto pull_skb;
goto copy_skb;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index ec4dea2d7f7e..5276e4134f87 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -18,6 +18,7 @@
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
+#include <linux/rcupdate.h>
#include <net/tcp.h>
#include <net/udp.h>
@@ -572,16 +573,18 @@ int netpoll_setup(struct netpoll *np)
memcpy(np->local_mac, ndev->dev_addr, 6);
if (!np->local_ip) {
- in_dev = in_dev_get(ndev);
+ rcu_read_lock();
+ in_dev = __in_dev_get(ndev);
if (!in_dev) {
+ rcu_read_unlock();
printk(KERN_ERR "%s: no IP address for %s, aborting\n",
np->name, np->dev_name);
goto release;
}
np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
- in_dev_put(in_dev);
+ rcu_read_unlock();
printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->local_ip));
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d1a62cddb31a..d3ba2c75e530 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -70,6 +70,7 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/inet.h>
+#include <linux/rcupdate.h>
#include <asm/byteorder.h>
#include <asm/bitops.h>
#include <asm/io.h>
@@ -263,14 +264,17 @@ static struct net_device *setup_inject(struct pktgen_info* info)
info->saddr_min = 0;
info->saddr_max = 0;
if (strlen(info->src_min) == 0) {
- struct in_device *in_dev = in_dev_get(odev);
+ struct in_device *in_dev;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get(odev);
if (in_dev) {
if (in_dev->ifa_list) {
info->saddr_min = in_dev->ifa_list->ifa_address;
info->saddr_max = info->saddr_min;
}
- in_dev_put(in_dev);
}
+ rcu_read_unlock();
}
else {
info->saddr_min = in_aton(info->src_min);
diff --git a/net/core/utils.c b/net/core/utils.c
index 8058d9c5e236..6093174581fd 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -19,22 +19,116 @@
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/percpu.h>
#include <asm/system.h>
#include <asm/uaccess.h>
-static unsigned long net_rand_seed = 152L;
+
+/*
+ This is a maximally equidistributed combined Tausworthe generator
+ based on code from GNU Scientific Library 1.5 (30 Jun 2004)
+
+ x_n = (s1_n ^ s2_n ^ s3_n)
+
+ s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
+ s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
+ s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
+
+ The period of this generator is about 2^88.
+
+ From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+ Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
+
+ This is available on the net from L'Ecuyer's home page,
+
+ http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
+ ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
+
+ There is an erratum in the paper "Tables of Maximally
+ Equidistributed Combined LFSR Generators", Mathematics of
+ Computation, 68, 225 (1999), 261--269:
+ http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
+
+ ... the k_j most significant bits of z_j must be non-
+ zero, for each j. (Note: this restriction also applies to the
+ computer code given in [4], but was mistakenly not mentioned in
+ that paper.)
+
+ This affects the seeding procedure by imposing the requirement
+ s1 > 1, s2 > 7, s3 > 15.
+
+*/
+struct nrnd_state {
+ u32 s1, s2, s3;
+};
+
+static DEFINE_PER_CPU(struct nrnd_state, net_rand_state);
+
+static u32 __net_random(struct nrnd_state *state)
+{
+#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
+
+ state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
+ state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
+ state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+
+ return (state->s1 ^ state->s2 ^ state->s3);
+}
+
+static void __net_srandom(struct nrnd_state *state, unsigned long entropy)
+{
+ u32 s = state->s1 ^ entropy;
+
+ if (s == 0)
+ s = 1; /* default seed is 1 */
+
+#define LCG(n) (69069 * n)
+ state->s1 = LCG(s);
+ state->s2 = LCG(state->s1);
+ state->s3 = LCG(state->s2);
+
+ /* "warm it up" */
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
+}
+
unsigned long net_random(void)
{
- net_rand_seed=net_rand_seed*69069L+1;
- return net_rand_seed^jiffies;
+ unsigned long r;
+ struct nrnd_state *state = &get_cpu_var(net_rand_state);
+ r = __net_random(state);
+ put_cpu_var(state);
+ return r;
}
+
void net_srandom(unsigned long entropy)
{
- net_rand_seed ^= entropy;
- net_random();
+ struct nrnd_state *state = &get_cpu_var(net_rand_state);
+ __net_srandom(state, entropy);
+ put_cpu_var(state);
+}
+
+void __init net_random_init(void)
+{
+ int i;
+ unsigned long seed[NR_CPUS];
+
+ get_random_bytes(seed, sizeof(seed));
+
+ for (i = 0; i < NR_CPUS; i++) {
+ struct nrnd_state *state = &per_cpu(net_rand_state,i);
+
+ memset(state, 0, sizeof(*state));
+ __net_srandom(state, seed[i]);
+ }
}
int net_msg_cost = 5*HZ;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 11b0f0c6d45c..5a05efb83092 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -247,21 +247,6 @@ static struct dn_dev_sysctl_table {
}, {0}}
};
-static inline __u16 mtu2blksize(struct net_device *dev)
-{
- u32 blksize = dev->mtu;
- if (blksize > 0xffff)
- blksize = 0xffff;
-
- if (dev->type == ARPHRD_ETHER ||
- dev->type == ARPHRD_PPP ||
- dev->type == ARPHRD_IPGRE ||
- dev->type == ARPHRD_LOOPBACK)
- blksize -= 2;
-
- return (__u16)blksize;
-}
-
static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
{
struct dn_dev_sysctl_table *t;
@@ -314,52 +299,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
}
}
-struct net_device *dn_dev_get_default(void)
-{
- struct net_device *dev;
- read_lock(&dndev_lock);
- dev = decnet_default_device;
- if (dev) {
- if (dev->dn_ptr)
- dev_hold(dev);
- else
- dev = NULL;
- }
- read_unlock(&dndev_lock);
- return dev;
-}
-
-int dn_dev_set_default(struct net_device *dev, int force)
-{
- struct net_device *old = NULL;
- int rv = -EBUSY;
- if (!dev->dn_ptr)
- return -ENODEV;
- write_lock(&dndev_lock);
- if (force || decnet_default_device == NULL) {
- old = decnet_default_device;
- decnet_default_device = dev;
- rv = 0;
- }
- write_unlock(&dndev_lock);
- if (old)
- dev_put(dev);
- return rv;
-}
-
-static void dn_dev_check_default(struct net_device *dev)
-{
- write_lock(&dndev_lock);
- if (dev == decnet_default_device) {
- decnet_default_device = NULL;
- } else {
- dev = NULL;
- }
- write_unlock(&dndev_lock);
- if (dev)
- dev_put(dev);
-}
-
static int dn_forwarding_proc(ctl_table *table, int write,
struct file *filep,
void __user *buffer,
@@ -454,6 +393,21 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
#endif /* CONFIG_SYSCTL */
+static inline __u16 mtu2blksize(struct net_device *dev)
+{
+ u32 blksize = dev->mtu;
+ if (blksize > 0xffff)
+ blksize = 0xffff;
+
+ if (dev->type == ARPHRD_ETHER ||
+ dev->type == ARPHRD_PPP ||
+ dev->type == ARPHRD_IPGRE ||
+ dev->type == ARPHRD_LOOPBACK)
+ blksize -= 2;
+
+ return (__u16)blksize;
+}
+
static struct dn_ifaddr *dn_dev_alloc_ifa(void)
{
struct dn_ifaddr *ifa;
@@ -635,6 +589,52 @@ rarok:
goto done;
}
+struct net_device *dn_dev_get_default(void)
+{
+ struct net_device *dev;
+ read_lock(&dndev_lock);
+ dev = decnet_default_device;
+ if (dev) {
+ if (dev->dn_ptr)
+ dev_hold(dev);
+ else
+ dev = NULL;
+ }
+ read_unlock(&dndev_lock);
+ return dev;
+}
+
+int dn_dev_set_default(struct net_device *dev, int force)
+{
+ struct net_device *old = NULL;
+ int rv = -EBUSY;
+ if (!dev->dn_ptr)
+ return -ENODEV;
+ write_lock(&dndev_lock);
+ if (force || decnet_default_device == NULL) {
+ old = decnet_default_device;
+ decnet_default_device = dev;
+ rv = 0;
+ }
+ write_unlock(&dndev_lock);
+ if (old)
+ dev_put(dev);
+ return rv;
+}
+
+static void dn_dev_check_default(struct net_device *dev)
+{
+ write_lock(&dndev_lock);
+ if (dev == decnet_default_device) {
+ decnet_default_device = NULL;
+ } else {
+ dev = NULL;
+ }
+ write_unlock(&dndev_lock);
+ if (dev)
+ dev_put(dev);
+}
+
static struct dn_dev *dn_dev_by_index(int ifindex)
{
struct net_device *dev;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 07b4cff2f44d..fc31ae1209d1 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -39,6 +39,7 @@
#include <net/udp.h>
#include <net/ip.h>
#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -401,16 +402,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
y.x maps to IP a.b.c.x. This should be replaced with something
more flexible and more aware of subnet masks. */
{
- struct in_device *idev = in_dev_get(dev);
+ struct in_device *idev;
unsigned long network = 0;
+
+ rcu_read_lock();
+ idev = __in_dev_get(dev);
if (idev) {
- read_lock(&idev->lock);
if (idev->ifa_list)
network = ntohl(idev->ifa_list->ifa_address) &
0xffffff00; /* !!! */
- read_unlock(&idev->lock);
- in_dev_put(idev);
}
+ rcu_read_unlock();
udpdest.sin_addr.s_addr = htonl(network | addr.station);
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index de00c668c98c..fc370970ba83 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -82,16 +82,6 @@ config IP_ROUTE_FWMARK
If you say Y here, you will be able to specify different routes for
packets with different mark values (see iptables(8), MARK target).
-config IP_ROUTE_NAT
- bool "IP: fast network address translation"
- depends on IP_MULTIPLE_TABLES
- help
- If you say Y here, your router will be able to modify source and
- destination addresses of packets that pass through it, in a manner
- you specify. General information about Network Address Translation
- can be gotten from the document
- <http://www.hasenstein.com/linux-ip-nat/diplom/nat.html>.
-
config IP_ROUTE_MULTIPATH
bool "IP: equal cost multipath"
depends on IP_ADVANCED_ROUTER
@@ -187,7 +177,7 @@ config IP_PNP_RARP
config NET_IPIP
tristate "IP: tunneling"
depends on INET
- select XFRM
+ select INET_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -351,6 +341,7 @@ config INET_ESP
config INET_IPCOMP
tristate "IP: IPComp transformation"
select XFRM
+ select INET_TUNNEL
select CRYPTO
select CRYPTO_DEFLATE
---help---
@@ -359,5 +350,14 @@ config INET_IPCOMP
If unsure, say Y.
+config INET_TUNNEL
+ tristate "IP: tunnel transformation"
+ select XFRM
+ ---help---
+ Support for generic IP tunnel transformation, which is required by
+ the IP tunneling module as well as tunnel mode IPComp.
+
+ If unsure, say Y.
+
source "net/ipv4/ipvs/Kconfig"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 0fe409afa094..a7a7a35574d4 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -19,9 +19,10 @@ obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_AH) += ah4.o
obj-$(CONFIG_INET_ESP) += esp4.o
obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
+obj-$(CONFIG_INET_TUNNEL) += xfrm4_tunnel.o
obj-$(CONFIG_IP_PNP) += ipconfig.o
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IP_VS) += ipvs/
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
- xfrm4_tunnel.o xfrm4_output.o
+ xfrm4_output.o
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 9784f0376980..970fe58b4880 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -53,10 +53,10 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
return 0;
}
-static int ah_output(struct sk_buff **pskb)
+static int ah_output(struct sk_buff *skb)
{
int err;
- struct dst_entry *dst = (*pskb)->dst;
+ struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
struct iphdr *iph, *top_iph;
struct ip_auth_hdr *ah;
@@ -66,7 +66,7 @@ static int ah_output(struct sk_buff **pskb)
char buf[60];
} tmp_iph;
- top_iph = (*pskb)->nh.iph;
+ top_iph = skb->nh.iph;
iph = &tmp_iph.iph;
iph->tos = top_iph->tos;
@@ -85,7 +85,7 @@ static int ah_output(struct sk_buff **pskb)
ah->nexthdr = top_iph->protocol;
top_iph->tos = 0;
- top_iph->tot_len = htons((*pskb)->len);
+ top_iph->tot_len = htons(skb->len);
top_iph->frag_off = 0;
top_iph->ttl = 0;
top_iph->protocol = IPPROTO_AH;
@@ -98,7 +98,7 @@ static int ah_output(struct sk_buff **pskb)
ah->reserved = 0;
ah->spi = x->id.spi;
ah->seq_no = htonl(++x->replay.oseq);
- ahp->icv(ahp, *pskb, ah->auth_data);
+ ahp->icv(ahp, skb, ah->auth_data);
top_iph->tos = iph->tos;
top_iph->ttl = iph->ttl;
@@ -116,7 +116,7 @@ error:
return err;
}
-int ah_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static int ah_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
{
int ah_hlen;
struct iphdr *iph;
@@ -184,7 +184,7 @@ out:
return -EINVAL;
}
-void ah4_err(struct sk_buff *skb, u32 info)
+static void ah4_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr*)skb->data;
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
@@ -214,6 +214,9 @@ static int ah_init_state(struct xfrm_state *x, void *args)
if (x->aalg->alg_key_len > 512)
goto error;
+ if (x->encap)
+ goto error;
+
ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
if (ahp == NULL)
return -ENOMEM;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 4781dea42dfe..fc9930460864 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -88,31 +88,31 @@ static void devinet_sysctl_register(struct in_device *in_dev,
static void devinet_sysctl_unregister(struct ipv4_devconf *p);
#endif
-int inet_ifa_count;
-int inet_dev_count;
-
/* Locks all the inet devices. */
-rwlock_t inetdev_lock = RW_LOCK_UNLOCKED;
-
static struct in_ifaddr *inet_alloc_ifa(void)
{
struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
if (ifa) {
memset(ifa, 0, sizeof(*ifa));
- inet_ifa_count++;
+ INIT_RCU_HEAD(&ifa->rcu_head);
}
return ifa;
}
-static __inline__ void inet_free_ifa(struct in_ifaddr *ifa)
+static void inet_rcu_free_ifa(struct rcu_head *head)
{
+ struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
if (ifa->ifa_dev)
- __in_dev_put(ifa->ifa_dev);
+ in_dev_put(ifa->ifa_dev);
kfree(ifa);
- inet_ifa_count--;
+}
+
+static inline void inet_free_ifa(struct in_ifaddr *ifa)
+{
+ call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
void in_dev_finish_destroy(struct in_device *idev)
@@ -129,7 +129,6 @@ void in_dev_finish_destroy(struct in_device *idev)
if (!idev->dead)
printk("Freeing alive in_device %p\n", idev);
else {
- inet_dev_count--;
kfree(idev);
}
}
@@ -144,24 +143,24 @@ struct in_device *inetdev_init(struct net_device *dev)
if (!in_dev)
goto out;
memset(in_dev, 0, sizeof(*in_dev));
- in_dev->lock = RW_LOCK_UNLOCKED;
+ INIT_RCU_HEAD(&in_dev->rcu_head);
memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
in_dev->cnf.sysctl = NULL;
in_dev->dev = dev;
if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
goto out_kfree;
- inet_dev_count++;
/* Reference in_dev->dev */
dev_hold(dev);
#ifdef CONFIG_SYSCTL
neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
NET_IPV4_NEIGH, "ipv4", NULL);
#endif
- write_lock_bh(&inetdev_lock);
- dev->ip_ptr = in_dev;
+
/* Account for reference dev->ip_ptr */
in_dev_hold(in_dev);
- write_unlock_bh(&inetdev_lock);
+ smp_wmb();
+ dev->ip_ptr = in_dev;
+
#ifdef CONFIG_SYSCTL
devinet_sysctl_register(in_dev, &in_dev->cnf);
#endif
@@ -176,6 +175,12 @@ out_kfree:
goto out;
}
+static void in_dev_rcu_put(struct rcu_head *head)
+{
+ struct in_device *idev = container_of(head, struct in_device, rcu_head);
+ in_dev_put(idev);
+}
+
static void inetdev_destroy(struct in_device *in_dev)
{
struct in_ifaddr *ifa;
@@ -194,30 +199,28 @@ static void inetdev_destroy(struct in_device *in_dev)
#ifdef CONFIG_SYSCTL
devinet_sysctl_unregister(&in_dev->cnf);
#endif
- write_lock_bh(&inetdev_lock);
+
in_dev->dev->ip_ptr = NULL;
- /* in_dev_put following below will kill the in_device */
- write_unlock_bh(&inetdev_lock);
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(in_dev->arp_parms);
#endif
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
- in_dev_put(in_dev);
+ call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
{
- read_lock(&in_dev->lock);
+ rcu_read_lock();
for_primary_ifa(in_dev) {
if (inet_ifa_match(a, ifa)) {
if (!b || inet_ifa_match(b, ifa)) {
- read_unlock(&in_dev->lock);
+ rcu_read_unlock();
return 1;
}
}
} endfor_ifa(in_dev);
- read_unlock(&in_dev->lock);
+ rcu_read_unlock();
return 0;
}
@@ -241,9 +244,8 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
ifap1 = &ifa->ifa_next;
continue;
}
- write_lock_bh(&in_dev->lock);
+
*ifap1 = ifa->ifa_next;
- write_unlock_bh(&in_dev->lock);
rtmsg_ifa(RTM_DELADDR, ifa);
notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
@@ -253,9 +255,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
/* 2. Unlink it */
- write_lock_bh(&in_dev->lock);
*ifap = ifa1->ifa_next;
- write_unlock_bh(&in_dev->lock);
/* 3. Announce address deletion */
@@ -317,9 +317,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
}
ifa->ifa_next = *ifap;
- write_lock_bh(&in_dev->lock);
*ifap = ifa;
- write_unlock_bh(&in_dev->lock);
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
@@ -771,12 +769,11 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
u32 addr = 0;
struct in_device *in_dev;
- read_lock(&inetdev_lock);
+ rcu_read_lock();
in_dev = __in_dev_get(dev);
if (!in_dev)
- goto out_unlock_inetdev;
+ goto no_in_dev;
- read_lock(&in_dev->lock);
for_primary_ifa(in_dev) {
if (ifa->ifa_scope > scope)
continue;
@@ -787,8 +784,8 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
if (!addr)
addr = ifa->ifa_local;
} endfor_ifa(in_dev);
- read_unlock(&in_dev->lock);
- read_unlock(&inetdev_lock);
+no_in_dev:
+ rcu_read_unlock();
if (addr)
goto out;
@@ -798,30 +795,24 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
in dev_base list.
*/
read_lock(&dev_base_lock);
- read_lock(&inetdev_lock);
+ rcu_read_lock();
for (dev = dev_base; dev; dev = dev->next) {
if ((in_dev = __in_dev_get(dev)) == NULL)
continue;
- read_lock(&in_dev->lock);
for_primary_ifa(in_dev) {
if (ifa->ifa_scope != RT_SCOPE_LINK &&
ifa->ifa_scope <= scope) {
- read_unlock(&in_dev->lock);
addr = ifa->ifa_local;
goto out_unlock_both;
}
} endfor_ifa(in_dev);
- read_unlock(&in_dev->lock);
}
out_unlock_both:
- read_unlock(&inetdev_lock);
read_unlock(&dev_base_lock);
+ rcu_read_unlock();
out:
return addr;
-out_unlock_inetdev:
- read_unlock(&inetdev_lock);
- goto out;
}
static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
@@ -874,29 +865,24 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
struct in_device *in_dev;
if (dev) {
- read_lock(&inetdev_lock);
- if ((in_dev = __in_dev_get(dev))) {
- read_lock(&in_dev->lock);
+ rcu_read_lock();
+ if ((in_dev = __in_dev_get(dev)))
addr = confirm_addr_indev(in_dev, dst, local, scope);
- read_unlock(&in_dev->lock);
- }
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
return addr;
}
read_lock(&dev_base_lock);
- read_lock(&inetdev_lock);
+ rcu_read_lock();
for (dev = dev_base; dev; dev = dev->next) {
if ((in_dev = __in_dev_get(dev))) {
- read_lock(&in_dev->lock);
addr = confirm_addr_indev(in_dev, dst, local, scope);
- read_unlock(&in_dev->lock);
if (addr)
break;
}
}
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
read_unlock(&dev_base_lock);
return addr;
@@ -1065,12 +1051,12 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (idx > s_idx)
s_ip_idx = 0;
- read_lock(&inetdev_lock);
+ rcu_read_lock();
if ((in_dev = __in_dev_get(dev)) == NULL) {
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
continue;
}
- read_lock(&in_dev->lock);
+
for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
ifa = ifa->ifa_next, ip_idx++) {
if (ip_idx < s_ip_idx)
@@ -1078,13 +1064,11 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
RTM_NEWADDR) <= 0) {
- read_unlock(&in_dev->lock);
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
goto done;
}
}
- read_unlock(&in_dev->lock);
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
}
done:
@@ -1138,11 +1122,11 @@ void inet_forward_change(void)
read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next) {
struct in_device *in_dev;
- read_lock(&inetdev_lock);
+ rcu_read_lock();
in_dev = __in_dev_get(dev);
if (in_dev)
in_dev->cnf.forwarding = on;
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
}
read_unlock(&dev_base_lock);
@@ -1508,6 +1492,5 @@ EXPORT_SYMBOL(devinet_ioctl);
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
-EXPORT_SYMBOL(inetdev_lock);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 07a594b831d2..27d8f808bad4 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -17,10 +17,10 @@ struct esp_decap_data {
__u8 proto;
};
-int esp_output(struct sk_buff **pskb)
+static int esp_output(struct sk_buff *skb)
{
int err;
- struct dst_entry *dst = (*pskb)->dst;
+ struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
struct iphdr *top_iph;
struct ip_esp_hdr *esph;
@@ -33,13 +33,13 @@ int esp_output(struct sk_buff **pskb)
int nfrags;
/* Strip IP+ESP header. */
- __skb_pull(*pskb, (*pskb)->h.raw - (*pskb)->data);
+ __skb_pull(skb, skb->h.raw - skb->data);
/* Now skb is pure payload to encrypt */
err = -ENOMEM;
/* Round to block size */
- clen = (*pskb)->len;
+ clen = skb->len;
esp = x->data;
alen = esp->auth.icv_trunc_len;
@@ -49,22 +49,22 @@ int esp_output(struct sk_buff **pskb)
if (esp->conf.padlen)
clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
- if ((nfrags = skb_cow_data(*pskb, clen-(*pskb)->len+alen, &trailer)) < 0)
+ if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
goto error;
/* Fill padding... */
do {
int i;
- for (i=0; i<clen-(*pskb)->len - 2; i++)
+ for (i=0; i<clen-skb->len - 2; i++)
*(u8*)(trailer->tail + i) = i+1;
} while (0);
- *(u8*)(trailer->tail + clen-(*pskb)->len - 2) = (clen - (*pskb)->len)-2;
- pskb_put(*pskb, trailer, clen - (*pskb)->len);
+ *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+ pskb_put(skb, trailer, clen - skb->len);
- __skb_push(*pskb, (*pskb)->data - (*pskb)->nh.raw);
- top_iph = (*pskb)->nh.iph;
- esph = (struct ip_esp_hdr *)((*pskb)->nh.raw + top_iph->ihl*4);
- top_iph->tot_len = htons((*pskb)->len + alen);
+ __skb_push(skb, skb->data - skb->nh.raw);
+ top_iph = skb->nh.iph;
+ esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
+ top_iph->tot_len = htons(skb->len + alen);
*(u8*)(trailer->tail - 1) = top_iph->protocol;
/* this is non-NULL only with UDP Encapsulation */
@@ -76,7 +76,7 @@ int esp_output(struct sk_buff **pskb)
uh = (struct udphdr *)esph;
uh->source = encap->encap_sport;
uh->dest = encap->encap_dport;
- uh->len = htons((*pskb)->len + alen - top_iph->ihl*4);
+ uh->len = htons(skb->len + alen - top_iph->ihl*4);
uh->check = 0;
switch (encap->encap_type) {
@@ -109,7 +109,7 @@ int esp_output(struct sk_buff **pskb)
if (!sg)
goto error;
}
- skb_to_sgvec(*pskb, sg, esph->enc_data+esp->conf.ivlen-(*pskb)->data, clen);
+ skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
crypto_cipher_encrypt(tfm, sg, sg, clen);
if (unlikely(sg != &esp->sgbuf[0]))
kfree(sg);
@@ -121,9 +121,9 @@ int esp_output(struct sk_buff **pskb)
}
if (esp->auth.icv_full_len) {
- esp->auth.icv(esp, *pskb, (u8*)esph-(*pskb)->data,
+ esp->auth.icv(esp, skb, (u8*)esph-skb->data,
sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
- pskb_put(*pskb, trailer, alen);
+ pskb_put(skb, trailer, alen);
}
ip_send_check(top_iph);
@@ -139,7 +139,7 @@ error:
* expensive, so we only support truncated data, which is the recommended
* and common case.
*/
-int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
{
struct iphdr *iph;
struct ip_esp_hdr *esph;
@@ -246,7 +246,7 @@ out:
return -EINVAL;
}
-int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
{
if (x->encap) {
@@ -320,7 +320,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
return mtu + x->props.header_len + esp->auth.icv_trunc_len;
}
-void esp4_err(struct sk_buff *skb, u32 info)
+static void esp4_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr*)skb->data;
struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
@@ -338,7 +338,7 @@ void esp4_err(struct sk_buff *skb, u32 info)
xfrm_state_put(x);
}
-void esp_destroy(struct xfrm_state *x)
+static void esp_destroy(struct xfrm_state *x)
{
struct esp_data *esp = x->data;
@@ -364,7 +364,7 @@ void esp_destroy(struct xfrm_state *x)
kfree(esp);
}
-int esp_init_state(struct xfrm_state *x, void *args)
+static int esp_init_state(struct xfrm_state *x, void *args)
{
struct esp_data *esp = NULL;
@@ -436,6 +436,7 @@ int esp_init_state(struct xfrm_state *x, void *args)
switch (encap->encap_type) {
default:
+ goto error;
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
@@ -449,15 +450,9 @@ int esp_init_state(struct xfrm_state *x, void *args)
return 0;
error:
- if (esp) {
- if (esp->auth.tfm)
- crypto_free_tfm(esp->auth.tfm);
- if (esp->auth.work_icv)
- kfree(esp->auth.work_icv);
- if (esp->conf.tfm)
- crypto_free_tfm(esp->conf.tfm);
- kfree(esp);
- }
+ x->data = esp;
+ esp_destroy(x);
+ x->data = NULL;
return -EINVAL;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f5b008a9d7d0..f13e797c32e8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -172,13 +172,13 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
int ret;
no_addr = rpf = 0;
- read_lock(&inetdev_lock);
+ rcu_read_lock();
in_dev = __in_dev_get(dev);
if (in_dev) {
no_addr = in_dev->ifa_list == NULL;
rpf = IN_DEV_RPFILTER(in_dev);
}
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
if (in_dev == NULL)
goto e_inval;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 66e78bb4e2d4..ad2481f8fa68 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -176,7 +176,7 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
table_id = rtm->rtm_table;
if (table_id == RT_TABLE_UNSPEC) {
struct fib_table *table;
- if (rtm->rtm_type == RTN_UNICAST || rtm->rtm_type == RTN_NAT) {
+ if (rtm->rtm_type == RTN_UNICAST) {
if ((table = fib_empty_table()) == NULL)
return -ENOBUFS;
table_id = table->tb_id;
@@ -251,26 +251,6 @@ u32 fib_rules_map_destination(u32 daddr, struct fib_result *res)
return (daddr&~mask)|res->fi->fib_nh->nh_gw;
}
-u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags)
-{
- struct fib_rule *r = res->r;
-
- if (r->r_action == RTN_NAT) {
- int addrtype = inet_addr_type(r->r_srcmap);
-
- if (addrtype == RTN_NAT) {
- /* Packet is from translated source; remember it */
- saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
- *flags |= RTCF_SNAT;
- } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
- /* Packet is from masqueraded source; remember it */
- saddr = r->r_srcmap;
- *flags |= RTCF_MASQ;
- }
- }
- return saddr;
-}
-
#ifdef CONFIG_NET_CLS_ROUTE
u32 fib_rules_tclass(struct fib_result *res)
{
@@ -334,7 +314,6 @@ FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
FRprintk("tb %d r %d ", r->r_table, r->r_action);
switch (r->r_action) {
case RTN_UNICAST:
- case RTN_NAT:
policy = r;
break;
case RTN_UNREACHABLE:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c1b6060df3f1..51191971eb12 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -124,17 +124,10 @@ static struct
.error = -EAGAIN,
.scope = RT_SCOPE_UNIVERSE,
}, /* RTN_THROW */
-#ifdef CONFIG_IP_ROUTE_NAT
- {
- .error = 0,
- .scope = RT_SCOPE_HOST,
- }, /* RTN_NAT */
-#else
{
.error = -EINVAL,
.scope = RT_SCOPE_NOWHERE,
}, /* RTN_NAT */
-#endif
{
.error = -EINVAL,
.scope = RT_SCOPE_NOWHERE,
@@ -543,15 +536,6 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
#endif
}
-#ifdef CONFIG_IP_ROUTE_NAT
- if (r->rtm_type == RTN_NAT) {
- if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
- goto err_inval;
- memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
- goto link_it;
- }
-#endif
-
if (fib_props[r->rtm_type].error) {
if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
goto err_inval;
@@ -629,12 +613,6 @@ fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struc
res->fi = fi;
switch (type) {
-#ifdef CONFIG_IP_ROUTE_NAT
- case RTN_NAT:
- FIB_RES_RESET(*res);
- atomic_inc(&fi->fib_clntref);
- return 0;
-#endif
case RTN_UNICAST:
case RTN_LOCAL:
case RTN_BROADCAST:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index be40431b73cf..062589289b4f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -478,20 +478,25 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
* ICMP error
*/
if (iph->protocol == IPPROTO_ICMP) {
- u8 inner_type;
-
- if (skb_copy_bits(skb_in,
- skb_in->nh.raw + (iph->ihl << 2) +
- offsetof(struct icmphdr, type) -
- skb_in->data, &inner_type, 1))
+ u8 _inner_type, *itp;
+
+ itp = skb_header_pointer(skb_in,
+ skb_in->nh.raw +
+ (iph->ihl << 2) +
+ offsetof(struct icmphdr,
+ type) -
+ skb_in->data,
+ sizeof(_inner_type),
+ &_inner_type);
+ if (itp == NULL)
goto out;
/*
* Assume any unknown ICMP type is an error. This
* isn't specified by the RFC, but think about it..
*/
- if (inner_type > NR_ICMP_TYPES ||
- icmp_pointers[inner_type].error)
+ if (*itp > NR_ICMP_TYPES ||
+ icmp_pointers[*itp].error)
goto out;
}
}
@@ -503,16 +508,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
* Construct source address and options.
*/
-#ifdef CONFIG_IP_ROUTE_NAT
- /*
- * Restore original addresses if packet has been translated.
- */
- if (rt->rt_flags & RTCF_NAT && IPCB(skb_in)->flags & IPSKB_TRANSLATED) {
- iph->daddr = rt->fl.fl4_dst;
- iph->saddr = rt->fl.fl4_src;
- }
-#endif
-
saddr = iph->daddr;
if (!(rt->rt_flags & RTCF_LOCAL))
saddr = 0;
@@ -879,7 +874,6 @@ static void icmp_address_reply(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct in_device *in_dev;
struct in_ifaddr *ifa;
- u32 mask;
if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
goto out;
@@ -887,24 +881,27 @@ static void icmp_address_reply(struct sk_buff *skb)
in_dev = in_dev_get(dev);
if (!in_dev)
goto out;
- read_lock(&in_dev->lock);
+ rcu_read_lock();
if (in_dev->ifa_list &&
IN_DEV_LOG_MARTIANS(in_dev) &&
IN_DEV_FORWARD(in_dev)) {
- if (skb_copy_bits(skb, 0, &mask, 4))
+ u32 _mask, *mp;
+
+ mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
+ if (mp == NULL)
BUG();
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
- if (mask == ifa->ifa_mask &&
+ if (*mp == ifa->ifa_mask &&
inet_ifa_match(rt->rt_src, ifa))
break;
}
if (!ifa && net_ratelimit()) {
printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from "
"%s/%u.%u.%u.%u\n",
- NIPQUAD(mask), dev->name, NIPQUAD(rt->rt_src));
+ NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
}
}
- read_unlock(&in_dev->lock);
+ rcu_read_unlock();
in_dev_put(in_dev);
out:;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 01db76123d88..d1815d3efd6c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -487,7 +487,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
int type;
if (!pmc) {
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
@@ -499,7 +499,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
skb = add_grec(skb, pmc, type, 0, 0);
spin_unlock_bh(&pmc->lock);
}
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
} else {
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -541,8 +541,8 @@ static void igmpv3_send_cr(struct in_device *in_dev)
struct sk_buff *skb = NULL;
int type, dtype;
- read_lock(&in_dev->lock);
- write_lock_bh(&in_dev->mc_lock);
+ read_lock(&in_dev->mc_list_lock);
+ spin_lock_bh(&in_dev->mc_tomb_lock);
/* deleted MCA's */
pmc_prev = NULL;
@@ -575,7 +575,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
} else
pmc_prev = pmc;
}
- write_unlock_bh(&in_dev->mc_lock);
+ spin_unlock_bh(&in_dev->mc_tomb_lock);
/* change recs */
for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
@@ -601,7 +601,8 @@ static void igmpv3_send_cr(struct in_device *in_dev)
}
spin_unlock_bh(&pmc->lock);
}
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
+
if (!skb)
return;
(void) igmpv3_sendpack(skb);
@@ -759,14 +760,14 @@ static void igmp_heard_report(struct in_device *in_dev, u32 group)
if (group == IGMP_ALL_HOSTS)
return;
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
for (im=in_dev->mc_list; im!=NULL; im=im->next) {
if (im->multiaddr == group) {
igmp_stop_timer(im);
break;
}
}
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
}
static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
@@ -840,7 +841,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* - Use the igmp->igmp_code field as the maximum
* delay possible
*/
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
for (im=in_dev->mc_list; im!=NULL; im=im->next) {
if (group && group != im->multiaddr)
continue;
@@ -856,7 +857,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
spin_unlock_bh(&im->lock);
igmp_mod_timer(im, max_delay);
}
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
}
int igmp_rcv(struct sk_buff *skb)
@@ -982,10 +983,10 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
}
spin_unlock_bh(&im->lock);
- write_lock_bh(&in_dev->mc_lock);
+ spin_lock_bh(&in_dev->mc_tomb_lock);
pmc->next = in_dev->mc_tomb;
in_dev->mc_tomb = pmc;
- write_unlock_bh(&in_dev->mc_lock);
+ spin_unlock_bh(&in_dev->mc_tomb_lock);
}
static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr)
@@ -993,7 +994,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr)
struct ip_mc_list *pmc, *pmc_prev;
struct ip_sf_list *psf, *psf_next;
- write_lock_bh(&in_dev->mc_lock);
+ spin_lock_bh(&in_dev->mc_tomb_lock);
pmc_prev = NULL;
for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) {
if (pmc->multiaddr == multiaddr)
@@ -1006,7 +1007,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr)
else
in_dev->mc_tomb = pmc->next;
}
- write_unlock_bh(&in_dev->mc_lock);
+ spin_unlock_bh(&in_dev->mc_tomb_lock);
if (pmc) {
for (psf=pmc->tomb; psf; psf=psf_next) {
psf_next = psf->sf_next;
@@ -1021,10 +1022,10 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
{
struct ip_mc_list *pmc, *nextpmc;
- write_lock_bh(&in_dev->mc_lock);
+ spin_lock_bh(&in_dev->mc_tomb_lock);
pmc = in_dev->mc_tomb;
in_dev->mc_tomb = NULL;
- write_unlock_bh(&in_dev->mc_lock);
+ spin_unlock_bh(&in_dev->mc_tomb_lock);
for (; pmc; pmc = nextpmc) {
nextpmc = pmc->next;
@@ -1033,7 +1034,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
kfree(pmc);
}
/* clear dead sources, too */
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
struct ip_sf_list *psf, *psf_next;
@@ -1046,7 +1047,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
kfree(psf);
}
}
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
}
#endif
@@ -1167,10 +1168,10 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
im->gsquery = 0;
#endif
im->loaded = 0;
- write_lock_bh(&in_dev->lock);
+ write_lock_bh(&in_dev->mc_list_lock);
im->next=in_dev->mc_list;
in_dev->mc_list=im;
- write_unlock_bh(&in_dev->lock);
+ write_unlock_bh(&in_dev->mc_list_lock);
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im->multiaddr);
#endif
@@ -1194,9 +1195,9 @@ void ip_mc_dec_group(struct in_device *in_dev, u32 addr)
for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
if (i->multiaddr==addr) {
if (--i->users == 0) {
- write_lock_bh(&in_dev->lock);
+ write_lock_bh(&in_dev->mc_list_lock);
*ip = i->next;
- write_unlock_bh(&in_dev->lock);
+ write_unlock_bh(&in_dev->mc_list_lock);
igmp_group_dropped(i);
if (!in_dev->dead)
@@ -1251,7 +1252,8 @@ void ip_mc_init_dev(struct in_device *in_dev)
in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
#endif
- in_dev->mc_lock = RW_LOCK_UNLOCKED;
+ in_dev->mc_list_lock = RW_LOCK_UNLOCKED;
+ in_dev->mc_tomb_lock = SPIN_LOCK_UNLOCKED;
}
/* Device going up */
@@ -1281,17 +1283,17 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
/* Deactivate timers */
ip_mc_down(in_dev);
- write_lock_bh(&in_dev->lock);
+ write_lock_bh(&in_dev->mc_list_lock);
while ((i = in_dev->mc_list) != NULL) {
in_dev->mc_list = i->next;
- write_unlock_bh(&in_dev->lock);
+ write_unlock_bh(&in_dev->mc_list_lock);
igmp_group_dropped(i);
ip_ma_put(i);
- write_lock_bh(&in_dev->lock);
+ write_lock_bh(&in_dev->mc_list_lock);
}
- write_unlock_bh(&in_dev->lock);
+ write_unlock_bh(&in_dev->mc_list_lock);
}
static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
@@ -1391,18 +1393,18 @@ int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
if (!in_dev)
return -ENODEV;
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
if (*pmca == pmc->multiaddr)
break;
}
if (!pmc) {
/* MCA not found?? bug */
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
return -ESRCH;
}
spin_lock_bh(&pmc->lock);
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
#ifdef CONFIG_IP_MULTICAST
sf_markstate(pmc);
#endif
@@ -1527,18 +1529,18 @@ int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
if (!in_dev)
return -ENODEV;
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
if (*pmca == pmc->multiaddr)
break;
}
if (!pmc) {
/* MCA not found?? bug */
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
return -ESRCH;
}
spin_lock_bh(&pmc->lock);
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
#ifdef CONFIG_IP_MULTICAST
sf_markstate(pmc);
@@ -2095,7 +2097,7 @@ int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto)
struct ip_sf_list *psf;
int rv = 0;
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
for (im=in_dev->mc_list; im; im=im->next) {
if (im->multiaddr == mc_addr)
break;
@@ -2117,7 +2119,7 @@ int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto)
} else
rv = 1; /* unspecified source; tentatively allow */
}
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
return rv;
}
@@ -2141,13 +2143,13 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
in_dev = in_dev_get(state->dev);
if (!in_dev)
continue;
- read_lock(&in_dev->lock);
+ read_lock(&in_dev->mc_list_lock);
im = in_dev->mc_list;
if (im) {
state->in_dev = in_dev;
break;
}
- read_unlock(&in_dev->lock);
+ read_unlock(&in_dev->mc_list_lock);
in_dev_put(in_dev);
}
return im;
@@ -2159,7 +2161,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
im = im->next;
while (!im) {
if (likely(state->in_dev != NULL)) {
- read_unlock(&state->in_dev->lock);
+ read_unlock(&state->in_dev->mc_list_lock);
in_dev_put(state->in_dev);
}
state->dev = state->dev->next;
@@ -2170,7 +2172,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
state->in_dev = in_dev_get(state->dev);
if (!state->in_dev)
continue;
- read_lock(&state->in_dev->lock);
+ read_lock(&state->in_dev->mc_list_lock);
im = state->in_dev->mc_list;
}
return im;
@@ -2206,7 +2208,7 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
{
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
if (likely(state->in_dev != NULL)) {
- read_unlock(&state->in_dev->lock);
+ read_unlock(&state->in_dev->mc_list_lock);
in_dev_put(state->in_dev);
state->in_dev = NULL;
}
@@ -2304,7 +2306,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
idev = in_dev_get(state->dev);
if (unlikely(idev == NULL))
continue;
- read_lock_bh(&idev->lock);
+ read_lock(&idev->mc_list_lock);
im = idev->mc_list;
if (likely(im != NULL)) {
spin_lock_bh(&im->lock);
@@ -2316,7 +2318,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
}
spin_unlock_bh(&im->lock);
}
- read_unlock_bh(&idev->lock);
+ read_unlock(&idev->mc_list_lock);
in_dev_put(idev);
}
return psf;
@@ -2332,7 +2334,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
state->im = state->im->next;
while (!state->im) {
if (likely(state->idev != NULL)) {
- read_unlock_bh(&state->idev->lock);
+ read_unlock(&state->idev->mc_list_lock);
in_dev_put(state->idev);
}
state->dev = state->dev->next;
@@ -2343,7 +2345,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
state->idev = in_dev_get(state->dev);
if (!state->idev)
continue;
- read_lock_bh(&state->idev->lock);
+ read_lock(&state->idev->mc_list_lock);
state->im = state->idev->mc_list;
}
if (!state->im)
@@ -2389,7 +2391,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
state->im = NULL;
}
if (likely(state->idev != NULL)) {
- read_unlock_bh(&state->idev->lock);
+ read_unlock(&state->idev->mc_list_lock);
in_dev_put(state->idev);
state->idev = NULL;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 6fd69feffce4..b9f1586ae455 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -169,14 +169,18 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
/* Memory Tracking Functions. */
-static __inline__ void frag_kfree_skb(struct sk_buff *skb)
+static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
+ if (work)
+ *work -= skb->truesize;
atomic_sub(skb->truesize, &ip_frag_mem);
kfree_skb(skb);
}
-static __inline__ void frag_free_queue(struct ipq *qp)
+static __inline__ void frag_free_queue(struct ipq *qp, int *work)
{
+ if (work)
+ *work -= sizeof(struct ipq);
atomic_sub(sizeof(struct ipq), &ip_frag_mem);
kfree(qp);
}
@@ -195,7 +199,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
/* Destruction primitives. */
/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp)
+static void ip_frag_destroy(struct ipq *qp, int *work)
{
struct sk_buff *fp;
@@ -207,18 +211,18 @@ static void ip_frag_destroy(struct ipq *qp)
while (fp) {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(fp);
+ frag_kfree_skb(fp, work);
fp = xp;
}
/* Finally, release the queue descriptor itself. */
- frag_free_queue(qp);
+ frag_free_queue(qp, work);
}
-static __inline__ void ipq_put(struct ipq *ipq)
+static __inline__ void ipq_put(struct ipq *ipq, int *work)
{
if (atomic_dec_and_test(&ipq->refcnt))
- ip_frag_destroy(ipq);
+ ip_frag_destroy(ipq, work);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -243,10 +247,13 @@ static void ip_evictor(void)
{
struct ipq *qp;
struct list_head *tmp;
+ int work;
- for(;;) {
- if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh)
- return;
+ work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
+ if (work <= 0)
+ return;
+
+ while (work > 0) {
read_lock(&ipfrag_lock);
if (list_empty(&ipq_lru_list)) {
read_unlock(&ipfrag_lock);
@@ -262,7 +269,7 @@ static void ip_evictor(void)
ipq_kill(qp);
spin_unlock(&qp->lock);
- ipq_put(qp);
+ ipq_put(qp, &work);
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
}
}
@@ -294,7 +301,7 @@ static void ip_expire(unsigned long arg)
}
out:
spin_unlock(&qp->lock);
- ipq_put(qp);
+ ipq_put(qp, NULL);
}
/* Creation primitives. */
@@ -317,7 +324,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
atomic_inc(&qp->refcnt);
write_unlock(&ipfrag_lock);
qp_in->last_in |= COMPLETE;
- ipq_put(qp_in);
+ ipq_put(qp_in, NULL);
return qp;
}
}
@@ -506,7 +513,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->fragments = next;
qp->meat -= free_it->len;
- frag_kfree_skb(free_it);
+ frag_kfree_skb(free_it, NULL);
}
}
@@ -657,7 +664,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
ret = ip_frag_reasm(qp, dev);
spin_unlock(&qp->lock);
- ipq_put(qp);
+ ipq_put(qp, NULL);
return ret;
}
diff --git a/net/ipv4/ip_nat_dumb.c b/net/ipv4/ip_nat_dumb.c
deleted file mode 100644
index b58b5e22d019..000000000000
--- a/net/ipv4/ip_nat_dumb.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * Dumb Network Address Translation.
- *
- * Version: $Id: ip_nat_dumb.c,v 1.11 2000/12/13 18:31:48 davem Exp $
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Fixes:
- * Rani Assaf : A zero checksum is a special case
- * only in UDP
- * Rani Assaf : Added ICMP messages rewriting
- * Rani Assaf : Repaired wrong changes, made by ANK.
- *
- *
- * NOTE: It is just working model of real NAT.
- */
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/netdevice.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#include <net/icmp.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <linux/route.h>
-#include <net/route.h>
-#include <net/ip_fib.h>
-
-
-int
-ip_do_nat(struct sk_buff *skb)
-{
- struct rtable *rt = (struct rtable*)skb->dst;
- struct iphdr *iph = skb->nh.iph;
- u32 odaddr = iph->daddr;
- u32 osaddr = iph->saddr;
- u16 check;
-
- IPCB(skb)->flags |= IPSKB_TRANSLATED;
-
- /* Rewrite IP header */
- iph->daddr = rt->rt_dst_map;
- iph->saddr = rt->rt_src_map;
- iph->check = 0;
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-
- /* If it is the first fragment, rewrite protocol headers */
-
- if (!(iph->frag_off & htons(IP_OFFSET))) {
- u16 *cksum;
-
- switch(iph->protocol) {
- case IPPROTO_TCP:
- cksum = (u16*)&((struct tcphdr*)(((char*)iph) + (iph->ihl<<2)))->check;
- if ((u8*)(cksum+1) > skb->tail)
- goto truncated;
- check = *cksum;
- if (skb->ip_summed != CHECKSUM_HW)
- check = ~check;
- check = csum_tcpudp_magic(iph->saddr, iph->daddr, 0, 0, check);
- check = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check);
- if (skb->ip_summed == CHECKSUM_HW)
- check = ~check;
- *cksum = check;
- break;
- case IPPROTO_UDP:
- cksum = (u16*)&((struct udphdr*)(((char*)iph) + (iph->ihl<<2)))->check;
- if ((u8*)(cksum+1) > skb->tail)
- goto truncated;
- if ((check = *cksum) != 0) {
- check = csum_tcpudp_magic(iph->saddr, iph->daddr, 0, 0, ~check);
- check = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check);
- *cksum = check ? : 0xFFFF;
- }
- break;
- case IPPROTO_ICMP:
- {
- struct icmphdr *icmph = (struct icmphdr*)((char*)iph + (iph->ihl<<2));
- struct iphdr *ciph;
- u32 idaddr, isaddr;
- int updated;
-
- if ((icmph->type != ICMP_DEST_UNREACH) &&
- (icmph->type != ICMP_TIME_EXCEEDED) &&
- (icmph->type != ICMP_PARAMETERPROB))
- break;
-
- ciph = (struct iphdr *) (icmph + 1);
-
- if ((u8*)(ciph+1) > skb->tail)
- goto truncated;
-
- isaddr = ciph->saddr;
- idaddr = ciph->daddr;
- updated = 0;
-
- if (rt->rt_flags&RTCF_DNAT && ciph->saddr == odaddr) {
- ciph->saddr = iph->daddr;
- updated = 1;
- }
- if (rt->rt_flags&RTCF_SNAT) {
- if (ciph->daddr != osaddr) {
- struct fib_result res;
- unsigned flags = 0;
- struct flowi fl = {
- .iif = skb->dev->ifindex,
- .nl_u =
- { .ip4_u =
- { .daddr = ciph->saddr,
- .saddr = ciph->daddr,
-#ifdef CONFIG_IP_ROUTE_TOS
- .tos = RT_TOS(ciph->tos)
-#endif
- } },
- .proto = ciph->protocol };
-
- /* Use fib_lookup() until we get our own
- * hash table of NATed hosts -- Rani
- */
- if (fib_lookup(&fl, &res) == 0) {
- if (res.r) {
- ciph->daddr = fib_rules_policy(ciph->daddr, &res, &flags);
- if (ciph->daddr != idaddr)
- updated = 1;
- }
- fib_res_put(&res);
- }
- } else {
- ciph->daddr = iph->saddr;
- updated = 1;
- }
- }
- if (updated) {
- cksum = &icmph->checksum;
- /* Using tcpudp primitive. Why not? */
- check = csum_tcpudp_magic(ciph->saddr, ciph->daddr, 0, 0, ~(*cksum));
- *cksum = csum_tcpudp_magic(~isaddr, ~idaddr, 0, 0, ~check);
- }
- break;
- }
- default:
- break;
- }
- }
- return NET_RX_SUCCESS;
-
-truncated:
- /* should be return NET_RX_BAD; */
- return -EINVAL;
-}
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 7ce7469a3c04..095028111e64 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -120,20 +120,20 @@ out:
return err;
}
-static int ipcomp_output(struct sk_buff **pskb)
+static int ipcomp_output(struct sk_buff *skb)
{
int err;
- struct dst_entry *dst = (*pskb)->dst;
+ struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
struct iphdr *iph;
struct ip_comp_hdr *ipch;
struct ipcomp_data *ipcd = x->data;
int hdr_len = 0;
- iph = (*pskb)->nh.iph;
- iph->tot_len = htons((*pskb)->len);
+ iph = skb->nh.iph;
+ iph->tot_len = htons(skb->len);
hdr_len = iph->ihl * 4;
- if (((*pskb)->len - hdr_len) < ipcd->threshold) {
+ if ((skb->len - hdr_len) < ipcd->threshold) {
/* Don't bother compressing */
if (x->props.mode) {
ip_send_check(iph);
@@ -141,17 +141,17 @@ static int ipcomp_output(struct sk_buff **pskb)
goto out_ok;
}
- if ((skb_is_nonlinear(*pskb) || skb_cloned(*pskb)) &&
- skb_linearize(*pskb, GFP_ATOMIC) != 0) {
+ if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+ skb_linearize(skb, GFP_ATOMIC) != 0) {
err = -ENOMEM;
goto error;
}
- err = ipcomp_compress(x, *pskb);
+ err = ipcomp_compress(x, skb);
if (err) {
if (err == -EMSGSIZE) {
if (x->props.mode) {
- iph = (*pskb)->nh.iph;
+ iph = skb->nh.iph;
ip_send_check(iph);
}
goto out_ok;
@@ -160,8 +160,8 @@ static int ipcomp_output(struct sk_buff **pskb)
}
/* Install ipcomp header, convert into ipcomp datagram. */
- iph = (*pskb)->nh.iph;
- iph->tot_len = htons((*pskb)->len);
+ iph = skb->nh.iph;
+ iph->tot_len = htons(skb->len);
ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4);
ipch->nexthdr = iph->protocol;
ipch->flags = 0;
@@ -288,6 +288,9 @@ static int ipcomp_init_state(struct xfrm_state *x, void *args)
if (!x->calg)
goto out;
+ if (x->encap)
+ goto out;
+
err = -ENOMEM;
ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
if (!ipcd)
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 9f4b94f0768d..323a1e7746b8 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -383,21 +383,23 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_conn *cp = NULL;
struct iphdr *iph = skb->nh.iph;
struct ip_vs_dest *dest;
- __u16 ports[2];
+ __u16 _ports[2], *pptr;
- if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, iph->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
return NULL;
/*
* Persistent service
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
- return ip_vs_sched_persist(svc, skb, ports);
+ return ip_vs_sched_persist(svc, skb, pptr);
/*
* Non-persistent service
*/
- if (!svc->fwmark && ports[1] != svc->port) {
+ if (!svc->fwmark && pptr[1] != svc->port) {
if (!svc->port)
IP_VS_ERR("Schedule: port zero only supported "
"in persistent services, "
@@ -415,9 +417,9 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* Create a connection entry.
*/
cp = ip_vs_conn_new(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1],
- dest->addr, dest->port?dest->port:ports[1],
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1],
+ dest->addr, dest->port?dest->port:pptr[1],
0,
dest);
if (cp == NULL)
@@ -444,10 +446,12 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
- __u16 ports[2];
+ __u16 _ports[2], *pptr;
struct iphdr *iph = skb->nh.iph;
- if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0) {
+ pptr = skb_header_pointer(skb, iph->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP;
}
@@ -465,8 +469,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
/* create a new connection entry */
IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
cp = ip_vs_conn_new(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1],
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1],
0, 0,
IP_VS_CONN_F_BYPASS,
NULL);
@@ -494,7 +498,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
- if ((svc->port == FTPPORT) && (ports[1] != FTPPORT)) {
+ if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
ip_vs_service_put(svc);
return NF_ACCEPT;
}
@@ -607,8 +611,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
{
struct sk_buff *skb = *pskb;
struct iphdr *iph;
- struct icmphdr icmph;
- struct iphdr ciph; /* The ip header contained within the ICMP */
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, ihl, verdict;
@@ -625,11 +629,12 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
iph = skb->nh.iph;
offset = ihl = iph->ihl * 4;
- if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0)
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
return NF_DROP;
IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
- icmph.type, ntohs(icmp_id(&icmph)),
+ ic->type, ntohs(icmp_id(ic)),
NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
/*
@@ -639,33 +644,34 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
- if ((icmph.type != ICMP_DEST_UNREACH) &&
- (icmph.type != ICMP_SOURCE_QUENCH) &&
- (icmph.type != ICMP_TIME_EXCEEDED)) {
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
*related = 0;
return NF_ACCEPT;
}
/* Now find the contained IP header */
- offset += sizeof(icmph);
- if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0)
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(ciph.protocol);
+ pp = ip_vs_proto_get(cih->protocol);
if (!pp)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
- offset += ciph.ihl * 4;
+ offset += cih->ihl * 4;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(skb, pp, cih, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -685,7 +691,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
goto out;
}
- if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol)
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
if (!ip_vs_make_skb_writable(pskb, offset))
goto out;
@@ -707,11 +713,13 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
static inline int is_tcp_reset(const struct sk_buff *skb)
{
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
- if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) < 0)
+ th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return 0;
- return tcph.rst;
+ return th->rst;
}
/*
@@ -777,12 +785,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP)) {
- __u16 ports[2];
+ __u16 _ports[2], *pptr;
- if (skb_copy_bits(skb, ihl, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, ihl,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(iph->protocol,
- iph->saddr, ports[0])) {
+ iph->saddr, pptr[0])) {
/*
* Notify the real server: there is no
* existing entry if it is not RST
@@ -866,8 +876,8 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
{
struct sk_buff *skb = *pskb;
struct iphdr *iph;
- struct icmphdr icmph;
- struct iphdr ciph; /* The ip header contained within the ICMP */
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, ihl, verdict;
@@ -884,11 +894,12 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
iph = skb->nh.iph;
offset = ihl = iph->ihl * 4;
- if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0)
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
return NF_DROP;
IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
- icmph.type, ntohs(icmp_id(&icmph)),
+ ic->type, ntohs(icmp_id(ic)),
NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
/*
@@ -898,33 +909,34 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
- if ((icmph.type != ICMP_DEST_UNREACH) &&
- (icmph.type != ICMP_SOURCE_QUENCH) &&
- (icmph.type != ICMP_TIME_EXCEEDED)) {
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
*related = 0;
return NF_ACCEPT;
}
/* Now find the contained IP header */
- offset += sizeof(icmph);
- if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0)
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(ciph.protocol);
+ pp = ip_vs_proto_get(cih->protocol);
if (!pp)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
- offset += ciph.ihl * 4;
+ offset += cih->ihl * 4;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(skb, pp, cih, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -941,7 +953,7 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
- if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol)
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
/* do not touch skb anymore */
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index caf24e3754c6..dfd0a7dd3b75 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -166,27 +166,33 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
const char *msg)
{
char buf[128];
- __u16 ports[2];
- struct iphdr iph;
+ struct iphdr _iph, *ih;
- if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
- else if (iph.frag_off & __constant_htons(IP_OFFSET))
+ else if (ih->frag_off & __constant_htons(IP_OFFSET))
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
- pp->name, NIPQUAD(iph.saddr),
- NIPQUAD(iph.daddr));
- else if (skb_copy_bits(skb, offset + iph.ihl*4, ports, sizeof(ports)) < 0)
- sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
- pp->name,
- NIPQUAD(iph.saddr),
- NIPQUAD(iph.daddr));
- else
- sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
- pp->name,
- NIPQUAD(iph.saddr),
- ntohs(ports[0]),
- NIPQUAD(iph.daddr),
- ntohs(ports[1]));
+ pp->name, NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr));
+ else {
+ __u16 _ports[2], *pptr
+;
+ pptr = skb_header_pointer(skb, offset + ih->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
+ sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
+ pp->name,
+ NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr));
+ else
+ sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
+ pp->name,
+ NIPQUAD(ih->saddr),
+ ntohs(pptr[0]),
+ NIPQUAD(ih->daddr),
+ ntohs(pptr[1]));
+ }
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index ffea536b09a8..453e94a0bbd7 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -129,14 +129,15 @@ ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
- struct iphdr iph;
+ struct iphdr _iph, *ih;
- if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
else
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
- pp->name, NIPQUAD(iph.saddr),
- NIPQUAD(iph.daddr));
+ pp->name, NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr));
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index 1922388327b8..478e5c7c7e8e 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -128,14 +128,15 @@ esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
- struct iphdr iph;
+ struct iphdr _iph, *ih;
- if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
else
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
- pp->name, NIPQUAD(iph.saddr),
- NIPQUAD(iph.daddr));
+ pp->name, NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr));
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c
index d611b5a36d48..747e0333f5de 100644
--- a/net/ipv4/ipvs/ip_vs_proto_icmp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_icmp.c
@@ -104,24 +104,29 @@ icmp_debug_packet(struct ip_vs_protocol *pp,
const char *msg)
{
char buf[256];
- struct iphdr iph;
- struct icmphdr icmph;
+ struct iphdr _iph, *ih;
- if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
- else if (iph.frag_off & __constant_htons(IP_OFFSET))
+ else if (ih->frag_off & __constant_htons(IP_OFFSET))
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
- pp->name, NIPQUAD(iph.saddr),
- NIPQUAD(iph.daddr));
- else if (skb_copy_bits(skb, offset + iph.ihl*4, &icmph, sizeof(icmph)) < 0)
- sprintf(buf, "%s TRUNCATED to %u bytes\n",
- pp->name, skb->len - offset);
- else
- sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d",
- pp->name, NIPQUAD(iph.saddr),
- NIPQUAD(iph.daddr),
- icmph.type, icmph.code);
-
+ pp->name, NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr));
+ else {
+ struct icmphdr _icmph, *ic;
+
+ ic = skb_header_pointer(skb, offset + ih->ihl*4,
+ sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ sprintf(buf, "%s TRUNCATED to %u bytes\n",
+ pp->name, skb->len - offset);
+ else
+ sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d",
+ pp->name, NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr),
+ ic->type, ic->code);
+ }
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 2f00e914288a..bd8f898bfe19 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -29,19 +29,20 @@ static struct ip_vs_conn *
tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
- __u16 ports[2];
+ __u16 _ports[2], *pptr;
- if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+ if (pptr == NULL)
return NULL;
if (likely(!inverse)) {
return ip_vs_conn_in_get(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1]);
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1]);
} else {
return ip_vs_conn_in_get(iph->protocol,
- iph->daddr, ports[1],
- iph->saddr, ports[0]);
+ iph->daddr, pptr[1],
+ iph->saddr, pptr[0]);
}
}
@@ -49,19 +50,20 @@ static struct ip_vs_conn *
tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
- __u16 ports[2];
+ __u16 _ports[2], *pptr;
- if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+ if (pptr == NULL)
return NULL;
if (likely(!inverse)) {
return ip_vs_conn_out_get(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1]);
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1]);
} else {
return ip_vs_conn_out_get(iph->protocol,
- iph->daddr, ports[1],
- iph->saddr, ports[0]);
+ iph->daddr, pptr[1],
+ iph->saddr, pptr[0]);
}
}
@@ -72,16 +74,18 @@ tcp_conn_schedule(struct sk_buff *skb,
int *verdict, struct ip_vs_conn **cpp)
{
struct ip_vs_service *svc;
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
+ th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
*verdict = NF_DROP;
return 0;
}
- if (tcph.syn &&
+ if (th->syn &&
(svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, tcph.dest))) {
+ skb->nh.iph->daddr, th->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -483,13 +487,15 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0)
+ th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return 0;
spin_lock(&cp->lock);
- set_tcp_state(pp, cp, direction, &tcph);
+ set_tcp_state(pp, cp, direction, th);
spin_unlock(&cp->lock);
return 1;
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 81501c938605..443ec4578d40 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -26,19 +26,20 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
struct ip_vs_conn *cp;
- __u16 ports[2];
+ __u16 _ports[2], *pptr;
- if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+ if (pptr == NULL)
return NULL;
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1]);
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1]);
} else {
cp = ip_vs_conn_in_get(iph->protocol,
- iph->daddr, ports[1],
- iph->saddr, ports[0]);
+ iph->daddr, pptr[1],
+ iph->saddr, pptr[0]);
}
return cp;
@@ -50,19 +51,21 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
struct ip_vs_conn *cp;
- __u16 ports[2];
+ __u16 _ports[2], *pptr;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
return NULL;
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1]);
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1]);
} else {
cp = ip_vs_conn_out_get(iph->protocol,
- iph->daddr, ports[1],
- iph->saddr, ports[0]);
+ iph->daddr, pptr[1],
+ iph->saddr, pptr[0]);
}
return cp;
@@ -74,15 +77,17 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
struct ip_vs_service *svc;
- struct udphdr udph;
+ struct udphdr _udph, *uh;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
+ uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL) {
*verdict = NF_DROP;
return 0;
}
if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, udph.dest))) {
+ skb->nh.iph->daddr, uh->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -230,13 +235,14 @@ udp_dnat_handler(struct sk_buff **pskb,
static int
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
- struct udphdr udph;
+ struct udphdr _udph, *uh;
unsigned int udphoff = skb->nh.iph->ihl*4;
- if (skb_copy_bits(skb, udphoff, &udph, sizeof(udph)) < 0)
+ uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
+ if (uh == NULL)
return 0;
- if (udph.check != 0) {
+ if (uh->check != 0) {
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, udphoff,
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 204767be411e..3a85f7a8d02a 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -234,11 +234,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
- __u16 pt;
- if (skb_copy_bits(skb, iph->ihl*4, &pt, sizeof(pt)) < 0)
+ __u16 _pt, *p;
+ p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+ if (p == NULL)
goto tx_error;
- ip_vs_conn_fill_cport(cp, pt);
- IP_VS_DBG(10, "filled cport=%d\n", ntohs(pt));
+ ip_vs_conn_fill_cport(cp, *p);
+ IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index b58141ead442..07c0fb9044b8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -628,5 +628,17 @@ config IP_NF_MATCH_REALM
If you want to compile it as a module, say M here and read
Documentation/modules.txt. If unsure, say `N'.
+config IP_NF_CT_ACCT
+ bool "Connection tracking flow accounting"
+ depends on IP_NF_CONNTRACK
+
+config IP_NF_MATCH_SCTP
+ tristate 'SCTP protocol match support'
+ depends on IP_NF_IPTABLES
+
+config IP_NF_CT_PROTO_SCTP
+ tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
+ depends on IP_NF_CONNTRACK && EXPERIMENTAL
+
endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index bdb23fde133f..164f4332a72d 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -19,6 +19,9 @@ ipchains-objs := $(ip_nf_compat-objs) ipchains_core.o
# connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+# SCTP protocol connection tracking
+obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
+
# connection tracking helpers
obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
@@ -43,6 +46,7 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
+obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index 4e8f4d83baf2..a54ef782f8b5 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
+#include <linux/moduleparam.h>
#include <net/checksum.h>
#include <net/udp.h>
@@ -34,7 +35,7 @@ static unsigned int master_timeout = 300;
MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
MODULE_DESCRIPTION("Amanda connection tracking module");
MODULE_LICENSE("GPL");
-MODULE_PARM(master_timeout, "i");
+module_param(master_timeout, int, 0600);
MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
static char *conns[] = { "DATA ", "MESG ", "INDEX " };
@@ -48,7 +49,7 @@ static int help(struct sk_buff *skb,
{
struct ip_conntrack_expect *exp;
struct ip_ct_amanda_expect *exp_amanda_info;
- char *data, *data_limit, *tmp;
+ char *amp, *data, *data_limit, *tmp;
unsigned int dataoff, i;
u_int16_t port, len;
@@ -58,7 +59,7 @@ static int help(struct sk_buff *skb,
/* increase the UDP timeout of the master connection as replies from
* Amanda clients to the server can be quite delayed */
- ip_ct_refresh(ct, master_timeout * HZ);
+ ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
/* No data? */
dataoff = skb->nh.iph->ihl*4 + sizeof(struct udphdr);
@@ -69,9 +70,11 @@ static int help(struct sk_buff *skb,
}
LOCK_BH(&amanda_buffer_lock);
- skb_copy_bits(skb, dataoff, amanda_buffer, skb->len - dataoff);
- data = amanda_buffer;
- data_limit = amanda_buffer + skb->len - dataoff;
+ amp = skb_header_pointer(skb, dataoff,
+ skb->len - dataoff, amanda_buffer);
+ BUG_ON(amp == NULL);
+ data = amp;
+ data_limit = amp + skb->len - dataoff;
*data_limit = '\0';
/* Search for the CONNECT string */
@@ -107,7 +110,7 @@ static int help(struct sk_buff *skb,
exp->mask.dst.u.tcp.port = 0xFFFF;
exp_amanda_info = &exp->help.exp_amanda_info;
- exp_amanda_info->offset = tmp - amanda_buffer;
+ exp_amanda_info->offset = tmp - amp;
exp_amanda_info->port = port;
exp_amanda_info->len = len;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 00a89f4f8d8b..f6def5a4b491 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -34,8 +34,9 @@
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
-/* For ERR_PTR(). Yeah, I know... --RR */
-#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
/* This rwlock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
@@ -59,17 +60,23 @@
DECLARE_RWLOCK(ip_conntrack_lock);
DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
+/* ip_conntrack_standalone needs this */
+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL(ip_conntrack_count);
+
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
LIST_HEAD(protocol_list);
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
-static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;
+static kmem_cache_t *ip_conntrack_expect_cachep;
struct ip_conntrack ip_conntrack_untracked;
+DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
@@ -127,11 +134,11 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple)
}
int
-get_tuple(const struct iphdr *iph,
- const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_protocol *protocol)
+ip_ct_get_tuple(const struct iphdr *iph,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_protocol *protocol)
{
/* Never happen */
if (iph->frag_off & htons(IP_OFFSET)) {
@@ -147,10 +154,10 @@ get_tuple(const struct iphdr *iph,
return protocol->pkt_to_tuple(skb, dataoff, tuple);
}
-static int
-invert_tuple(struct ip_conntrack_tuple *inverse,
- const struct ip_conntrack_tuple *orig,
- const struct ip_conntrack_protocol *protocol)
+int
+ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
+ const struct ip_conntrack_tuple *orig,
+ const struct ip_conntrack_protocol *protocol)
{
inverse->src.ip = orig->dst.ip;
inverse->dst.ip = orig->src.ip;
@@ -177,7 +184,8 @@ destroy_expect(struct ip_conntrack_expect *exp)
IP_NF_ASSERT(atomic_read(&exp->use) == 0);
IP_NF_ASSERT(!timer_pending(&exp->timeout));
- kfree(exp);
+ kmem_cache_free(ip_conntrack_expect_cachep, exp);
+ __get_cpu_var(ip_conntrack_stat).expect_delete++;
}
inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
@@ -336,7 +344,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
list_del(&ct->master->expected_list);
master = ct->master->expectant;
}
- kfree(ct->master);
+ kmem_cache_free(ip_conntrack_expect_cachep, ct->master);
}
WRITE_UNLOCK(&ip_conntrack_lock);
@@ -346,12 +354,15 @@ destroy_conntrack(struct nf_conntrack *nfct)
DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
kmem_cache_free(ip_conntrack_cachep, ct);
atomic_dec(&ip_conntrack_count);
+ __get_cpu_var(ip_conntrack_stat).delete++;
}
static void death_by_timeout(unsigned long ul_conntrack)
{
struct ip_conntrack *ct = (void *)ul_conntrack;
+ __get_cpu_var(ip_conntrack_stat).delete_list++;
+
WRITE_LOCK(&ip_conntrack_lock);
clean_from_lists(ct);
WRITE_UNLOCK(&ip_conntrack_lock);
@@ -374,13 +385,19 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
{
struct ip_conntrack_tuple_hash *h;
unsigned int hash = hash_conntrack(tuple);
+ /* use per_cpu() to avoid multiple calls to smp_processor_id() */
+ unsigned int cpu = smp_processor_id();
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
- h = LIST_FIND(&ip_conntrack_hash[hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- tuple, ignored_conntrack);
- return h;
+ list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+ if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+ per_cpu(ip_conntrack_stat, cpu).found++;
+ return h;
+ }
+ per_cpu(ip_conntrack_stat, cpu).searched++;
+ }
+
+ return NULL;
}
/* Find a connection corresponding to a tuple. */
@@ -474,10 +491,12 @@ __ip_conntrack_confirm(struct nf_ct_info *nfct)
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
WRITE_UNLOCK(&ip_conntrack_lock);
+ __get_cpu_var(ip_conntrack_stat).insert++;
return NF_ACCEPT;
}
WRITE_UNLOCK(&ip_conntrack_lock);
+ __get_cpu_var(ip_conntrack_stat).insert_failed++;
return NF_DROP;
}
@@ -496,83 +515,6 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
return h != NULL;
}
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-struct ip_conntrack *
-icmp_error_track(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct ip_conntrack_tuple innertuple, origtuple;
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } inside;
- struct ip_conntrack_protocol *innerproto;
- struct ip_conntrack_tuple_hash *h;
- int dataoff;
-
- IP_NF_ASSERT(skb->nfct == NULL);
-
- /* Not enough header? */
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
- return NULL;
-
- if (inside.icmp.type != ICMP_DEST_UNREACH
- && inside.icmp.type != ICMP_SOURCE_QUENCH
- && inside.icmp.type != ICMP_TIME_EXCEEDED
- && inside.icmp.type != ICMP_PARAMETERPROB
- && inside.icmp.type != ICMP_REDIRECT)
- return NULL;
-
- /* Ignore ICMP's containing fragments (shouldn't happen) */
- if (inside.ip.frag_off & htons(IP_OFFSET)) {
- DEBUGP("icmp_error_track: fragment of proto %u\n",
- inside.ip.protocol);
- return NULL;
- }
-
- innerproto = ip_ct_find_proto(inside.ip.protocol);
- dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
- /* Are they talking about one of our connections? */
- if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
- DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
- return NULL;
- }
-
- /* Ordinarily, we'd expect the inverted tupleproto, but it's
- been preserved inside the ICMP. */
- if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
- DEBUGP("icmp_error_track: Can't invert tuple\n");
- return NULL;
- }
-
- *ctinfo = IP_CT_RELATED;
-
- h = ip_conntrack_find_get(&innertuple, NULL);
- if (!h) {
- /* Locally generated ICMPs will match inverted if they
- haven't been SNAT'ed yet */
- /* FIXME: NAT code has to handle half-done double NAT --RR */
- if (hooknum == NF_IP_LOCAL_OUT)
- h = ip_conntrack_find_get(&origtuple, NULL);
-
- if (!h) {
- DEBUGP("icmp_error_track: no match\n");
- return NULL;
- }
- /* Reverse direction from that found */
- if (DIRECTION(h) != IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- } else {
- if (DIRECTION(h) == IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- }
-
- /* Update skb to refer to this connection */
- skb->nfct = &h->ctrack->infos[*ctinfo];
- return h->ctrack;
-}
-
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
@@ -598,6 +540,7 @@ static int early_drop(struct list_head *chain)
if (del_timer(&h->ctrack->timeout)) {
death_by_timeout((unsigned long)h->ctrack);
dropped = 1;
+ __get_cpu_var(ip_conntrack_stat).early_drop++;
}
ip_conntrack_put(h->ctrack);
return dropped;
@@ -654,7 +597,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
}
}
- if (!invert_tuple(&repl_tuple, tuple, protocol)) {
+ if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
DEBUGP("Can't invert tuple.\n");
return NULL;
}
@@ -693,41 +636,53 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_expect *, tuple);
READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
- /* If master is not in hash table yet (ie. packet hasn't left
- this machine yet), how can other end know about expected?
- Hence these are not the droids you are looking for (if
- master ct never got confirmed, we'd hold a reference to it
- and weird things would happen to future packets). */
- if (expected && !is_confirmed(expected->expectant))
- expected = NULL;
-
- /* Look up the conntrack helper for master connections only */
- if (!expected)
- conntrack->helper = ip_ct_find_helper(&repl_tuple);
+ if (expected) {
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+ master ct never got confirmed, we'd hold a reference to it
+ and weird things would happen to future packets). */
+ if (!is_confirmed(expected->expectant)) {
+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
+ goto end;
+ }
- /* If the expectation is dying, then this is a loser. */
- if (expected
- && expected->expectant->helper->timeout
- && ! del_timer(&expected->timeout))
- expected = NULL;
+ /* Expectation is dying... */
+ if (expected->expectant->helper->timeout
+ && !del_timer(&expected->timeout))
+ goto end;
- if (expected) {
DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
conntrack, expected);
/* Welcome, Mr. Bond. We've been expecting you... */
+ IP_NF_ASSERT(master_ct(conntrack));
__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
conntrack->master = expected;
expected->sibling = conntrack;
LIST_DELETE(&ip_conntrack_expect_list, expected);
expected->expectant->expecting--;
nf_conntrack_get(&master_ct(conntrack)->infos[0]);
+
+ /* this is a braindead... --pablo */
+ atomic_inc(&ip_conntrack_count);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ if (expected->expectfn)
+ expected->expectfn(conntrack);
+
+ __get_cpu_var(ip_conntrack_stat).expect_new++;
+
+ goto ret;
+ } else {
+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
+
+ __get_cpu_var(ip_conntrack_stat).new++;
}
- atomic_inc(&ip_conntrack_count);
+
+end: atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
- if (expected && expected->expectfn)
- expected->expectfn(conntrack);
- return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
+ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
@@ -743,7 +698,8 @@ resolve_normal_ct(struct sk_buff *skb,
IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
- if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
+ if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
+ &tuple,proto))
return NULL;
/* look for tuple match */
@@ -823,38 +779,51 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
#endif
/* Previously seen (loopback or untracked)? Ignore. */
- if ((*pskb)->nfct)
+ if ((*pskb)->nfct) {
+ __get_cpu_var(ip_conntrack_stat).ignore++;
return NF_ACCEPT;
+ }
proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
- /* It may be an icmp error... */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
- && icmp_error_track(*pskb, &ctinfo, hooknum))
- return NF_ACCEPT;
+ /* It may be an special packet, error, unclean...
+ * inverse of the return code tells to the netfilter
+ * core what to do with the packet. */
+ if (proto->error != NULL
+ && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
+ __get_cpu_var(ip_conntrack_stat).icmp_error++;
+ return -ret;
+ }
- if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
+ if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
/* Not valid part of a connection */
+ __get_cpu_var(ip_conntrack_stat).invalid++;
return NF_ACCEPT;
+ }
- if (IS_ERR(ct))
+ if (IS_ERR(ct)) {
/* Too stressed to deal. */
+ __get_cpu_var(ip_conntrack_stat).drop++;
return NF_DROP;
+ }
IP_NF_ASSERT((*pskb)->nfct);
ret = proto->packet(ct, *pskb, ctinfo);
- if (ret == -1) {
- /* Invalid */
+ if (ret < 0) {
+ /* Invalid: inverse of the return code tells
+ * the netfilter core what to do*/
nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL;
- return NF_ACCEPT;
+ __get_cpu_var(ip_conntrack_stat).invalid++;
+ return -ret;
}
if (ret != NF_DROP && ct->helper) {
ret = ct->helper->help(*pskb, ct, ctinfo);
if (ret == -1) {
/* Invalid */
+ __get_cpu_var(ip_conntrack_stat).invalid++;
nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL;
return NF_ACCEPT;
@@ -869,7 +838,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig)
{
- return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
+ return ip_ct_invert_tuple(inverse, orig,
+ ip_ct_find_proto(orig->dst.protonum));
}
static inline int resent_expect(const struct ip_conntrack_expect *i,
@@ -923,9 +893,8 @@ struct ip_conntrack_expect *
ip_conntrack_expect_alloc(void)
{
struct ip_conntrack_expect *new;
-
- new = (struct ip_conntrack_expect *)
- kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
+
+ new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
if (!new) {
DEBUGP("expect_related: OOM allocating expect\n");
return NULL;
@@ -933,6 +902,7 @@ ip_conntrack_expect_alloc(void)
/* tuple_cmp compares whole union, we have to initialized cleanly */
memset(new, 0, sizeof(struct ip_conntrack_expect));
+ atomic_set(&new->use, 1);
return new;
}
@@ -944,7 +914,6 @@ ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
new->expectant = related_to;
new->sibling = NULL;
- atomic_set(&new->use, 1);
/* add to expected list for this connection */
list_add_tail(&new->expected_list, &related_to->sibling_list);
@@ -997,7 +966,8 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
}
WRITE_UNLOCK(&ip_conntrack_lock);
- kfree(expect);
+ /* This expectation is not inserted so no need to lock */
+ kmem_cache_free(ip_conntrack_expect_cachep, expect);
return -EEXIST;
} else if (related_to->helper->max_expected &&
@@ -1015,7 +985,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
related_to->helper->name,
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
- kfree(expect);
+ kmem_cache_free(ip_conntrack_expect_cachep, expect);
return -EPERM;
}
DEBUGP("ip_conntrack: max number of expected "
@@ -1049,7 +1019,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
WRITE_UNLOCK(&ip_conntrack_lock);
DEBUGP("expect_related: busy!\n");
- kfree(expect);
+ kmem_cache_free(ip_conntrack_expect_cachep, expect);
return -EBUSY;
}
@@ -1057,6 +1027,8 @@ out: ip_conntrack_expect_insert(expect, related_to);
WRITE_UNLOCK(&ip_conntrack_lock);
+ __get_cpu_var(ip_conntrack_stat).expect_create++;
+
return ret;
}
@@ -1164,21 +1136,39 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
synchronize_net();
}
-/* Refresh conntrack for this many jiffies. */
-void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
+static inline void ct_add_counters(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_NF_CT_ACCT
+ if (skb) {
+ ct->counters[CTINFO2DIR(ctinfo)].packets++;
+ ct->counters[CTINFO2DIR(ctinfo)].bytes +=
+ ntohs(skb->nh.iph->tot_len);
+ }
+#endif
+}
+
+/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
+void ip_ct_refresh_acct(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb,
+ unsigned long extra_jiffies)
{
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
/* If not in hash table, timer will not be active yet */
- if (!is_confirmed(ct))
+ if (!is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
- else {
+ ct_add_counters(ct, ctinfo, skb);
+ } else {
WRITE_LOCK(&ip_conntrack_lock);
/* Need del_timer for race avoidance (may already be dying). */
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
}
+ ct_add_counters(ct, ctinfo, skb);
WRITE_UNLOCK(&ip_conntrack_lock);
}
}
@@ -1368,12 +1358,13 @@ void ip_conntrack_cleanup(void)
}
kmem_cache_destroy(ip_conntrack_cachep);
+ kmem_cache_destroy(ip_conntrack_expect_cachep);
vfree(ip_conntrack_hash);
nf_unregister_sockopt(&so_getorigdst);
}
static int hashsize;
-MODULE_PARM(hashsize, "i");
+module_param(hashsize, int, 0400);
int __init ip_conntrack_init(void)
{
@@ -1420,6 +1411,15 @@ int __init ip_conntrack_init(void)
printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
goto err_free_hash;
}
+
+ ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
+ sizeof(struct ip_conntrack_expect),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!ip_conntrack_expect_cachep) {
+ printk(KERN_ERR "Unable to create ip_expect slab cache\n");
+ goto err_free_conntrack_slab;
+ }
+
/* Don't NEED lock here, but good form anyway. */
WRITE_LOCK(&ip_conntrack_lock);
/* Sew in builtin protocols. */
@@ -1447,6 +1447,8 @@ int __init ip_conntrack_init(void)
return ret;
+err_free_conntrack_slab:
+ kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
vfree(ip_conntrack_hash);
err_unreg_sockopt:
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index b9c27d5e458b..c1403a0cc7d4 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -19,6 +19,7 @@
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+#include <linux/moduleparam.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
@@ -33,10 +34,10 @@ struct module *ip_conntrack_ftp = THIS_MODULE;
#define MAX_PORTS 8
static int ports[MAX_PORTS];
static int ports_c;
-MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+module_param_array(ports, int, ports_c, 0400);
static int loose;
-MODULE_PARM(loose, "i");
+module_param(loose, int, 0600);
#if 0
#define DEBUGP printk
@@ -247,7 +248,8 @@ static int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo)
{
unsigned int dataoff, datalen;
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
+ char *fb_ptr;
u_int32_t old_seq_aft_nl;
int old_seq_aft_nl_set, ret;
u_int32_t array[6] = { 0 };
@@ -267,10 +269,12 @@ static int help(struct sk_buff *skb,
return NF_ACCEPT;
}
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) != 0)
+ th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return NF_ACCEPT;
- dataoff = skb->nh.iph->ihl*4 + tcph.doff*4;
+ dataoff = skb->nh.iph->ihl*4 + th->doff*4;
/* No data? */
if (dataoff >= skb->len) {
DEBUGP("ftp: skblen = %u\n", skb->len);
@@ -279,26 +283,28 @@ static int help(struct sk_buff *skb,
datalen = skb->len - dataoff;
LOCK_BH(&ip_ftp_lock);
- skb_copy_bits(skb, dataoff, ftp_buffer, skb->len - dataoff);
+ fb_ptr = skb_header_pointer(skb, dataoff,
+ skb->len - dataoff, ftp_buffer);
+ BUG_ON(fb_ptr == NULL);
old_seq_aft_nl_set = ct_ftp_info->seq_aft_nl_set[dir];
old_seq_aft_nl = ct_ftp_info->seq_aft_nl[dir];
DEBUGP("conntrack_ftp: datalen %u\n", datalen);
- if (ftp_buffer[datalen - 1] == '\n') {
+ if (fb_ptr[datalen - 1] == '\n') {
DEBUGP("conntrack_ftp: datalen %u ends in \\n\n", datalen);
if (!old_seq_aft_nl_set
- || after(ntohl(tcph.seq) + datalen, old_seq_aft_nl)) {
+ || after(ntohl(th->seq) + datalen, old_seq_aft_nl)) {
DEBUGP("conntrack_ftp: updating nl to %u\n",
- ntohl(tcph.seq) + datalen);
+ ntohl(th->seq) + datalen);
ct_ftp_info->seq_aft_nl[dir] =
- ntohl(tcph.seq) + datalen;
+ ntohl(th->seq) + datalen;
ct_ftp_info->seq_aft_nl_set[dir] = 1;
}
}
if(!old_seq_aft_nl_set ||
- (ntohl(tcph.seq) != old_seq_aft_nl)) {
+ (ntohl(th->seq) != old_seq_aft_nl)) {
DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u)\n",
old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
ret = NF_ACCEPT;
@@ -315,7 +321,7 @@ static int help(struct sk_buff *skb,
for (i = 0; i < ARRAY_SIZE(search); i++) {
if (search[i].dir != dir) continue;
- found = find_pattern(ftp_buffer, skb->len - dataoff,
+ found = find_pattern(fb_ptr, skb->len - dataoff,
search[i].pattern,
search[i].plen,
search[i].skip,
@@ -333,7 +339,7 @@ static int help(struct sk_buff *skb,
if (net_ratelimit())
printk("conntrack_ftp: partial %s %u+%u\n",
search[i].pattern,
- ntohl(tcph.seq), datalen);
+ ntohl(th->seq), datalen);
ret = NF_DROP;
goto out;
} else if (found == 0) { /* No match */
@@ -343,7 +349,7 @@ static int help(struct sk_buff *skb,
DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
(int)matchlen, data + matchoff,
- matchlen, ntohl(tcph.seq) + matchoff);
+ matchlen, ntohl(th->seq) + matchoff);
/* Allocate expectation which will be inserted */
exp = ip_conntrack_expect_alloc();
@@ -357,7 +363,7 @@ static int help(struct sk_buff *skb,
/* Update the ftp info */
if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
== ct->tuplehash[dir].tuple.src.ip) {
- exp->seq = ntohl(tcph.seq) + matchoff;
+ exp->seq = ntohl(th->seq) + matchoff;
exp_ftp_info->len = matchlen;
exp_ftp_info->ftptype = search[i].ftptype;
exp_ftp_info->port = array[4] << 8 | array[5];
@@ -420,10 +426,10 @@ static int __init init(void)
int i, ret;
char *tmpname;
- if (ports[0] == 0)
- ports[0] = FTP_PORT;
+ if (ports_c == 0)
+ ports[ports_c++] = FTP_PORT;
- for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
+ for (i = 0; i < ports_c; i++) {
ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i].tuple.dst.protonum = IPPROTO_TCP;
ftp[i].mask.src.u.tcp.port = 0xFFFF;
@@ -449,7 +455,6 @@ static int __init init(void)
fini();
return ret;
}
- ports_c++;
}
return 0;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 32b5daee81c0..0d0afe131e4e 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -32,6 +32,7 @@
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+#include <linux/moduleparam.h>
#define MAX_PORTS 8
static int ports[MAX_PORTS];
@@ -44,11 +45,11 @@ static char irc_buffer[65536];
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
-MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-MODULE_PARM(max_dcc_channels, "i");
+module_param(max_dcc_channels, int, 0400);
MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-MODULE_PARM(dcc_timeout, "i");
+module_param(dcc_timeout, int, 0400);
MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
@@ -101,8 +102,8 @@ static int help(struct sk_buff *skb,
struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
{
unsigned int dataoff;
- struct tcphdr tcph;
- char *data, *data_limit;
+ struct tcphdr _tcph, *th;
+ char *data, *data_limit, *ib_ptr;
int dir = CTINFO2DIR(ctinfo);
struct ip_conntrack_expect *exp;
struct ip_ct_irc_expect *exp_irc_info = NULL;
@@ -126,19 +127,23 @@ static int help(struct sk_buff *skb,
}
/* Not a full tcp header? */
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) != 0)
+ th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return NF_ACCEPT;
/* No data? */
- dataoff = skb->nh.iph->ihl*4 + tcph.doff*4;
+ dataoff = skb->nh.iph->ihl*4 + th->doff*4;
if (dataoff >= skb->len)
return NF_ACCEPT;
LOCK_BH(&ip_irc_lock);
- skb_copy_bits(skb, dataoff, irc_buffer, skb->len - dataoff);
+ ib_ptr = skb_header_pointer(skb, dataoff,
+ skb->len - dataoff, irc_buffer);
+ BUG_ON(ib_ptr == NULL);
- data = irc_buffer;
- data_limit = irc_buffer + skb->len - dataoff;
+ data = ib_ptr;
+ data_limit = ib_ptr + skb->len - dataoff;
/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
* 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@@ -152,8 +157,8 @@ static int help(struct sk_buff *skb,
/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
- NIPQUAD(iph->saddr), ntohs(tcph.source),
- NIPQUAD(iph->daddr), ntohs(tcph.dest));
+ NIPQUAD(iph->saddr), ntohs(th->source),
+ NIPQUAD(iph->daddr), ntohs(th->dest));
for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
@@ -197,8 +202,8 @@ static int help(struct sk_buff *skb,
/* save position of address in dcc string,
* necessary for NAT */
- DEBUGP("tcph->seq = %u\n", tcph.seq);
- exp->seq = ntohl(tcph.seq) + (addr_beg_p - irc_buffer);
+ DEBUGP("tcph->seq = %u\n", th->seq);
+ exp->seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
exp_irc_info->len = (addr_end_p - addr_beg_p);
exp_irc_info->port = dcc_port;
DEBUGP("wrote info seq=%u (ofs=%u), len=%d\n",
@@ -252,10 +257,10 @@ static int __init init(void)
}
/* If no port given, default to standard irc port */
- if (ports[0] == 0)
- ports[0] = IRC_PORT;
+ if (ports_c == 0)
+ ports[ports_c++] = IRC_PORT;
- for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
+ for (i = 0; i < ports_c; i++) {
hlpr = &irc_helpers[i];
hlpr->tuple.src.u.tcp.port = htons(ports[i]);
hlpr->tuple.dst.protonum = IPPROTO_TCP;
@@ -284,7 +289,6 @@ static int __init init(void)
fini();
return -EBUSY;
}
- ports_c++;
}
return 0;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index 0df558a58020..c7a913149b8e 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -50,9 +50,9 @@ static unsigned int generic_print_conntrack(char *buffer,
/* Returns verdict for packet, or -1 for invalid. */
static int packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
- enum ip_conntrack_info conntrackinfo)
+ enum ip_conntrack_info ctinfo)
{
- ip_ct_refresh(conntrack, ip_ct_generic_timeout);
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
return NF_ACCEPT;
}
@@ -62,8 +62,14 @@ static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
return 1;
}
-struct ip_conntrack_protocol ip_conntrack_generic_protocol
-= { { NULL, NULL }, 0, "unknown",
- generic_pkt_to_tuple, generic_invert_tuple, generic_print_tuple,
- generic_print_conntrack, packet, new, NULL, NULL, NULL };
-
+struct ip_conntrack_protocol ip_conntrack_generic_protocol =
+{
+ .proto = 0,
+ .name = "unknown",
+ .pkt_to_tuple = generic_pkt_to_tuple,
+ .invert_tuple = generic_invert_tuple,
+ .print_tuple = generic_print_tuple,
+ .print_conntrack = generic_print_conntrack,
+ .packet = packet,
+ .new = new,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 47114840fa84..b2f0dee33f2a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -12,6 +12,11 @@
#include <linux/netfilter.h>
#include <linux/in.h>
#include <linux/icmp.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
unsigned long ip_ct_icmp_timeout = 30*HZ;
@@ -26,14 +31,15 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct ip_conntrack_tuple *tuple)
{
- struct icmphdr hdr;
+ struct icmphdr _hdr, *hp;
- if (skb_copy_bits(skb, dataoff, &hdr, sizeof(hdr)) != 0)
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
return 0;
- tuple->dst.u.icmp.type = hdr.type;
- tuple->src.u.icmp.id = hdr.un.echo.id;
- tuple->dst.u.icmp.code = hdr.code;
+ tuple->dst.u.icmp.type = hp->type;
+ tuple->src.u.icmp.id = hp->un.echo.id;
+ tuple->dst.u.icmp.code = hp->code;
return 1;
}
@@ -94,7 +100,7 @@ static int icmp_packet(struct ip_conntrack *ct,
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
- ip_ct_refresh(ct, ip_ct_icmp_timeout);
+ ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
}
return NF_ACCEPT;
@@ -122,7 +128,147 @@ static int icmp_new(struct ip_conntrack *conntrack,
return 1;
}
-struct ip_conntrack_protocol ip_conntrack_protocol_icmp
-= { { NULL, NULL }, IPPROTO_ICMP, "icmp",
- icmp_pkt_to_tuple, icmp_invert_tuple, icmp_print_tuple,
- icmp_print_conntrack, icmp_packet, icmp_new, NULL, NULL, NULL };
+/*
+ * Associate an ICMP error with the connection whose packet triggered it.
+ *
+ * Copies the ICMP header plus the embedded IP header out of the skb, derives
+ * the tuple of the embedded packet, inverts it and looks the result up.  On a
+ * match *ctinfo is set to IP_CT_RELATED (plus IP_CT_IS_REPLY where the
+ * direction calls for it), skb->nfct is pointed at the matching conntrack and
+ * -NF_ACCEPT is returned.  Every path that does not attach a conntrack
+ * returns plain NF_ACCEPT.
+ *
+ * NOTE(review): *ctinfo is written before the lookup, so it is also modified
+ * on the "no match" paths.
+ */
+static int
+icmp_error_message(struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct ip_conntrack_tuple innertuple, origtuple;
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } inside;
+ struct ip_conntrack_protocol *innerproto;
+ struct ip_conntrack_tuple_hash *h;
+ int dataoff;
+
+ IP_NF_ASSERT(skb->nfct == NULL);
+
+ /* Not enough header? */
+ if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
+ return NF_ACCEPT;
+
+ /* Ignore ICMP's containing fragments (shouldn't happen) */
+ if (inside.ip.frag_off & htons(IP_OFFSET)) {
+ DEBUGP("icmp_error_track: fragment of proto %u\n",
+ inside.ip.protocol);
+ return NF_ACCEPT;
+ }
+
+ innerproto = ip_ct_find_proto(inside.ip.protocol);
+ /* Offset of the embedded packet's transport header within the skb */
+ dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
+ /* Are they talking about one of our connections? */
+ if (!ip_ct_get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
+ DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
+ return NF_ACCEPT;
+ }
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+ DEBUGP("icmp_error_track: Can't invert tuple\n");
+ return NF_ACCEPT;
+ }
+
+ *ctinfo = IP_CT_RELATED;
+
+ h = ip_conntrack_find_get(&innertuple, NULL);
+ if (!h) {
+ /* Locally generated ICMPs will match inverted if they
+ haven't been SNAT'ed yet */
+ /* FIXME: NAT code has to handle half-done double NAT --RR */
+ if (hooknum == NF_IP_LOCAL_OUT)
+ h = ip_conntrack_find_get(&origtuple, NULL);
+
+ if (!h) {
+ DEBUGP("icmp_error_track: no match\n");
+ return NF_ACCEPT;
+ }
+ /* Reverse direction from that found */
+ if (DIRECTION(h) != IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+ } else {
+ if (DIRECTION(h) == IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+ }
+
+ /* Update skb to refer to this connection */
+ skb->nfct = &h->ctrack->infos[*ctinfo];
+ return -NF_ACCEPT;
+}
+
+/*
+ * Small and modified version of icmp_rcv.
+ *
+ * Pre-screens an ICMP packet before normal tracking:
+ *  - returns -NF_ACCEPT for a truncated header, a bad checksum or an ICMP
+ *    type above NR_ICMP_TYPES (each optionally logged via nf_log_packet);
+ *  - returns NF_ACCEPT for ICMP types that are not error messages;
+ *  - otherwise returns whatever icmp_error_message() decides.
+ * The checksum is only verified when hooknum is NF_IP_PRE_ROUTING.
+ */
+static int
+icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct icmphdr icmph;
+
+ /* Not enough header? */
+ if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph))!=0) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* See ip_conntrack_proto_tcp.c */
+ if (hooknum != NF_IP_PRE_ROUTING)
+ goto checksum_skipped;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_HW:
+ if (!(u16)csum_fold(skb->csum))
+ break;
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: bad HW ICMP checksum ");
+ return -NF_ACCEPT;
+ case CHECKSUM_NONE:
+ if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: bad ICMP checksum ");
+ return -NF_ACCEPT;
+ }
+ /* checksum verified: fall through */
+ default:
+ break;
+ }
+
+checksum_skipped:
+ /*
+ * 18 is the highest 'known' ICMP type. Anything else is a mystery
+ *
+ * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
+ * discarded.
+ */
+ if (icmph.type > NR_ICMP_TYPES) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_icmp: invalid ICMP type ");
+ return -NF_ACCEPT;
+ }
+
+ /* Need to track icmp error message? */
+ if (icmph.type != ICMP_DEST_UNREACH
+ && icmph.type != ICMP_SOURCE_QUENCH
+ && icmph.type != ICMP_TIME_EXCEEDED
+ && icmph.type != ICMP_PARAMETERPROB
+ && icmph.type != ICMP_REDIRECT)
+ return NF_ACCEPT;
+
+ return icmp_error_message(skb, ctinfo, hooknum);
+}
+
+struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
+{
+ .proto = IPPROTO_ICMP,
+ .name = "icmp",
+ .pkt_to_tuple = icmp_pkt_to_tuple,
+ .invert_tuple = icmp_invert_tuple,
+ .print_tuple = icmp_print_tuple,
+ .print_conntrack = icmp_print_conntrack,
+ .packet = icmp_packet,
+ .new = icmp_new,
+ .error = icmp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
new file mode 100644
index 000000000000..8296e7c52cf8
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -0,0 +1,650 @@
+/*
+ * Connection tracking protocol helper module for SCTP.
+ *
+ * SCTP is defined in RFC 2960. References to various sections in this code
+ * are to this RFC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Added support for proc manipulation of timeouts.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/string.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#if 0
+#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Protects conntrack->proto.sctp */
+static DECLARE_RWLOCK(sctp_lock);
+
+/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+ closely. They're more complex. --RR
+
+ And so for me for SCTP :D -Kiran */
+
+static const char *sctp_conntrack_names[] = {
+ "NONE",
+ "CLOSED",
+ "COOKIE_WAIT",
+ "COOKIE_ECHOED",
+ "ESTABLISHED",
+ "SHUTDOWN_SENT",
+ "SHUTDOWN_RECD",
+ "SHUTDOWN_ACK_SENT",
+};
+
+/*
+ * Postfix unit macros: "10 SECS" expands to "10 * HZ", "5 DAYS" to
+ * "5 * 24 * 60 * 60 * HZ", so every timeout below is expressed in jiffies.
+ */
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+/*
+ * Per-state conntrack timeouts, in jiffies.  "300 SECS / 1000" expands to
+ * "300 * HZ / 1000", i.e. 300 milliseconds.
+ *
+ * NOTE(review): these are unsigned long, but the sysctl entries for them in
+ * this file use .maxlen = sizeof(unsigned int) with proc_dointvec_jiffies;
+ * confirm that the widths agree on 64-bit targets.
+ */
+unsigned long ip_ct_sctp_timeout_closed = 10 SECS;
+unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS;
+unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
+unsigned long ip_ct_sctp_timeout_established = 5 DAYS;
+unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
+unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
+unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+
+static unsigned long * sctp_timeouts[]
+= { 0, /* SCTP_CONNTRACK_NONE */
+ &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
+ &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
+ &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
+ &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
+ &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
+ &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
+ &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
+ };
+
+#define sNO SCTP_CONNTRACK_NONE
+#define sCL SCTP_CONNTRACK_CLOSED
+#define sCW SCTP_CONNTRACK_COOKIE_WAIT
+#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
+#define sES SCTP_CONNTRACK_ESTABLISHED
+#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
+#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
+#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define sIV SCTP_CONNTRACK_MAX
+
+/*
+ These are the descriptions of the states:
+
+NOTE: These state names are tantalizingly similar to the states of an
+SCTP endpoint. But the interpretation of the states is a little different,
+considering that these are the states of the connection and not of an end
+point. Please note the subtleties. -Kiran
+
+NONE - Nothing so far.
+COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
+ an INIT_ACK chunk in the reply direction.
+COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
+ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
+SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
+SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
+SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
+ to that of the SHUTDOWN chunk.
+CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
+ the SHUTDOWN chunk. Connection is closed.
+*/
+
+/* TODO
+ - I have assumed that the first INIT is in the original direction.
+ This messes things when an INIT comes in the reply direction in CLOSED
+ state.
+ - Check the error type in the reply dir before transitioning from
+cookie echoed to closed.
+ - Sec 5.2.4 of RFC 2960
+ - Multi Homing support.
+*/
+
+/*
+ * SCTP conntrack state transitions.
+ *
+ * Indexed as sctp_conntracks[direction][chunk row][current state]; the
+ * result is the next state.  The chunk rows appear in the order of the
+ * index picked by new_state() (init = 0 ... shutdown_comp = 8).  sIV
+ * (SCTP_CONNTRACK_MAX) marks a transition that sctp_packet() and sctp_new()
+ * treat as invalid.
+ */
+static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+ {
+/* ORIGINAL */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie */
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+ },
+ {
+/* REPLY */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+ }
+};
+
+/*
+ * Fill in the SCTP part of a conntrack tuple (source and destination port)
+ * from the SCTP common header found at dataoff in the skb.
+ *
+ * Returns 1 on success, 0 if the header bytes could not be copied.
+ */
+static int sctp_pkt_to_tuple(const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     struct ip_conntrack_tuple *tuple)
+{
+	sctp_sctphdr_t sctph;
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	/* Only the leading 8 bytes of the common header are fetched. */
+	if (skb_copy_bits(skb, dataoff, &sctph, 8))
+		return 0;
+
+	tuple->src.u.sctp.port = sctph.source;
+	tuple->dst.u.sctp.port = sctph.dest;
+	return 1;
+}
+
+/*
+ * Build the reply-direction SCTP tuple part: the source port of tuple is
+ * taken from the destination port of orig and vice versa.  Always returns 1.
+ */
+static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig)
+{
+ DEBUGP(__FUNCTION__);
+ DEBUGP("\n");
+
+ tuple->src.u.sctp.port = orig->dst.u.sctp.port;
+ tuple->dst.u.sctp.port = orig->src.u.sctp.port;
+ return 1;
+}
+
+/*
+ * Print out the per-protocol part of the tuple: both ports, converted to
+ * host byte order.  Returns the number of characters sprintf() produced.
+ */
+static unsigned int sctp_print_tuple(char *buffer,
+				     const struct ip_conntrack_tuple *tuple)
+{
+	const u_int16_t sport = ntohs(tuple->src.u.sctp.port);
+	const u_int16_t dport = ntohs(tuple->dst.u.sctp.port);
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	return sprintf(buffer, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/*
+ * Print out the private part of the conntrack: the name of its current SCTP
+ * state.  Returns the number of characters sprintf() produced.
+ */
+static unsigned int sctp_print_conntrack(char *buffer,
+					 const struct ip_conntrack *conntrack)
+{
+	const char *state_name;
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	/* Sample the state under the lock that protects proto.sctp. */
+	READ_LOCK(&sctp_lock);
+	state_name = sctp_conntrack_names[conntrack->proto.sctp.state];
+	READ_UNLOCK(&sctp_lock);
+
+	return sprintf(buffer, "%s ", state_name);
+}
+
+/*
+ * Iterate over the chunk headers that follow the SCTP common header.
+ *
+ * Each pass copies one chunk header from the skb into sch; offset is the
+ * position of that chunk and count its ordinal.  The walk stops at the end
+ * of the skb or when a header cannot be copied.  The step is the chunk
+ * length (network byte order on the wire) rounded up to a multiple of 4.
+ *
+ * A chunk whose length field is 0 does not advance offset, so the loop body
+ * must bail out on such a chunk.
+ */
+#define for_each_sctp_chunk(skb, sch, offset, count)				\
+for ((offset) = (skb)->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t), (count) = 0;	\
+     (offset) < (skb)->len &&							\
+	!skb_copy_bits((skb), (offset), &(sch), sizeof(sch));			\
+     (offset) += (ntohs((sch).length) + 3) & ~3, (count)++)
+
+/*
+ * Some validity checks to make sure the chunks are fine.
+ *
+ * Walks every chunk of the packet and rejects it when
+ *  - a chunk carries a zero length field (the chunk iterator would never
+ *    advance past it and the walk would spin forever),
+ *  - a COOKIE ACK / COOKIE ECHO chunk is not the first chunk,
+ *  - an INIT / INIT ACK / SHUTDOWN COMPLETE chunk shares the packet with
+ *    other chunks,
+ *  - the packet carries no chunk at all.
+ * If map is non-NULL, the bit numbered by each chunk type seen is set in it.
+ *
+ * Returns 0 when the packet passes, 1 when it is malformed.
+ */
+static int do_basic_checks(struct ip_conntrack *conntrack,
+			   const struct sk_buff *skb,
+			   char *map)
+{
+	u_int32_t offset, count;
+	sctp_chunkhdr_t sch;
+	int flag;
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	flag = 0;
+
+	for_each_sctp_chunk (skb, sch, offset, count) {
+		DEBUGP("Chunk Num: %d Type: %d\n", count, sch.type);
+
+		/* Must be tested before the iterator steps: a zero length
+		   leaves offset unchanged and the loop never terminates. */
+		if (sch.length == 0) {
+			DEBUGP("Basic checks failed\n");
+			return 1;
+		}
+
+		if (sch.type == SCTP_CID_INIT
+			|| sch.type == SCTP_CID_INIT_ACK
+			|| sch.type == SCTP_CID_SHUTDOWN_COMPLETE) {
+			flag = 1;
+		}
+
+		/* Cookie Ack/Echo chunks not the first OR
+		   Init / Init Ack / Shutdown compl chunks not the only chunks */
+		if ((sch.type == SCTP_CID_COOKIE_ACK
+			|| sch.type == SCTP_CID_COOKIE_ECHO
+			|| flag)
+		     && count !=0 ) {
+			DEBUGP("Basic checks failed\n");
+			return 1;
+		}
+
+		if (map) {
+			set_bit (sch.type, (void *)map);
+		}
+	}
+
+	/* A packet without a single readable chunk is not valid SCTP. */
+	if (count == 0) {
+		DEBUGP("Basic checks failed\n");
+		return 1;
+	}
+
+	DEBUGP("Basic checks passed\n");
+	return 0;
+}
+
+/*
+ * Look up the state reached from cur_state when a chunk of chunk_type is
+ * seen travelling in direction dir.
+ *
+ * Chunk types without a row in sctp_conntracks leave the state unchanged.
+ */
+static int new_state(enum ip_conntrack_dir dir,
+		     enum sctp_conntrack cur_state,
+		     int chunk_type)
+{
+	int row;	/* chunk row in sctp_conntracks[dir] */
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	DEBUGP("Chunk type: %d\n", chunk_type);
+
+	switch (chunk_type) {
+	case SCTP_CID_INIT:
+		DEBUGP("SCTP_CID_INIT\n");
+		row = 0;
+		break;
+	case SCTP_CID_INIT_ACK:
+		DEBUGP("SCTP_CID_INIT_ACK\n");
+		row = 1;
+		break;
+	case SCTP_CID_ABORT:
+		DEBUGP("SCTP_CID_ABORT\n");
+		row = 2;
+		break;
+	case SCTP_CID_SHUTDOWN:
+		DEBUGP("SCTP_CID_SHUTDOWN\n");
+		row = 3;
+		break;
+	case SCTP_CID_SHUTDOWN_ACK:
+		DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
+		row = 4;
+		break;
+	case SCTP_CID_ERROR:
+		DEBUGP("SCTP_CID_ERROR\n");
+		row = 5;
+		break;
+	case SCTP_CID_COOKIE_ECHO:
+		DEBUGP("SCTP_CID_COOKIE_ECHO\n");
+		row = 6;
+		break;
+	case SCTP_CID_COOKIE_ACK:
+		DEBUGP("SCTP_CID_COOKIE_ACK\n");
+		row = 7;
+		break;
+	case SCTP_CID_SHUTDOWN_COMPLETE:
+		DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
+		row = 8;
+		break;
+	default:
+		/* Other chunks like DATA, SACK, HEARTBEAT and
+		   its ACK do not cause a change in state */
+		DEBUGP("Unknown chunk type, Will stay in %s\n",
+		       sctp_conntrack_names[cur_state]);
+		return cur_state;
+	}
+
+	DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
+	       dir, sctp_conntrack_names[cur_state], chunk_type,
+	       sctp_conntrack_names[sctp_conntracks[dir][row][cur_state]]);
+
+	return sctp_conntracks[dir][row][cur_state];
+}
+
+/*
+ * Per-packet state tracking for an existing SCTP conntrack.
+ *
+ * Validates the chunk layout and the verification tag (RFC 2960 Sec 8.5),
+ * runs every chunk through the state table, records the tag announced by
+ * INIT / INIT ACK chunks and refreshes the conntrack timeout for the state
+ * that was reached.
+ *
+ * Returns verdict for packet (NF_ACCEPT), or -1 for invalid.
+ */
+static int sctp_packet(struct ip_conntrack *conntrack,
+		       const struct sk_buff *skb,
+		       enum ip_conntrack_info ctinfo)
+{
+	enum sctp_conntrack newconntrack, oldsctpstate;
+	int dir = CTINFO2DIR(ctinfo);
+	sctp_sctphdr_t sctph;
+	sctp_chunkhdr_t sch;
+	u_int32_t offset, count;
+	char map[256 / sizeof (char)] = {0};
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &sctph, sizeof(sctph)) != 0)
+		return -1;
+
+	if (do_basic_checks(conntrack, skb, map) != 0)
+		return -1;
+
+	/* Check the verification tag (Sec 8.5) */
+	if (!test_bit(SCTP_CID_INIT, (void *)map)
+	    && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
+	    && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
+	    && !test_bit(SCTP_CID_ABORT, (void *)map)
+	    && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
+	    && (sctph.vtag != conntrack->proto.sctp.vtag[dir])) {
+		DEBUGP("Verification tag check failed\n");
+		return -1;
+	}
+
+	oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
+	for_each_sctp_chunk (skb, sch, offset, count) {
+		WRITE_LOCK(&sctp_lock);
+
+		/* Special cases of Verification tag check (Sec 8.5.1) */
+		if (sch.type == SCTP_CID_INIT) {
+			/* Sec 8.5.1 (A) */
+			if (sctph.vtag != 0) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		} else if (sch.type == SCTP_CID_ABORT) {
+			/* Sec 8.5.1 (B) */
+			if (!(sctph.vtag == conntrack->proto.sctp.vtag[dir])
+			    && !(sctph.vtag == conntrack->proto.sctp.vtag[!dir])) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		} else if (sch.type == SCTP_CID_SHUTDOWN_COMPLETE) {
+			/* Sec 8.5.1 (C) */
+			if (!(sctph.vtag == conntrack->proto.sctp.vtag[dir])
+			    && !(sctph.vtag == conntrack->proto.sctp.vtag[!dir]
+				 && (sch.flags & 1))) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		} else if (sch.type == SCTP_CID_COOKIE_ECHO) {
+			/* Sec 8.5.1 (D) */
+			if (!(sctph.vtag == conntrack->proto.sctp.vtag[dir])) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		}
+
+		oldsctpstate = conntrack->proto.sctp.state;
+		newconntrack = new_state(dir, oldsctpstate, sch.type);
+
+		/* Invalid */
+		if (newconntrack == SCTP_CONNTRACK_MAX) {
+			DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
+			       dir, sch.type, oldsctpstate);
+			WRITE_UNLOCK(&sctp_lock);
+			return -1;
+		}
+
+		/* If it is an INIT or an INIT ACK note down the vtag */
+		if (sch.type == SCTP_CID_INIT
+		    || sch.type == SCTP_CID_INIT_ACK) {
+			sctp_inithdr_t inithdr;
+
+			if (skb_copy_bits(skb, offset + sizeof (sctp_chunkhdr_t),
+					  &inithdr, sizeof(inithdr)) != 0) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+			/* The initiate tag is what the peer must put into the
+			   packets it sends back, so it is the tag expected in
+			   the direction opposite to this packet. */
+			DEBUGP("Setting vtag %x for dir %d\n",
+			       inithdr.init_tag, !dir);
+			conntrack->proto.sctp.vtag[!dir] = inithdr.init_tag;
+		}
+
+		conntrack->proto.sctp.state = newconntrack;
+		WRITE_UNLOCK(&sctp_lock);
+	}
+
+	/* SCTP_CONNTRACK_MAX: no chunk was processed, and the value is past
+	   the end of sctp_timeouts[].  SCTP_CONNTRACK_NONE: its slot in
+	   sctp_timeouts[] is a null pointer.  Neither may be dereferenced. */
+	if (newconntrack == SCTP_CONNTRACK_MAX
+	    || newconntrack == SCTP_CONNTRACK_NONE) {
+		DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
+		       dir, sch.type, oldsctpstate);
+		return -1;
+	}
+
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
+
+	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
+	    && dir == IP_CT_DIR_REPLY
+	    && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
+		DEBUGP("Setting assured bit\n");
+		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+	}
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Called when a new connection for this protocol found.
+ *
+ * Validates the first packet, seeds conntrack->proto.sctp.state from the
+ * state table (starting in SCTP_CONNTRACK_NONE, original direction) and
+ * records the verification tag expected in the reply direction.
+ *
+ * Returns 1 to keep the new conntrack, 0 to have it discarded.  Every
+ * rejection returns 0: a negative value is non-zero and would read as
+ * "accept" to a caller that only tests for 0.
+ * NOTE(review): assumes the caller treats exactly 0 as "reject", as the
+ * "Invalid: delete conntrack" path implies - confirm in ip_conntrack_core.
+ */
+static int sctp_new(struct ip_conntrack *conntrack,
+		    const struct sk_buff *skb)
+{
+	enum sctp_conntrack newconntrack;
+	sctp_sctphdr_t sctph;
+	sctp_chunkhdr_t sch;
+	u_int32_t offset, count;
+	char map[256 / sizeof (char)] = {0};
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &sctph, sizeof(sctph)) != 0)
+		return 0;
+
+	if (do_basic_checks(conntrack, skb, map) != 0)
+		return 0;
+
+	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
+	if ((test_bit (SCTP_CID_ABORT, (void *)map))
+	    || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
+	    || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
+		return 0;
+	}
+
+	newconntrack = SCTP_CONNTRACK_MAX;
+	for_each_sctp_chunk (skb, sch, offset, count) {
+		/* Don't need lock here: this conntrack not in circulation yet */
+		newconntrack = new_state (IP_CT_DIR_ORIGINAL,
+					  SCTP_CONNTRACK_NONE, sch.type);
+
+		/* Invalid: delete conntrack.  SCTP_CONNTRACK_NONE is refused
+		   too: it has no timeout (null slot in sctp_timeouts[]), so a
+		   conntrack left in that state cannot be refreshed. */
+		if (newconntrack == SCTP_CONNTRACK_NONE
+		    || newconntrack == SCTP_CONNTRACK_MAX) {
+			DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
+			return 0;
+		}
+
+		/* Copy the vtag into the state info */
+		if (sch.type == SCTP_CID_INIT) {
+			if (sctph.vtag == 0) {
+				sctp_inithdr_t inithdr;
+
+				if (skb_copy_bits(skb, offset + sizeof (sctp_chunkhdr_t),
+						  &inithdr, sizeof(inithdr)) != 0) {
+					return 0;
+				}
+
+				DEBUGP("Setting vtag %x for new conn\n",
+				       inithdr.init_tag);
+
+				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+								inithdr.init_tag;
+			} else {
+				/* Sec 8.5.1 (A) */
+				return 0;
+			}
+		}
+		/* If it is a shutdown ack OOTB packet, we expect a return
+		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+		else {
+			DEBUGP("Setting vtag %x for new conn OOTB\n",
+			       sctph.vtag);
+			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sctph.vtag;
+		}
+
+		conntrack->proto.sctp.state = newconntrack;
+	}
+
+	/* No chunk was processed at all: the state was never set. */
+	if (newconntrack == SCTP_CONNTRACK_MAX) {
+		DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Expectation matching hook for SCTP.  Stub: neither argument is examined
+ * and 0 is always returned.
+ */
+static int sctp_exp_matches_pkt(struct ip_conntrack_expect *exp,
+ const struct sk_buff *skb)
+{
+ /* To be implemented */
+ return 0;
+}
+
+/*
+ * SCTP protocol descriptor handed to ip_conntrack_protocol_register().
+ * Members that are not named here (.list, .destroy) are zero-initialised.
+ */
+struct ip_conntrack_protocol ip_conntrack_protocol_sctp =
+{
+	.proto			= IPPROTO_SCTP,
+	.name			= "sctp",
+	.pkt_to_tuple		= sctp_pkt_to_tuple,
+	.invert_tuple		= sctp_invert_tuple,
+	.print_tuple		= sctp_print_tuple,
+	.print_conntrack	= sctp_print_conntrack,
+	.packet			= sctp_packet,
+	.new			= sctp_new,
+	.exp_matches_pkt	= sctp_exp_matches_pkt,
+	.me			= THIS_MODULE,
+};
+
+#ifdef CONFIG_SYSCTL
+static ctl_table ip_ct_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
+ .procname = "ip_conntrack_sctp_timeout_closed",
+ .data = &ip_ct_sctp_timeout_closed,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
+ .procname = "ip_conntrack_sctp_timeout_cookie_wait",
+ .data = &ip_ct_sctp_timeout_cookie_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
+ .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
+ .data = &ip_ct_sctp_timeout_cookie_echoed,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
+ .procname = "ip_conntrack_sctp_timeout_established",
+ .data = &ip_ct_sctp_timeout_established,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
+ .data = &ip_ct_sctp_timeout_shutdown_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
+ .data = &ip_ct_sctp_timeout_shutdown_recd,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
+ .data = &ip_ct_sctp_timeout_shutdown_ack_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_netfilter_table[] = {
+ {
+ .ctl_name = NET_IPV4_NETFILTER,
+ .procname = "netfilter",
+ .mode = 0555,
+ .child = ip_ct_sysctl_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_ipv4_table[] = {
+ {
+ .ctl_name = NET_IPV4,
+ .procname = "ipv4",
+ .mode = 0555,
+ .child = ip_ct_netfilter_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_net_table[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = ip_ct_ipv4_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static struct ctl_table_header *ip_ct_sysctl_header;
+#endif
+
+/*
+ * Module entry point: register the SCTP conntrack protocol and, when
+ * CONFIG_SYSCTL is set, the sysctl tree that exposes its timeouts.
+ *
+ * Returns 0 on success or a negative error.  If the sysctl registration
+ * fails, the protocol is unregistered again and -ENOMEM is returned.
+ */
+int __init init(void)
+{
+	int ret;
+
+	ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
+	if (ret) {
+		printk("ip_conntrack_proto_sctp: protocol register failed\n");
+		goto out;
+	}
+
+#ifdef CONFIG_SYSCTL
+	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
+	if (ip_ct_sysctl_header == NULL) {
+		printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
+		/* ret still holds 0 from the successful protocol register;
+		   without this the failed load would be reported as success. */
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+#endif
+
+	return ret;
+
+#ifdef CONFIG_SYSCTL
+ cleanup:
+	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
+#endif
+ out:
+	DEBUGP("SCTP conntrack module loading %s\n",
+	       ret ? "failed": "succeeded");
+	return ret;
+}
+
+/*
+ * Module exit: unregister the SCTP conntrack protocol and, when
+ * CONFIG_SYSCTL is set, the sysctl table registered by init().
+ */
+void __exit fini(void)
+{
+ ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(ip_ct_sysctl_header);
+#endif
+ DEBUGP("SCTP conntrack module unloaded\n");
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 463cafa6692a..64c7538c4b18 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -4,8 +4,22 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
+ *
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
+ * - Real stateful connection tracking
+ * - Modified state transitions table
+ * - Window scaling support added
+ * - SACK support added
+ *
+ * Willy Tarreau:
+ * - State table bugfixes
+ * - More robust state changes
+ * - Tuning timer parameters
+ *
+ * version 2.2
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
@@ -14,16 +28,18 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
-#include <linux/string.h>
+#include <linux/spinlock.h>
#include <net/tcp.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#if 0
#define DEBUGP printk
+#define DEBUGP_VARS
#else
#define DEBUGP(format, args...)
#endif
@@ -31,28 +47,40 @@
/* Protects conntrack->proto.tcp */
static DECLARE_RWLOCK(tcp_lock);
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR */
+/* "Be conservative in what you do,
+ be liberal in what you accept from others."
+ If it's non-zero, we mark only out of window RST segments as INVALID. */
+int ip_ct_tcp_be_liberal = 0;
-/* Actually, I believe that neither ipmasq (where this code is stolen
- from) nor ipfilter do it exactly right. A new conntrack machine taking
- into account packet loss (which creates uncertainty as to exactly
- the conntrack of the connection) is required. RSN. --RR */
+/* When connection is picked up from the middle, how many packets are required
+ to pass in each direction when we assume we are in sync - if any side uses
+ window scaling, we lost the game.
+ If it is set to zero, we disable picking up already established
+ connections. */
+int ip_ct_tcp_loose = 3;
+
+/* Max number of the retransmitted packets without receiving an (acceptable)
+ ACK from the destination. If this number is reached, a shorter timer
+ will be started. */
+int ip_ct_tcp_max_retrans = 3;
+
+ /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+ closely. They're more complex. --RR */
static const char *tcp_conntrack_names[] = {
"NONE",
- "ESTABLISHED",
"SYN_SENT",
"SYN_RECV",
+ "ESTABLISHED",
"FIN_WAIT",
- "TIME_WAIT",
- "CLOSE",
"CLOSE_WAIT",
"LAST_ACK",
+ "TIME_WAIT",
+ "CLOSE",
"LISTEN"
};
-
-#define SECS *HZ
+
+#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
@@ -66,64 +94,214 @@ unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS;
unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS;
unsigned long ip_ct_tcp_timeout_close = 10 SECS;
+/* RFC1122 says the R2 limit should be at least 100 seconds.
+ Linux uses 15 packets as limit, which corresponds
+ to ~13-30min depending on RTO. */
+unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS;
+
static unsigned long * tcp_timeouts[]
= { NULL, /* TCP_CONNTRACK_NONE */
- &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
&ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
&ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
+ &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
&ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
- &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
- &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
&ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
&ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
+ &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
+ &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
NULL, /* TCP_CONNTRACK_LISTEN */
};
#define sNO TCP_CONNTRACK_NONE
-#define sES TCP_CONNTRACK_ESTABLISHED
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
+#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
-#define sTW TCP_CONNTRACK_TIME_WAIT
-#define sCL TCP_CONNTRACK_CLOSE
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
+#define sTW TCP_CONNTRACK_TIME_WAIT
+#define sCL TCP_CONNTRACK_CLOSE
#define sLI TCP_CONNTRACK_LISTEN
#define sIV TCP_CONNTRACK_MAX
+#define sIG TCP_CONNTRACK_IGNORE
-static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = {
+/*
+ * What TCP flags are set from RST/SYN/FIN/ACK.
+ *
+ * get_conntrack_index() maps a TCP header to one of these values, and the
+ * value is used as the middle index of tcp_conntracks[dir][index][state].
+ * The enumerator order therefore has to match the row order of that table.
+ */
+enum tcp_bit_set {
+ TCP_SYN_SET, /* SYN set, ACK clear (RST clear) */
+ TCP_SYNACK_SET, /* SYN and ACK set (RST clear) */
+ TCP_FIN_SET, /* FIN set (RST and SYN clear) */
+ TCP_ACK_SET, /* ACK set (RST, SYN and FIN clear) */
+ TCP_RST_SET, /* RST set; checked before all other flags */
+ TCP_NONE_SET, /* none of RST/SYN/FIN/ACK set */
+};
+
+/*
+ * The TCP state transition table needs a few words...
+ *
+ * We are the man in the middle. All the packets go through us
+ * but might get lost in transit to the destination.
+ * It is assumed that the destinations can't receive segments
+ * we haven't seen.
+ *
+ * The checked segment is in window, but our windows are *not*
+ * equivalent with the ones of the sender/receiver. We always
+ * try to guess the state of the current sender.
+ *
+ * The meaning of the states are:
+ *
+ * NONE: initial state
+ * SYN_SENT: SYN-only packet seen
+ * SYN_RECV: SYN-ACK packet seen
+ * ESTABLISHED: ACK packet seen
+ * FIN_WAIT: FIN packet seen
+ * CLOSE_WAIT: ACK seen (after FIN)
+ * LAST_ACK: FIN seen (after FIN)
+ * TIME_WAIT: last ACK seen
+ * CLOSE: closed connection
+ *
+ * LISTEN state is not used.
+ *
+ * Packets marked as IGNORED (sIG):
+ * if they may be either invalid or valid
+ * and the receiver may send back a connection
+ * closing RST or a SYN/ACK.
+ *
+ * Packets marked as INVALID (sIV):
+ * if they are invalid
+ * or we do not support the request (simultaneous open)
+ */
+static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
{
-/* ORIGINAL */
-/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
-/*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI },
-/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI },
-/*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES },
-/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL },
-/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+/* ORIGINAL */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/*
+ * sNO -> sSS Initialize a new connection
+ * sSS -> sSS Retransmitted SYN
+ * sSR -> sIG Late retransmitted SYN?
+ * sES -> sIG Error: SYNs in window outside the SYN_SENT state
+ * are errors. Receiver will reply with RST
+ * and close the connection.
+ * Or we are not in sync and hold a dead connection.
+ * sFW -> sIG
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sSS Reopened connection (RFC 1122).
+ * sCL -> sSS
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * A SYN/ACK from the client is always invalid:
+ * - either it tries to set up a simultaneous open, which is
+ * not supported;
+ * - or the firewall has just been inserted between the two hosts
+ * during the session set-up. The SYN will be retransmitted
+ * by the true client (or it'll time out).
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sNO -> sIV Too late and no reason to do anything...
+ * sSS -> sIV Client might not send FIN in this state:
+ * we enforce waiting for a SYN/ACK reply first.
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions, waiting for
+ * the last ACK.
+ * Might be a retransmitted FIN as well...
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN. Remain in the same state.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ * sNO -> sES Assumed.
+ * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
+ * sSR -> sES Established state is reached.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected.
+ * sTW -> sTW Retransmitted last ACK. Remain in the same state.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
},
{
-/* REPLY */
-/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
-/*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR },
-/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI },
-/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI },
-/*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI },
-/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
- }
+/* REPLY */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * sNO -> sIV Never reached.
+ * sSS -> sIV Simultaneous open, not supported
+ * sSR -> sIV Simultaneous open, not supported.
+ * sES -> sIV Server may not initiate a connection.
+ * sFW -> sIV
+ * sCW -> sIV
+ * sLA -> sIV
+ * sTW -> sIV Reopened connection, but server may not do it.
+ * sCL -> sIV
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/*
+ * sSS -> sSR Standard open.
+ * sSR -> sSR Retransmitted SYN/ACK.
+ * sES -> sIG Late retransmitted SYN/ACK?
+ * sFW -> sIG
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sIG
+ * sCL -> sIG
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sSS -> sIV Server might not send FIN in this state.
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions.
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*ack*/ { sIV, sIV, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
+ * sSR -> sIV Simultaneous open.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected.
+ * sTW -> sTW Retransmitted last ACK.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+ }
};
static int tcp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple)
{
- struct tcphdr hdr;
+ struct tcphdr _hdr, *hp;
/* Actually only need first 8 bytes. */
- if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0)
+ hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ if (hp == NULL)
return 0;
- tuple->src.u.tcp.port = hdr.source;
- tuple->dst.u.tcp.port = hdr.dest;
+ tuple->src.u.tcp.port = hp->source;
+ tuple->dst.u.tcp.port = hp->dest;
return 1;
}
@@ -160,11 +338,488 @@ static unsigned int tcp_print_conntrack(char *buffer,
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
- if (tcph->rst) return 3;
- else if (tcph->syn) return 0;
- else if (tcph->fin) return 1;
- else if (tcph->ack) return 2;
- else return 4;
+ if (tcph->rst) return TCP_RST_SET;
+ else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
+ else if (tcph->fin) return TCP_FIN_SET;
+ else if (tcph->ack) return TCP_ACK_SET;
+ else return TCP_NONE_SET;
+}
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+ in IP Filter' by Guido van Rooij.
+
+ http://www.nluug.nl/events/sane2000/papers.html
+ http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
+
+ The boundaries and the conditions are slightly changed:
+
+ td_maxend = max(sack + max(win,1)) seen in reply packets
+ td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+ td_end = max(seq + len) seen in sent packets
+
+ I. Upper bound for valid data: seq + len <= sender.td_maxend
+ II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin
+ III. Upper bound for valid ack: sack <= receiver.td_end
+ IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
+
+ where sack is the highest right edge of sack block found in the packet.
+
+ The upper bound limit for a valid ack is not ignored -
+ we don't have to deal with fragments.
+*/
+
+/*
+ * Sequence number just past the last sequence unit occupied by a segment.
+ *
+ * @seq:  sequence number of the segment, host byte order
+ * @len:  length of the packet including the IP and TCP headers
+ * @iph:  IP header (ihl gives its length in 32-bit words)
+ * @tcph: TCP header (doff gives its length in 32-bit words)
+ *
+ * The payload length is @len minus both header lengths; SYN and FIN each
+ * add one to the result.  Arithmetic wraps modulo 2^32.
+ */
+static inline __u32 segment_seq_plus_len(__u32 seq,
+					 size_t len,
+					 struct iphdr *iph,
+					 struct tcphdr *tcph)
+{
+	__u32 end = seq + len - (iph->ihl + tcph->doff)*4;
+
+	if (tcph->syn)
+		end++;
+	if (tcph->fin)
+		end++;
+
+	return end;
+}
+
+/* Fixme: what about big packets? */
+#define MAXACKWINCONST 66000
+/*
+ * Window width used for the lower bound of an acceptable ack
+ * (ack >= receiver.td_end - MAXACKWINDOW): the larger of
+ * (sender)->td_maxwin and MAXACKWINCONST.  The argument is expanded more
+ * than once, so it must be free of side effects.
+ */
+#define MAXACKWINDOW(sender) \
+ ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
+ : MAXACKWINCONST)
+
+/*
+ * Simplified tcp_parse_options routine from tcp_input.c
+ *
+ * Scans the TCP options of the segment and records in @state:
+ *  - IP_CT_TCP_FLAG_SACK_PERM when a SACK-permitted option of the expected
+ *    size is present;
+ *  - the shift count of a window scale option of the expected size in
+ *    td_scale (clamped to 14, see RFC 1323), together with
+ *    IP_CT_TCP_STATE_FLAG_WINDOW_SCALE.
+ *
+ * @skb:   packet the options are read from
+ * @iph:   IP header of the packet (only ihl is used, to locate the options)
+ * @tcph:  TCP header of the packet (only doff is used, for the option length)
+ * @state: per-direction state to update
+ *
+ * td_scale and flags are cleared before parsing.  A segment that carries no
+ * options at all returns early and leaves @state untouched.
+ *
+ * Parsing stops at end-of-list and at the first malformed or truncated
+ * option.  Stopping on truncation matters: merely leaving the switch would
+ * keep looping with ptr advanced but length unchanged, and could walk past
+ * the end of the option bytes.
+ */
+static void tcp_options(const struct sk_buff *skb,
+			struct iphdr *iph,
+			struct tcphdr *tcph,
+			struct ip_ct_tcp_state *state)
+{
+	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+	unsigned char *ptr;
+	int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+	if (!length)
+		return;
+
+	ptr = skb_header_pointer(skb,
+				 (iph->ihl * 4) + sizeof(struct tcphdr),
+				 length, buff);
+	BUG_ON(ptr == NULL);
+
+	state->td_scale =
+	state->flags = 0;
+
+	while (length > 0) {
+		int opcode=*ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			if (length < 2)
+				return;	/* no room for the length byte */
+			opsize=*ptr++;
+			if (opsize < 2) /* "silly options" */
+				return;
+			if (opsize > length)
+				return;	/* don't parse partial options */
+
+			if (opcode == TCPOPT_SACK_PERM
+			    && opsize == TCPOLEN_SACK_PERM)
+				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
+			else if (opcode == TCPOPT_WINDOW
+				 && opsize == TCPOLEN_WINDOW) {
+				state->td_scale = *(u_int8_t *)ptr;
+
+				if (state->td_scale > 14) {
+					/* See RFC1323 */
+					state->td_scale = 14;
+				}
+				state->flags |=
+					IP_CT_TCP_STATE_FLAG_WINDOW_SCALE;
+			}
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+}
+
+/*
+ * Raise *sack to the highest right edge found in the first SACK option of
+ * the segment.
+ *
+ * @tcph: TCP header; the options are read from directly behind it
+ * @sack: in/out.  The caller presets it (tcp_in_window() uses the ack
+ *        number); it is only ever replaced by a SACK right edge that is
+ *        after() the current value.
+ *
+ * A segment whose options are exactly the aligned timestamp option
+ * (NOP, NOP, TIMESTAMP) cannot carry SACK blocks and is skipped without
+ * walking the option list.  TCPOLEN_TSTAMP_ALIGNED is a length in bytes,
+ * like 'length', so the two are compared directly.
+ *
+ * Parsing stops at end-of-list, at the first malformed or truncated option
+ * and after the first well-formed SACK option.
+ *
+ * NOTE(review): the options are read through (tcph + 1), so the option
+ * bytes must be contiguous behind *tcph.  tcp_packet() obtains the header
+ * via skb_header_pointer() with sizeof(struct tcphdr) only -- confirm this
+ * is sufficient for non-linear skbs.
+ */
+static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
+{
+	__u32 tmp;
+	unsigned char *ptr;
+	int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+	/* Fast path for timestamp-only option */
+	if (length == TCPOLEN_TSTAMP_ALIGNED
+	    && *(__u32 *)(tcph + 1) ==
+		__constant_ntohl((TCPOPT_NOP << 24)
+				 | (TCPOPT_NOP << 16)
+				 | (TCPOPT_TIMESTAMP << 8)
+				 | TCPOLEN_TIMESTAMP))
+		return;
+
+	ptr = (unsigned char *)(tcph + 1);
+	while (length > 0) {
+		int opcode=*ptr++;
+		int opsize, i;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			if (length < 2)
+				return;	/* no room for the length byte */
+			opsize=*ptr++;
+			if (opsize < 2) /* "silly options" */
+				return;
+			if (opsize > length)
+				return;	/* don't parse partial options */
+
+			if (opcode == TCPOPT_SACK
+			    && opsize >= (TCPOLEN_SACK_BASE
+					  + TCPOLEN_SACK_PERBLOCK)
+			    && !((opsize - TCPOLEN_SACK_BASE)
+				 % TCPOLEN_SACK_PERBLOCK)) {
+				for (i = 0;
+				     i < (opsize - TCPOLEN_SACK_BASE);
+				     i += TCPOLEN_SACK_PERBLOCK) {
+					/* second word of a block: right edge */
+					tmp = ntohl(*((u_int32_t *)(ptr+i)+1));
+
+					if (after(tmp, *sack))
+						*sack = tmp;
+				}
+				return;
+			}
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+}
+
+/*
+ * Window tracking: decide whether a segment is acceptable with respect to
+ * the windows recorded for both directions, and update those windows.
+ *
+ * @state: TCP state of the connection; seen[dir] is the sender of this
+ *         segment, seen[!dir] the receiver
+ * @dir:   direction of the segment
+ * @index: in/out flag class of the segment (enum tcp_bit_set).  Changed
+ *         from TCP_FIN_SET to TCP_ACK_SET when the segment is trimmed at
+ *         the right edge of the window, since the FIN is cut off with it.
+ * @skb, @iph, @tcph: the packet and its IP and TCP headers
+ *
+ * A segment is accepted when either side is still in "loose" mode or when
+ * all four boundary conditions hold:
+ *   I.   end  <= sender.td_maxend
+ *   II.  seq  >= sender.td_end - receiver.td_maxwin
+ *   III. sack <= receiver.td_end
+ *   IV.  ack  >= receiver.td_end - MAXACKWINDOW(sender)
+ *
+ * Returns 1 for an accepted segment.  For a rejected one it returns
+ * ip_ct_tcp_be_liberal && !tcph->rst, i.e. non-RST segments still pass
+ * when the liberal mode is switched on.
+ */
+static int tcp_in_window(struct ip_ct_tcp *state,
+			 enum ip_conntrack_dir dir,
+			 unsigned int *index,
+			 const struct sk_buff *skb,
+			 struct iphdr *iph,
+			 struct tcphdr *tcph)
+{
+	struct ip_ct_tcp_state *sender = &state->seen[dir];
+	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+	__u32 seq, ack, sack, end, win, swin;
+	int res;
+
+	/*
+	 * Get the required data from the packet.
+	 */
+	seq = ntohl(tcph->seq);
+	ack = sack = ntohl(tcph->ack_seq);
+	win = ntohs(tcph->window);
+	end = segment_seq_plus_len(seq, skb->len, iph, tcph);
+
+	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
+		tcp_sack(tcph, &sack);
+
+	DEBUGP("tcp_in_window: START\n");
+	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+	       "seq=%u ack=%u sack=%u win=%u end=%u\n",
+		NIPQUAD(iph->saddr), ntohs(tcph->source),
+		NIPQUAD(iph->daddr), ntohs(tcph->dest),
+		seq, ack, sack, win, end);
+	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		sender->td_end, sender->td_maxend, sender->td_maxwin,
+		sender->td_scale,
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		receiver->td_scale);
+
+	if (sender->td_end == 0) {
+		/*
+		 * Initialize sender data.
+		 */
+		if (tcph->syn && tcph->ack) {
+			/*
+			 * Outgoing SYN-ACK in reply to a SYN.
+			 */
+			sender->td_end =
+			sender->td_maxend = end;
+			sender->td_maxwin = (win == 0 ? 1 : win);
+
+			tcp_options(skb, iph, tcph, sender);
+			/*
+			 * RFC 1323:
+			 * Both sides must send the Window Scale option
+			 * to enable window scaling in either direction.
+			 */
+			if (!(sender->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE
+			      && receiver->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE))
+				sender->td_scale =
+				receiver->td_scale = 0;
+		} else {
+			/*
+			 * We are in the middle of a connection,
+			 * its history is lost for us.
+			 * Let's try to use the data from the packet.
+			 */
+			sender->td_end = end;
+			sender->td_maxwin = (win == 0 ? 1 : win);
+			sender->td_maxend = end + sender->td_maxwin;
+		}
+	} else if (state->state == TCP_CONNTRACK_SYN_SENT
+		   && dir == IP_CT_DIR_ORIGINAL
+		   && after(end, sender->td_end)) {
+		/*
+		 * RFC 793: "if a TCP is reinitialized ... then it need
+		 * not wait at all; it must only be sure to use sequence
+		 * numbers larger than those recently used."
+		 */
+		sender->td_end =
+		sender->td_maxend = end;
+		sender->td_maxwin = (win == 0 ? 1 : win);
+
+		tcp_options(skb, iph, tcph, sender);
+	}
+
+	if (!(tcph->ack)) {
+		/*
+		 * If there is no ACK, just pretend it was set and OK.
+		 */
+		ack = sack = receiver->td_end;
+	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
+		    (TCP_FLAG_ACK|TCP_FLAG_RST))
+		   && (ack == 0)) {
+		/*
+		 * Broken TCP stacks, that set ACK in RST packets as well
+		 * with zero ack value.
+		 */
+		ack = sack = receiver->td_end;
+	}
+
+	if (seq == end)
+		/*
+		 * Packets contains no data: we assume it is valid
+		 * and check the ack value only.
+		 */
+		seq = end = sender->td_end;
+
+	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+	       "seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n",
+		NIPQUAD(iph->saddr), ntohs(tcph->source),
+		NIPQUAD(iph->daddr), ntohs(tcph->dest),
+		seq, ack, sack, win, end,
+		after(end, sender->td_maxend) && before(seq, sender->td_maxend)
+		? sender->td_maxend : end);
+	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		sender->td_end, sender->td_maxend, sender->td_maxwin,
+		sender->td_scale,
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		receiver->td_scale);
+
+	/* Ignore data over the right edge of the receiver's window. */
+	if (after(end, sender->td_maxend) &&
+	    before(seq, sender->td_maxend)) {
+		end = sender->td_maxend;
+		if (*index == TCP_FIN_SET)
+			*index = TCP_ACK_SET;
+	}
+	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+		before(end, sender->td_maxend + 1)
+			|| before(seq, sender->td_maxend + 1),
+		after(seq, sender->td_end - receiver->td_maxwin - 1)
+			|| after(end, sender->td_end - receiver->td_maxwin - 1),
+		before(sack, receiver->td_end + 1),
+		after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+
+	if (sender->loose || receiver->loose ||
+	    (before(end, sender->td_maxend + 1) &&
+	     after(seq, sender->td_end - receiver->td_maxwin - 1) &&
+	     before(sack, receiver->td_end + 1) &&
+	     after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
+		/*
+		 * Take into account window scaling (RFC 1323).
+		 */
+		if (!tcph->syn)
+			win <<= sender->td_scale;
+
+		/*
+		 * Update sender data.
+		 */
+		swin = win + (sack - ack);
+		if (sender->td_maxwin < swin)
+			sender->td_maxwin = swin;
+		if (after(end, sender->td_end))
+			sender->td_end = end;
+		if (after(sack + win, receiver->td_maxend - 1)) {
+			receiver->td_maxend = sack + win;
+			if (win == 0)
+				receiver->td_maxend++;
+		}
+
+		/*
+		 * Check retransmissions.
+		 */
+		if (*index == TCP_ACK_SET) {
+			if (state->last_dir == dir
+			    && state->last_seq == seq
+			    && state->last_end == end)
+				state->retrans++;
+			else {
+				state->last_dir = dir;
+				state->last_seq = seq;
+				state->last_end = end;
+				state->retrans = 0;
+			}
+		}
+		/*
+		 * Close the window of disabled window tracking :-)
+		 */
+		if (sender->loose)
+			sender->loose--;
+
+		res = 1;
+	} else {
+		/*
+		 * Name the first violated condition, testing exactly what
+		 * the acceptance check above tests.  In particular the upper
+		 * ack bound (III) is checked against sack, not ack.
+		 */
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			"ip_ct_tcp: %s ",
+			before(end, sender->td_maxend + 1) ?
+			after(seq, sender->td_end - receiver->td_maxwin - 1) ?
+			before(sack, receiver->td_end + 1) ?
+			after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
+			: "ACK is under the lower bound (possibly overly delayed ACK)"
+			: "ACK is over the upper bound (ACKed data has never seen yet)"
+			: "SEQ is under the lower bound (retransmitted already ACKed data)"
+			: "SEQ is over the upper bound (over the window of the receiver)");
+
+		res = ip_ct_tcp_be_liberal && !tcph->rst;
+	}
+
+	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
+	       "receiver end=%u maxend=%u maxwin=%u\n",
+		res, sender->td_end, sender->td_maxend, sender->td_maxwin,
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+
+	return res;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+/*
+ * Resynchronise the tracked sequence state after NAT has successfully
+ * mangled a packet.
+ *
+ * The end sequence number of the packet is recomputed from its current
+ * headers and length.  Under tcp_lock, seen[dir].td_end is advanced to it
+ * if it lies after the recorded value, and last_end is set to it
+ * unconditionally.  Always returns 1.
+ */
+int ip_conntrack_tcp_update(struct sk_buff *skb,
+			    struct ip_conntrack *conntrack,
+			    int dir)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *tcph = (void *)iph + iph->ihl*4;
+	struct ip_ct_tcp *ct_tcp = &conntrack->proto.tcp;
+	__u32 new_end;
+#ifdef DEBUGP_VARS
+	struct ip_ct_tcp_state *sender = &ct_tcp->seen[dir];
+	struct ip_ct_tcp_state *receiver = &ct_tcp->seen[!dir];
+#endif
+
+	new_end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
+
+	WRITE_LOCK(&tcp_lock);
+	/* We have to worry for the ack in the reply packet only... */
+	if (after(new_end, ct_tcp->seen[dir].td_end))
+		ct_tcp->seen[dir].td_end = new_end;
+	ct_tcp->last_end = new_end;
+	WRITE_UNLOCK(&tcp_lock);
+
+	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		sender->td_end, sender->td_maxend, sender->td_maxwin,
+		sender->td_scale,
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		receiver->td_scale);
+
+	return 1;
+}
+
+EXPORT_SYMBOL(ip_conntrack_tcp_update);
+#endif
+
+/*
+ * Bit masks of the TCP flag byte.  tcp_error() reads that byte as byte 13
+ * of the TCP header.
+ */
+#define TH_FIN 0x01
+#define TH_SYN 0x02
+#define TH_RST 0x04
+#define TH_PUSH 0x08
+#define TH_ACK 0x10
+#define TH_URG 0x20
+#define TH_ECE 0x40
+#define TH_CWR 0x80
+
+/*
+ * table of valid flag combinations - ECE and CWR are always valid
+ *
+ * Indexed by the flag byte with ECE and CWR masked off, which is why the
+ * table only spans the six low flag bits.  Entries set to 1 are accepted;
+ * every combination not listed is left at zero and rejected by tcp_error().
+ */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+{
+ [TH_SYN] = 1,
+ [TH_SYN|TH_ACK] = 1,
+ [TH_RST] = 1,
+ [TH_RST|TH_ACK] = 1,
+ [TH_RST|TH_ACK|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK] = 1,
+ [TH_ACK] = 1,
+ [TH_ACK|TH_PUSH] = 1,
+ [TH_ACK|TH_URG] = 1,
+ [TH_ACK|TH_URG|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK|TH_URG] = 1,
+ [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
+};
+
+/*
+ * Protect conntrack against broken packets. Code taken from ipt_unclean.c.
+ *
+ * Sanity checks applied to a TCP packet, in this order:
+ *   1. the fixed TCP header must be readable from the skb;
+ *   2. the data offset must cover at least the fixed header and must not
+ *      exceed the TCP length of the packet;
+ *   3. on the NF_IP_PRE_ROUTING hook only, the TCP checksum must verify;
+ *   4. the flag combination (ECE and CWR ignored) must be marked valid in
+ *      tcp_valid_flags.
+ *
+ * Returns NF_ACCEPT when all checks pass and -NF_ACCEPT otherwise.  Each
+ * failure is logged through nf_log_packet() when LOG_INVALID(IPPROTO_TCP)
+ * is true.  @ctinfo is not used.
+ */
+static int tcp_error(struct sk_buff *skb,
+		     enum ip_conntrack_info *ctinfo,
+		     unsigned int hooknum)
+{
+	struct iphdr *iph = skb->nh.iph;
+	unsigned int hdroff = iph->ihl * 4;
+	unsigned int seglen = skb->len - hdroff;
+	struct tcphdr hdrbuf, *tcph;
+	unsigned int thlen;
+	u_int8_t flags;
+
+	/* Smaller than minimal TCP header? */
+	tcph = skb_header_pointer(skb, hdroff, sizeof(hdrbuf), &hdrbuf);
+	if (tcph == NULL) {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+				"ip_ct_tcp: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* Not whole TCP header or malformed packet */
+	thlen = tcph->doff*4;
+	if (thlen < sizeof(struct tcphdr) || seglen < thlen) {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+				"ip_ct_tcp: truncated/malformed packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* Checksum invalid? Ignore.
+	 * We skip checking packets on the outgoing path
+	 * because the semantic of CHECKSUM_HW is different there
+	 * and moreover root might send raw packets.
+	 */
+	/* FIXME: Source route IP option packets --RR */
+	if (hooknum == NF_IP_PRE_ROUTING) {
+		unsigned int sum;
+
+		/* Use the precomputed sum when there is one, else compute it. */
+		if (skb->ip_summed == CHECKSUM_HW)
+			sum = skb->csum;
+		else
+			sum = skb_checksum(skb, hdroff, seglen, 0);
+
+		if (csum_tcpudp_magic(iph->saddr, iph->daddr, seglen,
+				      IPPROTO_TCP, sum)) {
+			if (LOG_INVALID(IPPROTO_TCP))
+				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+					"ip_ct_tcp: bad TCP checksum ");
+			return -NF_ACCEPT;
+		}
+	}
+
+	/* Check TCP flags. */
+	flags = (((u_int8_t *)tcph)[13] & ~(TH_ECE|TH_CWR));
+	if (!tcp_valid_flags[flags]) {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+				"ip_ct_tcp: invalid TCP flag combination ");
+		return -NF_ACCEPT;
+	}
+
+	return NF_ACCEPT;
+}
/* Returns verdict for packet, or -1 for invalid. */
@@ -172,103 +827,260 @@ static int tcp_packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
enum ip_conntrack_info ctinfo)
{
- enum tcp_conntrack newconntrack, oldtcpstate;
- struct tcphdr tcph;
-
- if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
- return -1;
- if (skb->len < skb->nh.iph->ihl * 4 + tcph.doff * 4)
- return -1;
-
- /* If only reply is a RST, we can consider ourselves not to
- have an established connection: this is a fairly common
- problem case, so we can delete the conntrack
- immediately. --RR */
- if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) {
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)conntrack);
+ enum tcp_conntrack new_state, old_state;
+ enum ip_conntrack_dir dir;
+ struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *th, _tcph;
+ unsigned long timeout;
+ unsigned int index;
+
+ th = skb_header_pointer(skb, iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ BUG_ON(th == NULL);
+
+ WRITE_LOCK(&tcp_lock);
+ old_state = conntrack->proto.tcp.state;
+ dir = CTINFO2DIR(ctinfo);
+ index = get_conntrack_index(th);
+ new_state = tcp_conntracks[dir][index][old_state];
+
+ switch (new_state) {
+ case TCP_CONNTRACK_IGNORE:
+ /* Either SYN in ORIGINAL, or SYN/ACK in REPLY direction. */
+ if (index == TCP_SYNACK_SET
+ && conntrack->proto.tcp.last_index == TCP_SYN_SET
+ && conntrack->proto.tcp.last_dir != dir
+ && after(ntohl(th->ack_seq),
+ conntrack->proto.tcp.last_seq)) {
+ /* This SYN/ACK acknowledges a SYN that we earlier
+ * ignored as invalid. This means that the client and
+ * the server are both in sync, while the firewall is
+ * not. We kill this session and block the SYN/ACK so
+ * that the client cannot but retransmit its SYN and
+ * thus initiate a clean new session.
+ */
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: killing out of sync session ");
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return -NF_DROP;
+ }
+ conntrack->proto.tcp.last_index = index;
+ conntrack->proto.tcp.last_dir = dir;
+ conntrack->proto.tcp.last_seq = ntohl(th->seq);
+
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid SYN (ignored) ");
return NF_ACCEPT;
+ case TCP_CONNTRACK_MAX:
+ /* Invalid packet */
+ DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+ dir, get_conntrack_index(th),
+ old_state);
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid state ");
+ return -NF_ACCEPT;
+ case TCP_CONNTRACK_SYN_SENT:
+ if (old_state >= TCP_CONNTRACK_TIME_WAIT) {
+ /* Attempt to reopen a closed connection.
+ * Delete this connection and look up again. */
+ WRITE_UNLOCK(&tcp_lock);
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return -NF_REPEAT;
+ }
+ break;
+ case TCP_CONNTRACK_CLOSE:
+ if (index == TCP_RST_SET
+ && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET
+ && after(ntohl(th->ack_seq),
+ conntrack->proto.tcp.last_seq)) {
+ /* Ignore RST closing down invalid SYN
+ we had let through. */
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid RST (ignored) ");
+ return NF_ACCEPT;
+ }
+ /* Just fall through */
+ default:
+ /* Keep compilers happy. */
+ break;
}
- WRITE_LOCK(&tcp_lock);
- oldtcpstate = conntrack->proto.tcp.state;
- newconntrack
- = tcp_conntracks
- [CTINFO2DIR(ctinfo)]
- [get_conntrack_index(&tcph)][oldtcpstate];
-
- /* Invalid */
- if (newconntrack == TCP_CONNTRACK_MAX) {
- DEBUGP("ip_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n",
- CTINFO2DIR(ctinfo), get_conntrack_index(&tcph),
- conntrack->proto.tcp.state);
+ if (!tcp_in_window(&conntrack->proto.tcp, dir, &index,
+ skb, iph, th)) {
WRITE_UNLOCK(&tcp_lock);
- return -1;
+ return -NF_ACCEPT;
}
+ /* From now on we have got in-window packets */
+
+ /* If FIN was trimmed off, we don't change state. */
+ conntrack->proto.tcp.last_index = index;
+ new_state = tcp_conntracks[dir][index][old_state];
- conntrack->proto.tcp.state = newconntrack;
-
- /* Poor man's window tracking: record SYN/ACK for handshake check */
- if (oldtcpstate == TCP_CONNTRACK_SYN_SENT
- && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
- && tcph.syn && tcph.ack) {
- conntrack->proto.tcp.handshake_ack
- = htonl(ntohl(tcph.seq) + 1);
- goto out;
- }
+ DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+ "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+ NIPQUAD(iph->saddr), ntohs(th->source),
+ NIPQUAD(iph->daddr), ntohs(th->dest),
+ (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+ (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+ old_state, new_state);
- /* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV */
- if (oldtcpstate == TCP_CONNTRACK_SYN_RECV
- && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL
- && tcph.ack && !tcph.syn
- && tcph.ack_seq == conntrack->proto.tcp.handshake_ack)
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ conntrack->proto.tcp.state = new_state;
+ timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
+ && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
+ ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+ WRITE_UNLOCK(&tcp_lock);
-out: WRITE_UNLOCK(&tcp_lock);
- ip_ct_refresh(conntrack, *tcp_timeouts[newconntrack]);
+ if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+ /* If only reply is a RST, we can consider ourselves not to
+ have an established connection: this is a fairly common
+ problem case, so we can delete the conntrack
+ immediately. --RR */
+ if (th->rst) {
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return NF_ACCEPT;
+ }
+ } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+ && (old_state == TCP_CONNTRACK_SYN_RECV
+ || old_state == TCP_CONNTRACK_ESTABLISHED)
+ && new_state == TCP_CONNTRACK_ESTABLISHED) {
+ /* Set ASSURED if we see valid ack in ESTABLISHED
+ after SYN_RECV or a valid answer for a picked up
+ connection. */
+ set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ }
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
return NF_ACCEPT;
}
-
-/* Called when a new connection for this protocol found. */
-static int tcp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
+
+ /* Called when a new connection for this protocol found. */
+static int tcp_new(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb)
{
- enum tcp_conntrack newconntrack;
- struct tcphdr tcph;
-
- if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
- return -1;
+ enum tcp_conntrack new_state;
+ struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *th, _tcph;
+#ifdef DEBUGP_VARS
+ struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
+ struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
+#endif
+ th = skb_header_pointer(skb, iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ BUG_ON(th == NULL);
+
/* Don't need lock here: this conntrack not in circulation yet */
- newconntrack
- = tcp_conntracks[0][get_conntrack_index(&tcph)]
+ new_state
+ = tcp_conntracks[0][get_conntrack_index(th)]
[TCP_CONNTRACK_NONE];
/* Invalid: delete conntrack */
- if (newconntrack == TCP_CONNTRACK_MAX) {
- DEBUGP("ip_conntrack_tcp: invalid new deleting.\n");
+ if (new_state >= TCP_CONNTRACK_MAX) {
+ DEBUGP("ip_ct_tcp: invalid new deleting.\n");
+ return 0;
+ }
+
+ if (new_state == TCP_CONNTRACK_SYN_SENT) {
+ /* SYN packet */
+ conntrack->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ iph, th);
+ conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+ conntrack->proto.tcp.seen[0].td_maxwin = 1;
+ conntrack->proto.tcp.seen[0].td_maxend =
+ conntrack->proto.tcp.seen[0].td_end;
+
+ tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
+ conntrack->proto.tcp.seen[1].flags = 0;
+ conntrack->proto.tcp.seen[0].loose =
+ conntrack->proto.tcp.seen[1].loose = 0;
+ } else if (ip_ct_tcp_loose == 0) {
+ /* Don't try to pick up connections. */
return 0;
+ } else {
+ /*
+ * We are in the middle of a connection,
+ * its history is lost for us.
+ * Let's try to use the data from the packet.
+ */
+ conntrack->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ iph, th);
+ conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+ conntrack->proto.tcp.seen[0].td_maxwin = 1;
+ conntrack->proto.tcp.seen[0].td_maxend =
+ conntrack->proto.tcp.seen[0].td_end +
+ conntrack->proto.tcp.seen[0].td_maxwin;
+ conntrack->proto.tcp.seen[0].td_scale = 0;
+
+ /* We assume SACK. Should we assume window scaling too? */
+ conntrack->proto.tcp.seen[0].flags =
+ conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
+ conntrack->proto.tcp.seen[0].loose =
+ conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
}
+
+ conntrack->proto.tcp.seen[1].td_end = 0;
+ conntrack->proto.tcp.seen[1].td_maxend = 0;
+ conntrack->proto.tcp.seen[1].td_maxwin = 1;
+ conntrack->proto.tcp.seen[1].td_scale = 0;
- conntrack->proto.tcp.state = newconntrack;
+ /* tcp_packet will set them */
+ conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
+ conntrack->proto.tcp.last_index = TCP_NONE_SET;
+
+ DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+ "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ sender->td_end, sender->td_maxend, sender->td_maxwin,
+ sender->td_scale,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+ receiver->td_scale);
return 1;
}
-
+
+/*
+ * Check whether the packet in skb covers the sequence number stored in
+ * the expectation. The TCP header is fetched at offset ihl*4 through
+ * skb_header_pointer() (with _tcph as the fallback copy buffer); if it
+ * cannot be obtained the function returns 0. Otherwise the result of
+ * between(exp->seq, seq, seq + datalen) is returned, where datalen is the
+ * packet length minus the IP and TCP header lengths.
+ */
static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
const struct sk_buff *skb)
{
const struct iphdr *iph = skb->nh.iph;
- struct tcphdr tcph;
+ struct tcphdr *th, _tcph;
unsigned int datalen;
- if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
+ th = skb_header_pointer(skb, iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return 0;
- datalen = skb->len - iph->ihl*4 - tcph.doff*4;
+ datalen = skb->len - iph->ihl*4 - th->doff*4;
- return between(exp->seq, ntohl(tcph.seq), ntohl(tcph.seq) + datalen);
+ return between(exp->seq, ntohl(th->seq), ntohl(th->seq) + datalen);
}
-struct ip_conntrack_protocol ip_conntrack_protocol_tcp
-= { { NULL, NULL }, IPPROTO_TCP, "tcp",
- tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
- tcp_packet, tcp_new, NULL, tcp_exp_matches_pkt, NULL };
+/*
+ * Protocol descriptor for TCP. Written with designated initializers, so
+ * any member not named here is zero-initialized.
+ */
+struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
+{
+ .proto = IPPROTO_TCP,
+ .name = "tcp",
+ .pkt_to_tuple = tcp_pkt_to_tuple,
+ .invert_tuple = tcp_invert_tuple,
+ .print_tuple = tcp_print_tuple,
+ .print_conntrack = tcp_print_conntrack,
+ .packet = tcp_packet,
+ .new = tcp_new,
+ .exp_matches_pkt = tcp_exp_matches_pkt,
+ .error = tcp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index a63c32d1840e..0fe9e9188fdf 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -12,6 +12,8 @@
#include <linux/netfilter.h>
#include <linux/in.h>
#include <linux/udp.h>
+#include <net/checksum.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
unsigned long ip_ct_udp_timeout = 30*HZ;
@@ -21,14 +23,15 @@ static int udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct ip_conntrack_tuple *tuple)
{
- struct udphdr hdr;
+ struct udphdr _hdr, *hp;
/* Actually only need first 8 bytes. */
- if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0)
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
return 0;
- tuple->src.u.udp.port = hdr.source;
- tuple->dst.u.udp.port = hdr.dest;
+ tuple->src.u.udp.port = hp->source;
+ tuple->dst.u.udp.port = hp->dest;
return 1;
}
@@ -60,16 +63,17 @@ static unsigned int udp_print_conntrack(char *buffer,
/* Returns verdict for packet, and may modify conntracktype */
+/*
+ * Refreshes the conntrack timeout through ip_ct_refresh_acct(), passing
+ * ctinfo and skb along: the stream timeout is used (and the ASSURED bit
+ * set) once a reply has been seen, the plain UDP timeout otherwise.
+ * Always returns NF_ACCEPT.
+ */
static int udp_packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
- enum ip_conntrack_info conntrackinfo)
+ enum ip_conntrack_info ctinfo)
{
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- ip_ct_refresh(conntrack, ip_ct_udp_timeout_stream);
+ ip_ct_refresh_acct(conntrack, ctinfo, skb,
+ ip_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
set_bit(IPS_ASSURED_BIT, &conntrack->status);
} else
- ip_ct_refresh(conntrack, ip_ct_udp_timeout);
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
return NF_ACCEPT;
}
@@ -80,7 +84,60 @@ static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
return 1;
}
-struct ip_conntrack_protocol ip_conntrack_protocol_udp
-= { { NULL, NULL }, IPPROTO_UDP, "udp",
- udp_pkt_to_tuple, udp_invert_tuple, udp_print_tuple, udp_print_conntrack,
- udp_packet, udp_new, NULL, NULL, NULL };
+/*
+ * Sanity-check a UDP packet.
+ *
+ * Returns -NF_ACCEPT when the UDP header cannot be read, when the length
+ * field is larger than the data present or smaller than the header, or
+ * (on NF_IP_PRE_ROUTING only) when a non-zero checksum does not verify.
+ * Returns NF_ACCEPT otherwise, including for packets sent without a
+ * checksum. Rejected packets are logged when LOG_INVALID(IPPROTO_UDP)
+ * is true. ctinfo is not used here.
+ *
+ * The header is read with skb_header_pointer(), like the other header
+ * accesses in this file, so linear packets are inspected in place and
+ * only non-linear ones are copied into _hdr.
+ */
+static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct iphdr *iph = skb->nh.iph;
+ unsigned int udplen = skb->len - iph->ihl * 4;
+ struct udphdr _hdr, *hdr;
+
+ /* Header is too small? */
+ hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
+ if (hdr == NULL) {
+ if (LOG_INVALID(IPPROTO_UDP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_udp: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* Truncated/malformed packets */
+ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
+ if (LOG_INVALID(IPPROTO_UDP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_udp: truncated/malformed packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* Packet with no checksum */
+ if (!hdr->check)
+ return NF_ACCEPT;
+
+ /* Checksum invalid? Ignore.
+ * We skip checking packets on the outgoing path
+ * because the semantic of CHECKSUM_HW is different there
+ * and moreover root might send raw packets.
+ * FIXME: Source route IP option packets --RR */
+ if (hooknum == NF_IP_PRE_ROUTING
+ && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
+ skb->ip_summed == CHECKSUM_HW ? skb->csum
+ : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
+ if (LOG_INVALID(IPPROTO_UDP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_udp: bad UDP checksum ");
+ return -NF_ACCEPT;
+ }
+
+ return NF_ACCEPT;
+}
+
+/*
+ * Protocol descriptor for UDP. Written with designated initializers, so
+ * any member not named here (e.g. exp_matches_pkt) is zero-initialized.
+ */
+struct ip_conntrack_protocol ip_conntrack_protocol_udp =
+{
+ .proto = IPPROTO_UDP,
+ .name = "udp",
+ .pkt_to_tuple = udp_pkt_to_tuple,
+ .invert_tuple = udp_invert_tuple,
+ .print_tuple = udp_print_tuple,
+ .print_conntrack = udp_print_conntrack,
+ .packet = udp_packet,
+ .new = udp_new,
+ .error = udp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 80edac904188..86010ea65de1 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -20,6 +20,8 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -43,6 +45,9 @@
MODULE_LICENSE("GPL");
+extern atomic_t ip_conntrack_count;
+DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
static int kill_proto(const struct ip_conntrack *i, void *data)
{
return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
@@ -63,128 +68,305 @@ print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple,
return len;
}
-/* FIXME: Don't print source proto part. --RR */
+#ifdef CONFIG_IP_NF_CT_ACCT
+/*
+ * Write "packets=<n> bytes=<n> " for one counter set to the seq_file and
+ * return seq_printf()'s result, which callers treat as non-zero on failure.
+ */
static unsigned int
-print_expect(char *buffer, const struct ip_conntrack_expect *expect)
+seq_print_counters(struct seq_file *s, struct ip_conntrack_counter *counter)
{
- unsigned int len;
-
- if (expect->expectant->helper->timeout)
- len = sprintf(buffer, "EXPECTING: %lu ",
- timer_pending(&expect->timeout)
- ? (expect->timeout.expires - jiffies)/HZ : 0);
- else
- len = sprintf(buffer, "EXPECTING: - ");
- len += sprintf(buffer + len, "use=%u proto=%u ",
- atomic_read(&expect->use), expect->tuple.dst.protonum);
- len += print_tuple(buffer + len, &expect->tuple,
- __ip_ct_find_proto(expect->tuple.dst.protonum));
- len += sprintf(buffer + len, "\n");
- return len;
+ return seq_printf(s, "packets=%llu bytes=%llu ",
+ counter->packets, counter->bytes);
}
+#else
+/* Accounting compiled out: expands to 0 (arguments are not evaluated),
+ * so the callers' failure checks never fire. */
+#define seq_print_counters(x, y) 0
+#endif
-static unsigned int
-print_conntrack(char *buffer, struct ip_conntrack *conntrack)
+/*
+ * seq_file start callback for the conntrack hash walk.
+ *
+ * Takes ip_conntrack_lock for reading (released by ct_seq_stop) and
+ * returns a heap-allocated cursor holding the hash bucket index *pos.
+ * Returns NULL when *pos is past the end of the hash table and
+ * ERR_PTR(-ENOMEM) when the cursor cannot be allocated; the lock is held
+ * on every return path.
+ */
+static void *ct_seq_start(struct seq_file *s, loff_t *pos)
{
- unsigned int len;
- struct ip_conntrack_protocol *proto
- = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
+ unsigned int *bucket;
- len = sprintf(buffer, "%-8s %u %lu ",
- proto->name,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum,
- timer_pending(&conntrack->timeout)
- ? (conntrack->timeout.expires - jiffies)/HZ : 0);
+ /* Allocate the cursor before taking the lock: a GFP_KERNEL
+ * allocation may sleep, which is not allowed once
+ * ip_conntrack_lock is held. */
+ bucket = kmalloc(sizeof(*bucket), GFP_KERNEL);
+
+ /* strange seq_file api calls stop even if we fail,
+ * thus we need to grab lock since stop unlocks */
+ READ_LOCK(&ip_conntrack_lock);
+
+ if (*pos >= ip_conntrack_htable_size) {
+ /* nothing to walk; kfree(NULL) is a no-op */
+ kfree(bucket);
+ return NULL;
+ }
- len += proto->print_conntrack(buffer + len, conntrack);
- len += print_tuple(buffer + len,
- &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- proto);
- if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
- len += sprintf(buffer + len, "[UNREPLIED] ");
- len += print_tuple(buffer + len,
- &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
- proto);
- if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
- len += sprintf(buffer + len, "[ASSURED] ");
- len += sprintf(buffer + len, "use=%u ",
- atomic_read(&conntrack->ct_general.use));
- len += sprintf(buffer + len, "\n");
+ if (!bucket)
+ return ERR_PTR(-ENOMEM);
+
+ *bucket = *pos;
+ return bucket;
+}
+
+/*
+ * seq_file next callback: advance the cursor v (the bucket index returned
+ * by start) to the next hash bucket and mirror it into *pos. When the
+ * index runs past the hash table the cursor is freed and NULL returned.
+ */
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ unsigned int *bucket = (unsigned int *) v;
- return len;
+ *pos = ++(*bucket);
+ if (*pos >= ip_conntrack_htable_size) {
+ kfree(v);
+ return NULL;
+ }
+ return bucket;
+}
+
+/*
+ * seq_file stop callback: release the cursor, if any, and drop the read
+ * lock taken in ct_seq_start.
+ *
+ * v is whatever start/next returned last. It is NULL once ct_seq_next has
+ * reached the end (the cursor is already freed there), an ERR_PTR when
+ * start failed, and a live cursor when iteration stops early (for example
+ * because the output buffer filled up). Only the last case owns memory.
+ */
+static void ct_seq_stop(struct seq_file *s, void *v)
+{
+ if (!IS_ERR(v))
+ kfree(v); /* kfree(NULL) is a no-op */
+ READ_UNLOCK(&ip_conntrack_lock);
}
-/* Returns true when finished. */
-static inline int
-conntrack_iterate(const struct ip_conntrack_tuple_hash *hash,
- char *buffer, off_t offset, off_t *upto,
- unsigned int *len, unsigned int maxlen)
+/*
+ * Print one conntrack entry to the seq_file. Called for every tuple hash
+ * in a bucket; entries reached through their reply-direction hash are
+ * skipped so each conntrack is printed once.
+ *
+ * Output order: protocol name, protocol number, remaining timeout in
+ * seconds, protocol-private state, original tuple (+ counters),
+ * "[UNREPLIED] " if no reply was seen, reply tuple (+ counters),
+ * "[ASSURED] " if set, and the use count.
+ *
+ * return 0 on success, 1 in case of error
+ */
+static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
+ struct seq_file *s)
{
- unsigned int newlen;
- IP_NF_ASSERT(hash->ctrack);
+ struct ip_conntrack *conntrack = hash->ctrack;
+ struct ip_conntrack_protocol *proto;
+ char buffer[IP_CT_PRINT_BUFLEN];
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
- /* Only count originals */
+ IP_NF_ASSERT(conntrack);
+
+ /* we only want to print DIR_ORIGINAL */
if (DIRECTION(hash))
return 0;
- if ((*upto)++ < offset)
- return 0;
+ proto = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.protonum);
+ IP_NF_ASSERT(proto);
+
+ if (seq_printf(s, "%-8s %u %lu ",
+ proto->name,
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
+ timer_pending(&conntrack->timeout)
+ ? (conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
+ return 1;
+
+ /* Start from an empty string in case the protocol has no private
+ * state to print and leaves the buffer untouched. */
+ buffer[0] = '\0';
+ proto->print_conntrack(buffer, conntrack);
+ if (seq_puts(s, buffer))
+ return 1;
+
+ print_tuple(buffer, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ proto);
+ /* emit the original tuple before the buffer is reused below */
+ if (seq_puts(s, buffer))
+ return 1;
+
+ if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
+ return 1;
- newlen = print_conntrack(buffer + *len, hash->ctrack);
- if (*len + newlen > maxlen)
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
+ if (seq_printf(s, "[UNREPLIED] "))
+ return 1;
+
+ print_tuple(buffer, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
+ proto);
+ if (seq_puts(s, buffer))
+ return 1;
+
+ if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
+ return 1;
+
+ if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
+ if (seq_printf(s, "[ASSURED] "))
+ return 1;
+
+ if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
return 1;
- else *len += newlen;
return 0;
}
-static int
-list_conntracks(char *buffer, char **start, off_t offset, int length)
+
+/*
+ * seq_file show callback: v is the bucket cursor from start/next. Runs
+ * ct_seq_real_show over the hash chain of that bucket via LIST_FIND and
+ * returns 1 if LIST_FIND reports a match (i.e. an entry failed to print),
+ * 0 otherwise.
+ */
+static int ct_seq_show(struct seq_file *s, void *v)
{
- unsigned int i;
- unsigned int len = 0;
- off_t upto = 0;
- struct list_head *e;
+ unsigned int *bucket = (unsigned int *) v;
- READ_LOCK(&ip_conntrack_lock);
- /* Traverse hash; print originals then reply. */
- for (i = 0; i < ip_conntrack_htable_size; i++) {
- if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate,
- struct ip_conntrack_tuple_hash *,
- buffer, offset, &upto, &len, length))
- goto finished;
+ if (LIST_FIND(&ip_conntrack_hash[*bucket], ct_seq_real_show,
+ struct ip_conntrack_tuple_hash *, s)) {
+ /* buffer was filled and unable to print that tuple */
+ return 1;
}
+ return 0;
+}
+
+/* seq_file iterator over the conntrack hash table, one bucket per step. */
+static struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+};
+
+/* open handler: attach the conntrack iterator to the file; returns
+ * seq_open()'s result. */
+static int ct_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &ct_seq_ops);
+}
- /* Now iterate through expecteds. */
+/* File operations installed on the "ip_conntrack" proc entry; everything
+ * except open is the stock seq_file implementation. */
+static struct file_operations ct_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/* expects */
+/*
+ * seq_file start callback for the expectation list. Takes
+ * ip_conntrack_lock and ip_conntrack_expect_tuple_lock for reading (both
+ * are released in exp_seq_stop) and walks *pos + 1 steps from the list
+ * head, so *pos == 0 yields the first entry. Returns the list node, or
+ * NULL if the list is empty or shorter than that; the locks are held on
+ * every return path.
+ */
+static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct list_head *e = &ip_conntrack_expect_list;
+ loff_t i;
+
+ /* strange seq_file api calls stop even if we fail,
+ * thus we need to grab lock since stop unlocks */
+ READ_LOCK(&ip_conntrack_lock);
READ_LOCK(&ip_conntrack_expect_tuple_lock);
- list_for_each(e, &ip_conntrack_expect_list) {
- unsigned int last_len;
- struct ip_conntrack_expect *expect
- = (struct ip_conntrack_expect *)e;
- if (upto++ < offset) continue;
-
- last_len = len;
- len += print_expect(buffer + len, expect);
- if (len > length) {
- len = last_len;
- goto finished_expects;
- }
+
+ if (list_empty(e))
+ return NULL;
+
+ for (i = 0; i <= *pos; i++) {
+ e = e->next;
+ if (e == &ip_conntrack_expect_list)
+ return NULL;
}
+ return e;
+}
- finished_expects:
+/*
+ * seq_file next callback for the expectation list: step to the following
+ * list node, or return NULL once the walk is back at the list head.
+ *
+ * *pos is advanced on every call. exp_seq_start re-walks the list by
+ * index, so without this a read that is resumed after a stop would start
+ * over from the same entry and repeat output.
+ */
+static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct list_head *e = v;
+
+ ++*pos;
+ e = e->next;
+
+ if (e == &ip_conntrack_expect_list)
+ return NULL;
+
+ return e;
+}
+
+/* seq_file stop callback: drop the two read locks taken in
+ * exp_seq_start, in reverse order of acquisition. */
+static void exp_seq_stop(struct seq_file *s, void *v)
+{
READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
- finished:
READ_UNLOCK(&ip_conntrack_lock);
+}
- /* `start' hack - see fs/proc/generic.c line ~165 */
- *start = (char *)((unsigned int)upto - offset);
- return len;
+/*
+ * seq_file show callback for one expectation: prints the remaining
+ * timeout in seconds (or "-" when the helper has no timeout), the use
+ * count, the protocol number and the expected tuple.
+ *
+ * v is the list node handed out by start/next and is used directly as a
+ * struct ip_conntrack_expect -- presumably the list head is the first
+ * member of that struct; confirm against its definition.
+ * Only the result of the final seq_printf() is returned; the earlier
+ * ones are not checked.
+ */
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct ip_conntrack_expect *expect = v;
+ char buffer[IP_CT_PRINT_BUFLEN];
+
+ if (expect->expectant->helper->timeout)
+ seq_printf(s, "%lu ", timer_pending(&expect->timeout)
+ ? (expect->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_printf(s, "- ");
+
+ seq_printf(s, "use=%u proto=%u ", atomic_read(&expect->use),
+ expect->tuple.dst.protonum);
+
+ print_tuple(buffer, &expect->tuple,
+ __ip_ct_find_proto(expect->tuple.dst.protonum));
+ return seq_printf(s, "%s\n", buffer);
+}
+
+/* seq_file iterator over ip_conntrack_expect_list, one entry per step. */
+static struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+
+/* open handler: attach the expectation iterator to the file; returns
+ * seq_open()'s result. */
+static int exp_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &exp_seq_ops);
+}
+
+/* File operations installed on the "ip_conntrack_expect" proc entry;
+ * everything except open is the stock seq_file implementation. */
+static struct file_operations exp_file_ops = {
+ .owner = THIS_MODULE,
+ .open = exp_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
+ * seq_file start callback for the per-CPU statistics: find the first
+ * possible CPU numbered *pos or higher, store its number in *pos and
+ * return that CPU's ip_conntrack_stat. Returns NULL when there is none.
+ */
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ int cpu;
+
+ for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu;
+ return &per_cpu(ip_conntrack_stat, cpu);
+ }
+
+ return NULL;
+}
+
+/*
+ * seq_file next callback: same search as ct_cpu_seq_start but beginning
+ * at *pos + 1. v is not used; the current position comes from *pos.
+ * NOTE(review): *pos is left unchanged when NULL is returned -- confirm
+ * this is what the seq_file core expects at end of sequence.
+ */
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ int cpu;
+
+ for (cpu = *pos + 1; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu;
+ return &per_cpu(ip_conntrack_stat, cpu);
+ }
+
+ return NULL;
+}
+
+/* seq_file stop callback: nothing to release, start/next take no lock
+ * and allocate nothing. */
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/*
+ * seq_file show callback: print one line of 16 zero-padded hex fields for
+ * the per-CPU statistics block v. The first field is the global
+ * ip_conntrack_count (so it is the same on every line); the remaining 15
+ * are this CPU's counters in the order listed below. The seq_printf()
+ * result is not checked; always returns 0.
+ */
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+ struct ip_conntrack_stat *st = v;
+
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x %08x %08x \n",
+ nr_conntracks,
+ st->searched,
+ st->found,
+ st->new,
+ st->invalid,
+ st->ignore,
+ st->delete,
+ st->delete_list,
+ st->insert,
+ st->insert_failed,
+ st->drop,
+ st->early_drop,
+ st->icmp_error,
+
+ st->expect_new,
+ st->expect_create,
+ st->expect_delete
+ );
+ return 0;
+}
+
+/* seq_file iterator over the per-CPU ip_conntrack_stat blocks, one
+ * possible CPU per step. */
+static struct seq_operations ct_cpu_seq_ops = {
+ .start = ct_cpu_seq_start,
+ .next = ct_cpu_seq_next,
+ .stop = ct_cpu_seq_stop,
+ .show = ct_cpu_seq_show,
+};
+
+/* open handler: attach the per-CPU statistics iterator to the file;
+ * returns seq_open()'s result. */
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &ct_cpu_seq_ops);
}
+/*
+ * File operations installed on the "ip_conntrack_stat" proc entry.
+ * NOTE(review): release is seq_release_private although the open handler
+ * only calls seq_open() and sets up no private data, whereas the other
+ * two proc files here use seq_release -- confirm this is intentional.
+ */
+static struct file_operations ct_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ct_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
static unsigned int ip_confirm(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
@@ -323,6 +505,10 @@ extern unsigned long ip_ct_tcp_timeout_close_wait;
extern unsigned long ip_ct_tcp_timeout_last_ack;
extern unsigned long ip_ct_tcp_timeout_time_wait;
extern unsigned long ip_ct_tcp_timeout_close;
+extern unsigned long ip_ct_tcp_timeout_max_retrans;
+extern int ip_ct_tcp_loose;
+extern int ip_ct_tcp_be_liberal;
+extern int ip_ct_tcp_max_retrans;
/* From ip_conntrack_proto_udp.c */
extern unsigned long ip_ct_udp_timeout;
@@ -334,6 +520,11 @@ extern unsigned long ip_ct_icmp_timeout;
/* From ip_conntrack_proto_icmp.c */
extern unsigned long ip_ct_generic_timeout;
+/* Log invalid packets of a given protocol */
+unsigned int ip_ct_log_invalid = 0;
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
static struct ctl_table_header *ip_ct_sysctl_header;
static ctl_table ip_ct_sysctl_table[] = {
@@ -449,6 +640,49 @@ static ctl_table ip_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
+ .procname = "ip_conntrack_log_invalid",
+ .data = &ip_ct_log_invalid,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &log_invalid_proto_min,
+ .extra2 = &log_invalid_proto_max,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
+ .procname = "ip_conntrack_tcp_timeout_max_retrans",
+ .data = &ip_ct_tcp_timeout_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
+ .procname = "ip_conntrack_tcp_loose",
+ .data = &ip_ct_tcp_loose,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
+ .procname = "ip_conntrack_tcp_be_liberal",
+ .data = &ip_ct_tcp_be_liberal,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
+ .procname = "ip_conntrack_tcp_max_retrans",
+ .data = &ip_ct_tcp_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
@@ -494,7 +728,7 @@ static ctl_table ip_ct_net_table[] = {
#endif
static int init_or_cleanup(int init)
{
- struct proc_dir_entry *proc;
+ struct proc_dir_entry *proc, *proc_exp, *proc_stat;
int ret = 0;
if (!init) goto cleanup;
@@ -503,14 +737,24 @@ static int init_or_cleanup(int init)
if (ret < 0)
goto cleanup_nothing;
- proc = proc_net_create("ip_conntrack", 0440, list_conntracks);
+ proc = proc_net_create("ip_conntrack", 0440, NULL);
if (!proc) goto cleanup_init;
- proc->owner = THIS_MODULE;
+ proc->proc_fops = &ct_file_ops;
+
+ proc_exp = proc_net_create("ip_conntrack_expect", 0440, NULL);
+ if (!proc_exp) goto cleanup_proc;
+ proc_exp->proc_fops = &exp_file_ops;
+
+ proc_stat = proc_net_fops_create("ip_conntrack_stat", S_IRUGO,
+ &ct_cpu_seq_fops);
+ if (!proc_stat)
+ goto cleanup_proc_exp;
+ proc_stat->owner = THIS_MODULE;
ret = nf_register_hook(&ip_conntrack_defrag_ops);
if (ret < 0) {
printk("ip_conntrack: can't register pre-routing defrag hook.\n");
- goto cleanup_proc;
+ goto cleanup_proc_stat;
}
ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
if (ret < 0) {
@@ -562,6 +806,10 @@ static int init_or_cleanup(int init)
nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
cleanup_defragops:
nf_unregister_hook(&ip_conntrack_defrag_ops);
+ cleanup_proc_stat:
+ proc_net_remove("ip_conntrack_stat");
+cleanup_proc_exp:
+ proc_net_remove("ip_conntrack_expect"); /* must be the name passed to proc_net_create() */
cleanup_proc:
proc_net_remove("ip_conntrack");
cleanup_init:
@@ -638,7 +886,7 @@ EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_selective_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh);
+EXPORT_SYMBOL(ip_ct_refresh_acct);
EXPORT_SYMBOL(ip_ct_find_proto);
EXPORT_SYMBOL(__ip_ct_find_proto);
EXPORT_SYMBOL(ip_ct_find_helper);
@@ -657,3 +905,4 @@ EXPORT_SYMBOL(ip_conntrack_hash);
EXPORT_SYMBOL(ip_conntrack_untracked);
EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
EXPORT_SYMBOL_GPL(ip_conntrack_put);
+EXPORT_SYMBOL(ip_ct_log_invalid);
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index ddee7378209d..d132a3c48d8d 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -19,6 +19,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
+#include <linux/moduleparam.h>
MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
MODULE_DESCRIPTION("tftp connection tracking helper");
@@ -27,7 +28,7 @@ MODULE_LICENSE("GPL");
#define MAX_PORTS 8
static int ports[MAX_PORTS];
static int ports_c;
-MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#if 0
@@ -41,14 +42,16 @@ static int tftp_help(struct sk_buff *skb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
- struct tftphdr tftph;
+ struct tftphdr _tftph, *tfh;
struct ip_conntrack_expect *exp;
- if (skb_copy_bits(skb, skb->nh.iph->ihl * 4 + sizeof(struct udphdr),
- &tftph, sizeof(tftph)) != 0)
+ tfh = skb_header_pointer(skb,
+ skb->nh.iph->ihl * 4 + sizeof(struct udphdr),
+ sizeof(_tftph), &_tftph);
+ if (tfh == NULL)
return NF_ACCEPT;
- switch (ntohs(tftph.opcode)) {
+ switch (ntohs(tfh->opcode)) {
/* RRQ and WRQ works the same way */
case TFTP_OPCODE_READ:
case TFTP_OPCODE_WRITE:
@@ -104,10 +107,10 @@ static int __init init(void)
int i, ret;
char *tmpname;
- if (!ports[0])
- ports[0]=TFTP_PORT;
+ if (ports_c == 0)
+ ports[ports_c++] = TFTP_PORT;
- for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) {
+ for (i = 0; i < ports_c; i++) {
/* Create helper structure */
memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
@@ -137,7 +140,6 @@ static int __init init(void)
fini();
return(ret);
}
- ports_c++;
}
return(0);
}
diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
index fbd8b9bfeb28..48227fe19dc2 100644
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
@@ -31,6 +31,7 @@
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
@@ -144,7 +145,8 @@ check_for_demasq(struct sk_buff **pskb)
switch ((*pskb)->nh.iph->protocol) {
case IPPROTO_ICMP:
/* ICMP errors. */
- ct = icmp_error_track(*pskb, &ctinfo, NF_IP_PRE_ROUTING);
+ protocol->error(*pskb, &ctinfo, NF_IP_PRE_ROUTING);
+ ct = (struct ip_conntrack *)(*pskb)->nfct->master;
if (ct) {
/* We only do SNAT in the compatibility layer.
So we can manipulate ICMP errors from
@@ -165,7 +167,8 @@ check_for_demasq(struct sk_buff **pskb)
case IPPROTO_UDP:
IP_NF_ASSERT(((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
- if (!get_tuple((*pskb)->nh.iph, *pskb, (*pskb)->nh.iph->ihl*4, &tuple, protocol)) {
+ if (!ip_ct_get_tuple((*pskb)->nh.iph, *pskb,
+ (*pskb)->nh.iph->ihl*4, &tuple, protocol)) {
if (net_ratelimit())
printk("ip_fw_compat_masq: Can't get tuple\n");
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1c6b7810655a..d350134dacb1 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -49,7 +49,6 @@ static unsigned int ip_nat_htable_size;
static struct list_head *bysource;
static struct list_head *byipsproto;
LIST_HEAD(protos);
-LIST_HEAD(helpers);
extern struct ip_nat_protocol unknown_nat_protocol;
@@ -498,13 +497,6 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
return ret;
}
-static inline int
-helper_cmp(const struct ip_nat_helper *helper,
- const struct ip_conntrack_tuple *tuple)
-{
- return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
-}
-
/* Where to manip the reply packets (will be reverse manip). */
static unsigned int opposite_hook[NF_IP_NUMHOOKS]
= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
@@ -643,8 +635,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
/* If there's a helper, assign it; based on new tuple. */
if (!conntrack->master)
- info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
- &reply);
+ info->helper = ip_nat_find_helper(&reply);
/* It's done. */
info->initialized |= (1 << HOOK2MANIP(hooknum));
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index 946ca05bb90f..ae15f410bc4b 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -12,6 +12,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/tcp.h>
+#include <linux/moduleparam.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
@@ -33,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper");
static int ports[MAX_PORTS];
static int ports_c;
-MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+module_param_array(ports, int, ports_c, 0400);
DECLARE_LOCK_EXTERN(ip_ftp_lock);
@@ -313,10 +314,10 @@ static int __init init(void)
int i, ret = 0;
char *tmpname;
- if (ports[0] == 0)
- ports[0] = FTP_PORT;
+ if (ports_c == 0)
+ ports[ports_c++] = FTP_PORT; /* count the default port, or the loop over ports_c registers nothing */
- for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
+ for (i = 0; i < ports_c; i++) {
ftp[i].tuple.dst.protonum = IPPROTO_TCP;
ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i].mask.dst.protonum = 0xFFFF;
@@ -343,7 +344,6 @@ static int __init init(void)
fini();
return ret;
}
- ports_c++;
}
return ret;
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index a49c722adbc1..2e8d021aff44 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -47,6 +47,7 @@
#define DUMP_OFFSET(x)
#endif
+static LIST_HEAD(helpers);
DECLARE_LOCK(ip_nat_seqofs_lock);
/* Setup TCP sequence correction given this change at this sequence */
@@ -419,6 +420,18 @@ int ip_nat_helper_register(struct ip_nat_helper *me)
return ret;
}
+/*
+ * Look up a NAT helper for the given tuple: searches the file-local
+ * helpers list with helper_cmp while holding ip_nat_lock for reading and
+ * returns whatever LIST_FIND yields.
+ * NOTE(review): the pointer is returned after the lock is dropped and
+ * nothing here takes a reference on the helper -- confirm that callers
+ * cannot race with helper unregistration.
+ * NOTE(review): helper_cmp is not defined in the visible part of this
+ * file's diff -- confirm it is available here.
+ */
+struct ip_nat_helper *
+ip_nat_find_helper(const struct ip_conntrack_tuple *tuple)
+{
+ struct ip_nat_helper *h;
+
+ READ_LOCK(&ip_nat_lock);
+ h = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, tuple);
+ READ_UNLOCK(&ip_nat_lock);
+
+ return h;
+}
+
static int
kill_helper(const struct ip_conntrack *i, void *helper)
{
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index dc778dd4ab2c..06555b44e49b 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -27,6 +27,7 @@
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/moduleparam.h>
#if 0
#define DEBUGP printk
@@ -41,7 +42,7 @@ static int ports_c;
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("IRC (DCC) NAT helper");
MODULE_LICENSE("GPL");
-MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
/* protects irc part of conntracks */
@@ -235,11 +236,10 @@ static int __init init(void)
struct ip_nat_helper *hlpr;
char *tmpname;
- if (ports[0] == 0) {
- ports[0] = IRC_PORT;
- }
+ if (ports_c == 0)
+ ports[ports_c++] = IRC_PORT;
- for (i = 0; (i < MAX_PORTS) && ports[i] != 0; i++) {
+ for (i = 0; i < ports_c; i++) {
hlpr = &ip_nat_irc_helpers[i];
hlpr->tuple.dst.protonum = IPPROTO_TCP;
hlpr->tuple.src.u.tcp.port = htons(ports[i]);
@@ -269,7 +269,6 @@ static int __init init(void)
fini();
return 1;
}
- ports_c++;
}
return ret;
}
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 145b2c57368a..8ee96d10449a 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -132,7 +132,8 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
ct = ip_conntrack_get(*pskb, &ctinfo);
/* Connection must be valid and new. */
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+ || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
IP_NF_ASSERT(out);
return ip_nat_setup_info(ct, targinfo, hooknum);
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 539ad18f8131..32d6e966aa13 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -47,6 +47,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
@@ -1252,6 +1253,9 @@ static unsigned int nat_help(struct ip_conntrack *ct,
int dir = CTINFO2DIR(ctinfo);
struct iphdr *iph = (*pskb)->nh.iph;
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
+ return NF_DROP;
spin_lock_bh(&snmp_lock);
@@ -1357,4 +1361,4 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
-MODULE_PARM(debug, "i");
+module_param(debug, bool, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 9765fd2d5cf1..62ef0d1f7554 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -99,11 +99,13 @@ ip_nat_fn(unsigned int hooknum,
hash table yet). We must not let this through, in
case we're doing NAT to the same network. */
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- struct icmphdr hdr;
+ struct icmphdr _hdr, *hp;
- if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
- &hdr, sizeof(hdr)) == 0
- && hdr.type == ICMP_REDIRECT)
+ hp = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_hdr), &_hdr);
+ if (hp != NULL &&
+ hp->type == ICMP_REDIRECT)
return NF_DROP;
}
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index a2097bfbefb3..cacaab6f768c 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -32,6 +32,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/moduleparam.h>
MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
MODULE_DESCRIPTION("tftp NAT helper");
@@ -41,7 +42,7 @@ MODULE_LICENSE("GPL");
static int ports[MAX_PORTS];
static int ports_c = 0;
-MODULE_PARM(ports,"1-" __MODULE_STRING(MAX_PORTS) "i");
+module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#if 0
@@ -59,7 +60,7 @@ tftp_nat_help(struct ip_conntrack *ct,
struct sk_buff **pskb)
{
int dir = CTINFO2DIR(ctinfo);
- struct tftphdr tftph;
+ struct tftphdr _tftph, *tfh;
struct ip_conntrack_tuple repl;
if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL)
@@ -71,11 +72,13 @@ tftp_nat_help(struct ip_conntrack *ct,
return NF_ACCEPT;
}
- if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
- &tftph, sizeof(tftph)) != 0)
+ tfh = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
+ sizeof(_tftph), &_tftph);
+ if (tfh == NULL)
return NF_DROP;
- switch (ntohs(tftph.opcode)) {
+ switch (ntohs(tfh->opcode)) {
/* RRQ and WRQ works the same way */
case TFTP_OPCODE_READ:
case TFTP_OPCODE_WRITE:
@@ -108,9 +111,12 @@ tftp_nat_expected(struct sk_buff **pskb,
#if 0
const struct ip_conntrack_tuple *repl =
&master->tuplehash[IP_CT_DIR_REPLY].tuple;
- struct udphdr udph;
+ struct udphdr _udph, *uh;
- if (skb_copy_bits(*pskb,(*pskb)->nh.iph->ihl*4,&udph,sizeof(udph))!=0)
+ uh = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL)
return NF_DROP;
#endif
@@ -125,8 +131,8 @@ tftp_nat_expected(struct sk_buff **pskb,
mr.range[0].min_ip = mr.range[0].max_ip = orig->dst.ip;
DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
"newsrc: %u.%u.%u.%u\n",
- NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source),
- NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest),
+ NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source),
+ NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest),
NIPQUAD(orig->dst.ip));
} else {
mr.range[0].min_ip = mr.range[0].max_ip = orig->src.ip;
@@ -136,8 +142,8 @@ tftp_nat_expected(struct sk_buff **pskb,
DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
"newdst: %u.%u.%u.%u:%u\n",
- NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source),
- NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest),
+ NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source),
+ NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest),
NIPQUAD(orig->src.ip), ntohs(orig->src.u.udp.port));
}
@@ -162,10 +168,10 @@ static int __init init(void)
int i, ret = 0;
char *tmpname;
- if (!ports[0])
- ports[0] = TFTP_PORT;
+ if (ports_c == 0)
+ ports[ports_c++] = TFTP_PORT;
- for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) {
+ for (i = 0; i < ports_c; i++) {
memset(&tftp[i], 0, sizeof(struct ip_nat_helper));
tftp[i].tuple.dst.protonum = IPPROTO_UDP;
@@ -194,7 +200,6 @@ static int __init init(void)
fini();
return ret;
}
- ports_c++;
}
return ret;
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f24f17b8e03e..bd2e13211a7f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -61,6 +61,8 @@ do { \
#endif
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+static DECLARE_MUTEX(ipt_mutex);
+
/* Must have mutex */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
@@ -1458,21 +1460,24 @@ tcp_find_option(u_int8_t option,
int *hotdrop)
{
/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
- u_int8_t opt[60 - sizeof(struct tcphdr)];
+ u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
unsigned int i;
duprintf("tcp_match: finding option\n");
/* If we don't have the whole header, drop packet. */
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
- opt, optlen) < 0) {
+ BUG_ON(!optlen);
+ op = skb_header_pointer(skb,
+ skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
+ optlen, _opt);
+ if (op == NULL) {
*hotdrop = 1;
return 0;
}
for (i = 0; i < optlen; ) {
- if (opt[i] == option) return !invert;
- if (opt[i] < 2) i++;
- else i += opt[i+1]?:1;
+ if (op[i] == option) return !invert;
+ if (op[i] < 2) i++;
+ else i += op[i+1]?:1;
}
return invert;
@@ -1486,7 +1491,7 @@ tcp_match(const struct sk_buff *skb,
int offset,
int *hotdrop)
{
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
const struct ipt_tcp *tcpinfo = matchinfo;
if (offset) {
@@ -1506,7 +1511,9 @@ tcp_match(const struct sk_buff *skb,
#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
+ th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
/* We've been asked to examine this packet, and we
can't. Hence, no choice but to drop. */
duprintf("Dropping evil TCP offset=0 tinygram.\n");
@@ -1515,23 +1522,24 @@ tcp_match(const struct sk_buff *skb,
}
if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
- ntohs(tcph.source),
+ ntohs(th->source),
!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
return 0;
if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
- ntohs(tcph.dest),
+ ntohs(th->dest),
!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
return 0;
- if (!FWINVTCP((((unsigned char *)&tcph)[13] & tcpinfo->flg_mask)
+ if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
== tcpinfo->flg_cmp,
IPT_TCP_INV_FLAGS))
return 0;
if (tcpinfo->option) {
- if (tcph.doff * 4 < sizeof(tcph)) {
+ if (th->doff * 4 < sizeof(_tcph)) {
*hotdrop = 1;
return 0;
}
- if (!tcp_find_option(tcpinfo->option, skb, tcph.doff*4 - sizeof(tcph),
+ if (!tcp_find_option(tcpinfo->option, skb,
+ th->doff*4 - sizeof(_tcph),
tcpinfo->invflags & IPT_TCP_INV_OPTION,
hotdrop))
return 0;
@@ -1564,14 +1572,16 @@ udp_match(const struct sk_buff *skb,
int offset,
int *hotdrop)
{
- struct udphdr udph;
+ struct udphdr _udph, *uh;
const struct ipt_udp *udpinfo = matchinfo;
/* Must not be a fragment. */
if (offset)
return 0;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
+ uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL) {
/* We've been asked to examine this packet, and we
can't. Hence, no choice but to drop. */
duprintf("Dropping evil UDP tinygram.\n");
@@ -1580,10 +1590,10 @@ udp_match(const struct sk_buff *skb,
}
return port_match(udpinfo->spts[0], udpinfo->spts[1],
- ntohs(udph.source),
+ ntohs(uh->source),
!!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
- ntohs(udph.dest),
+ ntohs(uh->dest),
!!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
}
@@ -1635,16 +1645,19 @@ icmp_match(const struct sk_buff *skb,
int offset,
int *hotdrop)
{
- struct icmphdr icmph;
+ struct icmphdr _icmph, *ic;
const struct ipt_icmp *icmpinfo = matchinfo;
/* Must not be a fragment. */
if (offset)
return 0;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph)) < 0){
+ ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ sizeof(_icmph), &_icmph);
+ if (ic == NULL) {
/* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
+ * can't. Hence, no choice but to drop.
+ */
duprintf("Dropping evil ICMP tinygram.\n");
*hotdrop = 1;
return 0;
@@ -1653,7 +1666,7 @@ icmp_match(const struct sk_buff *skb,
return icmp_type_code_match(icmpinfo->type,
icmpinfo->code[0],
icmpinfo->code[1],
- icmph.type, icmph.code,
+ ic->type, ic->code,
!!(icmpinfo->invflags&IPT_ICMP_INV));
}
diff --git a/net/ipv4/netfilter/ipchains_core.c b/net/ipv4/netfilter/ipchains_core.c
index 97b5401ef1ad..1360222c2537 100644
--- a/net/ipv4/netfilter/ipchains_core.c
+++ b/net/ipv4/netfilter/ipchains_core.c
@@ -679,49 +679,53 @@ ip_fw_check(const char *rif,
case IPPROTO_TCP:
dprintf("TCP ");
if (!offset) {
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
- if (skb_copy_bits(*pskb,
- (*pskb)->nh.iph->ihl * 4,
- &tcph, sizeof(tcph)))
+ th = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return FW_BLOCK;
- src_port = ntohs(tcph.source);
- dst_port = ntohs(tcph.dest);
+ src_port = ntohs(th->source);
+ dst_port = ntohs(th->dest);
/* Connection initilisation can only
* be made when the syn bit is set and
* neither of the ack or reset is
* set. */
- if (tcph.syn && !(tcph.ack || tcph.rst))
+ if (th->syn && !(th->ack || th->rst))
tcpsyn = 1;
}
break;
case IPPROTO_UDP:
dprintf("UDP ");
if (!offset) {
- struct udphdr udph;
+ struct udphdr _udph, *uh;
- if (skb_copy_bits(*pskb,
- (*pskb)->nh.iph->ihl * 4,
- &udph, sizeof(udph)))
+ uh = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL)
return FW_BLOCK;
- src_port = ntohs(udph.source);
- dst_port = ntohs(udph.dest);
+ src_port = ntohs(uh->source);
+ dst_port = ntohs(uh->dest);
}
break;
case IPPROTO_ICMP:
if (!offset) {
- struct icmphdr icmph;
+ struct icmphdr _icmph, *ic;
- if (skb_copy_bits(*pskb,
- (*pskb)->nh.iph->ihl * 4,
- &icmph, sizeof(icmph)))
+ ic = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_icmph),
+ &_icmph);
+ if (ic == NULL)
return FW_BLOCK;
- src_port = (__u16) icmph.type;
- dst_port = (__u16) icmph.code;
+ src_port = (__u16) ic->type;
+ dst_port = (__u16) ic->code;
}
dprintf("ICMP ");
break;
diff --git a/net/ipv4/netfilter/ipfwadm_core.c b/net/ipv4/netfilter/ipfwadm_core.c
index 424a9034fa27..c38a6887722d 100644
--- a/net/ipv4/netfilter/ipfwadm_core.c
+++ b/net/ipv4/netfilter/ipfwadm_core.c
@@ -410,20 +410,21 @@ int ip_fw_chk(struct sk_buff **pskb,
dprintf1("TCP ");
/* ports stay 0xFFFF if it is not the first fragment */
if (!offset) {
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
- if (skb_copy_bits(*pskb,
- (*pskb)->nh.iph->ihl * 4,
- &tcph, sizeof(tcph)))
+ th = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return FW_BLOCK;
- src_port = ntohs(tcph.source);
- dst_port = ntohs(tcph.dest);
+ src_port = ntohs(th->source);
+ dst_port = ntohs(th->dest);
- if(!tcph.ack && !tcph.rst)
+ if(!th->ack && !th->rst)
/* We do NOT have ACK, value TRUE */
notcpack = 1;
- if(!tcph.syn || !notcpack)
+ if(!th->syn || !notcpack)
/* We do NOT have SYN, value TRUE */
notcpsyn = 1;
}
@@ -433,29 +434,32 @@ int ip_fw_chk(struct sk_buff **pskb,
dprintf1("UDP ");
/* ports stay 0xFFFF if it is not the first fragment */
if (!offset) {
- struct udphdr udph;
+ struct udphdr _udph, *uh;
- if (skb_copy_bits(*pskb,
- (*pskb)->nh.iph->ihl * 4,
- &udph, sizeof(udph)))
+ uh = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL)
return FW_BLOCK;
- src_port = ntohs(udph.source);
- dst_port = ntohs(udph.dest);
+ src_port = ntohs(uh->source);
+ dst_port = ntohs(uh->dest);
}
prt = IP_FW_F_UDP;
break;
case IPPROTO_ICMP:
/* icmp_type stays 255 if it is not the first fragment */
if (!offset) {
- struct icmphdr icmph;
+ struct icmphdr _icmph, *ic;
- if (skb_copy_bits(*pskb,
- (*pskb)->nh.iph->ihl * 4,
- &icmph, sizeof(icmph)))
+ ic = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_icmph),
+ &_icmph);
+ if (ic == NULL)
return FW_BLOCK;
- icmp_type = (__u16) icmph.type;
+ icmp_type = (__u16) ic->type;
}
dprintf2("ICMP:%d ", icmp_type);
prt = IP_FW_F_ICMP;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 8ca402564f5e..120109cd294d 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -52,34 +52,39 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
static inline int
set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
{
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
u_int16_t diffs[2];
/* Not enought header? */
- if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4, &tcph, sizeof(tcph))
- < 0)
+ th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
return 0;
- diffs[0] = ((u_int16_t *)&tcph)[6];
+ diffs[0] = ((u_int16_t *)th)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
- tcph.ece = einfo->proto.tcp.ece;
+ th->ece = einfo->proto.tcp.ece;
if (einfo->operation & IPT_ECN_OP_SET_CWR)
- tcph.cwr = einfo->proto.tcp.cwr;
- diffs[1] = ((u_int16_t *)&tcph)[6];
+ th->cwr = einfo->proto.tcp.cwr;
+ diffs[1] = ((u_int16_t *)th)[6];
/* Only mangle if it's changed. */
if (diffs[0] != diffs[1]) {
diffs[0] = diffs[0] ^ 0xFFFF;
if (!skb_ip_make_writable(pskb,
- (*pskb)->nh.iph->ihl*4+sizeof(tcph)))
+ (*pskb)->nh.iph->ihl*4+sizeof(_tcph)))
return 0;
+
+ if (th != &_tcph)
+ memcpy(&_tcph, th, sizeof(_tcph));
+
if ((*pskb)->ip_summed != CHECKSUM_HW)
- tcph.check = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- tcph.check^0xFFFF));
+ _tcph.check = csum_fold(csum_partial((char *)diffs,
+ sizeof(diffs),
+ _tcph.check^0xFFFF));
memcpy((*pskb)->data + (*pskb)->nh.iph->ihl*4,
- &tcph, sizeof(tcph));
+ &_tcph, sizeof(_tcph));
if ((*pskb)->ip_summed == CHECKSUM_HW)
if (skb_checksum_help(pskb, inward))
return 0;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b79962e225f7..2a3e3eb424e3 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -28,7 +28,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("iptables syslog logging module");
static unsigned int nflog = 1;
-MODULE_PARM(nflog, "i");
+module_param(nflog, uint, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
#if 0
@@ -45,9 +45,10 @@ static void dump_packet(const struct ipt_log_info *info,
const struct sk_buff *skb,
unsigned int iphoff)
{
- struct iphdr iph;
+ struct iphdr _iph, *ih;
- if (skb_copy_bits(skb, iphoff, &iph, sizeof(iph)) < 0) {
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (ih == NULL) {
printk("TRUNCATED");
return;
}
@@ -56,32 +57,34 @@ static void dump_packet(const struct ipt_log_info *info,
* TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
- NIPQUAD(iph.saddr), NIPQUAD(iph.daddr));
+ NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
- ntohs(iph.tot_len), iph.tos & IPTOS_TOS_MASK,
- iph.tos & IPTOS_PREC_MASK, iph.ttl, ntohs(iph.id));
+ ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
/* Max length: 6 "CE DF MF " */
- if (ntohs(iph.frag_off) & IP_CE)
+ if (ntohs(ih->frag_off) & IP_CE)
printk("CE ");
- if (ntohs(iph.frag_off) & IP_DF)
+ if (ntohs(ih->frag_off) & IP_DF)
printk("DF ");
- if (ntohs(iph.frag_off) & IP_MF)
+ if (ntohs(ih->frag_off) & IP_MF)
printk("MF ");
/* Max length: 11 "FRAG:65535 " */
- if (ntohs(iph.frag_off) & IP_OFFSET)
- printk("FRAG:%u ", ntohs(iph.frag_off) & IP_OFFSET);
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
if ((info->logflags & IPT_LOG_IPOPT)
- && iph.ihl * 4 > sizeof(struct iphdr)) {
- unsigned char opt[4 * 15 - sizeof(struct iphdr)];
+ && ih->ihl * 4 > sizeof(struct iphdr)) {
+ unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op;
unsigned int i, optsize;
- optsize = iph.ihl * 4 - sizeof(struct iphdr);
- if (skb_copy_bits(skb, iphoff+sizeof(iph), opt, optsize) < 0) {
+ optsize = ih->ihl * 4 - sizeof(struct iphdr);
+ op = skb_header_pointer(skb, iphoff+sizeof(_iph),
+ optsize, _opt);
+ if (op == NULL) {
printk("TRUNCATED");
return;
}
@@ -89,67 +92,71 @@ static void dump_packet(const struct ipt_log_info *info,
/* Max length: 127 "OPT (" 15*4*2chars ") " */
printk("OPT (");
for (i = 0; i < optsize; i++)
- printk("%02X", opt[i]);
+ printk("%02X", op[i]);
printk(") ");
}
- switch (iph.protocol) {
+ switch (ih->protocol) {
case IPPROTO_TCP: {
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
/* Max length: 10 "PROTO=TCP " */
printk("PROTO=TCP ");
- if (ntohs(iph.frag_off) & IP_OFFSET)
+ if (ntohs(ih->frag_off) & IP_OFFSET)
break;
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (skb_copy_bits(skb, iphoff+iph.ihl*4, &tcph, sizeof(tcph))
- < 0) {
+ th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - iph.ihl*4);
+ skb->len - iphoff - ih->ihl*4);
break;
}
/* Max length: 20 "SPT=65535 DPT=65535 " */
printk("SPT=%u DPT=%u ",
- ntohs(tcph.source), ntohs(tcph.dest));
+ ntohs(th->source), ntohs(th->dest));
/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
if (info->logflags & IPT_LOG_TCPSEQ)
printk("SEQ=%u ACK=%u ",
- ntohl(tcph.seq), ntohl(tcph.ack_seq));
+ ntohl(th->seq), ntohl(th->ack_seq));
/* Max length: 13 "WINDOW=65535 " */
- printk("WINDOW=%u ", ntohs(tcph.window));
+ printk("WINDOW=%u ", ntohs(th->window));
/* Max length: 9 "RES=0x3F " */
- printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(&tcph) & TCP_RESERVED_BITS) >> 22));
+ printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
- if (tcph.cwr)
+ if (th->cwr)
printk("CWR ");
- if (tcph.ece)
+ if (th->ece)
printk("ECE ");
- if (tcph.urg)
+ if (th->urg)
printk("URG ");
- if (tcph.ack)
+ if (th->ack)
printk("ACK ");
- if (tcph.psh)
+ if (th->psh)
printk("PSH ");
- if (tcph.rst)
+ if (th->rst)
printk("RST ");
- if (tcph.syn)
+ if (th->syn)
printk("SYN ");
- if (tcph.fin)
+ if (th->fin)
printk("FIN ");
/* Max length: 11 "URGP=65535 " */
- printk("URGP=%u ", ntohs(tcph.urg_ptr));
+ printk("URGP=%u ", ntohs(th->urg_ptr));
if ((info->logflags & IPT_LOG_TCPOPT)
- && tcph.doff * 4 > sizeof(struct tcphdr)) {
- unsigned char opt[4 * 15 - sizeof(struct tcphdr)];
+ && th->doff * 4 > sizeof(struct tcphdr)) {
+ unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
+ unsigned char *op;
unsigned int i, optsize;
- optsize = tcph.doff * 4 - sizeof(struct tcphdr);
- if (skb_copy_bits(skb, iphoff+iph.ihl*4 + sizeof(tcph),
- opt, optsize) < 0) {
+ optsize = th->doff * 4 - sizeof(struct tcphdr);
+ op = skb_header_pointer(skb,
+ iphoff+ih->ihl*4+sizeof(_tcph),
+ optsize, _opt);
+ if (op == NULL) {
printk("TRUNCATED");
return;
}
@@ -157,36 +164,37 @@ static void dump_packet(const struct ipt_log_info *info,
/* Max length: 127 "OPT (" 15*4*2chars ") " */
printk("OPT (");
for (i = 0; i < optsize; i++)
- printk("%02X", opt[i]);
+ printk("%02X", op[i]);
printk(") ");
}
break;
}
case IPPROTO_UDP: {
- struct udphdr udph;
+ struct udphdr _udph, *uh;
/* Max length: 10 "PROTO=UDP " */
printk("PROTO=UDP ");
- if (ntohs(iph.frag_off) & IP_OFFSET)
+ if (ntohs(ih->frag_off) & IP_OFFSET)
break;
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (skb_copy_bits(skb, iphoff+iph.ihl*4, &udph, sizeof(udph))
- < 0) {
+ uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL) {
printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - iph.ihl*4);
+ skb->len - iphoff - ih->ihl*4);
break;
}
/* Max length: 20 "SPT=65535 DPT=65535 " */
printk("SPT=%u DPT=%u LEN=%u ",
- ntohs(udph.source), ntohs(udph.dest),
- ntohs(udph.len));
+ ntohs(uh->source), ntohs(uh->dest),
+ ntohs(uh->len));
break;
}
case IPPROTO_ICMP: {
- struct icmphdr icmph;
+ struct icmphdr _icmph, *ich;
static size_t required_len[NR_ICMP_TYPES+1]
= { [ICMP_ECHOREPLY] = 4,
[ICMP_DEST_UNREACH]
@@ -208,47 +216,48 @@ static void dump_packet(const struct ipt_log_info *info,
/* Max length: 11 "PROTO=ICMP " */
printk("PROTO=ICMP ");
- if (ntohs(iph.frag_off) & IP_OFFSET)
+ if (ntohs(ih->frag_off) & IP_OFFSET)
break;
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (skb_copy_bits(skb, iphoff+iph.ihl*4, &icmph, sizeof(icmph))
- < 0) {
+ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_icmph), &_icmph);
+ if (ich == NULL) {
printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - iph.ihl*4);
+ skb->len - iphoff - ih->ihl*4);
break;
}
/* Max length: 18 "TYPE=255 CODE=255 " */
- printk("TYPE=%u CODE=%u ", icmph.type, icmph.code);
+ printk("TYPE=%u CODE=%u ", ich->type, ich->code);
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (icmph.type <= NR_ICMP_TYPES
- && required_len[icmph.type]
- && skb->len-iphoff-iph.ihl*4 < required_len[icmph.type]) {
+ if (ich->type <= NR_ICMP_TYPES
+ && required_len[ich->type]
+ && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - iph.ihl*4);
+ skb->len - iphoff - ih->ihl*4);
break;
}
- switch (icmph.type) {
+ switch (ich->type) {
case ICMP_ECHOREPLY:
case ICMP_ECHO:
/* Max length: 19 "ID=65535 SEQ=65535 " */
printk("ID=%u SEQ=%u ",
- ntohs(icmph.un.echo.id),
- ntohs(icmph.un.echo.sequence));
+ ntohs(ich->un.echo.id),
+ ntohs(ich->un.echo.sequence));
break;
case ICMP_PARAMETERPROB:
/* Max length: 14 "PARAMETER=255 " */
printk("PARAMETER=%u ",
- ntohl(icmph.un.gateway) >> 24);
+ ntohl(ich->un.gateway) >> 24);
break;
case ICMP_REDIRECT:
/* Max length: 24 "GATEWAY=255.255.255.255 " */
printk("GATEWAY=%u.%u.%u.%u ",
- NIPQUAD(icmph.un.gateway));
+ NIPQUAD(ich->un.gateway));
/* Fall through */
case ICMP_DEST_UNREACH:
case ICMP_SOURCE_QUENCH:
@@ -257,62 +266,65 @@ static void dump_packet(const struct ipt_log_info *info,
if (!iphoff) { /* Only recurse once. */
printk("[");
dump_packet(info, skb,
- iphoff + iph.ihl*4+sizeof(icmph));
+ iphoff + ih->ihl*4+sizeof(_icmph));
printk("] ");
}
/* Max length: 10 "MTU=65535 " */
- if (icmph.type == ICMP_DEST_UNREACH
- && icmph.code == ICMP_FRAG_NEEDED)
- printk("MTU=%u ", ntohs(icmph.un.frag.mtu));
+ if (ich->type == ICMP_DEST_UNREACH
+ && ich->code == ICMP_FRAG_NEEDED)
+ printk("MTU=%u ", ntohs(ich->un.frag.mtu));
}
break;
}
/* Max Length */
case IPPROTO_AH: {
- struct ip_auth_hdr ah;
+ struct ip_auth_hdr _ahdr, *ah;
- if (ntohs(iph.frag_off) & IP_OFFSET)
+ if (ntohs(ih->frag_off) & IP_OFFSET)
break;
/* Max length: 9 "PROTO=AH " */
printk("PROTO=AH ");
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (skb_copy_bits(skb, iphoff+iph.ihl*4, &ah, sizeof(ah)) < 0) {
+ ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - iph.ihl*4);
+ skb->len - iphoff - ih->ihl*4);
break;
}
/* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(ah.spi));
+ printk("SPI=0x%x ", ntohl(ah->spi));
break;
}
case IPPROTO_ESP: {
- struct ip_esp_hdr esph;
+ struct ip_esp_hdr _esph, *eh;
/* Max length: 10 "PROTO=ESP " */
printk("PROTO=ESP ");
- if (ntohs(iph.frag_off) & IP_OFFSET)
+ if (ntohs(ih->frag_off) & IP_OFFSET)
break;
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (skb_copy_bits(skb, iphoff+iph.ihl*4, &esph, sizeof(esph))
- < 0) {
+ eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_esph), &_esph);
+ if (eh == NULL) {
printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - iph.ihl*4);
+ skb->len - iphoff - ih->ihl*4);
break;
}
/* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(esph.spi));
+ printk("SPI=0x%x ", ntohl(eh->spi));
break;
}
/* Max length: 10 "PROTO 255 " */
default:
- printk("PROTO=%u ", iph.protocol);
+ printk("PROTO=%u ", ih->protocol);
}
/* Proto Max log string length */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 54bc4684cc9d..ea02a12d7625 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -92,8 +92,8 @@ masquerade_target(struct sk_buff **pskb,
return NF_ACCEPT;
ct = ip_conntrack_get(*pskb, &ctinfo);
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW
- || ctinfo == IP_CT_RELATED));
+ IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+ || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
mr = targinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b8018cb023ff..9637b75fd71e 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -103,7 +103,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, int hook)
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- struct tcphdr otcph, *tcph;
+ struct tcphdr _otcph, *oth, *tcph;
struct rtable *rt;
u_int16_t tmp_port;
u_int32_t tmp_addr;
@@ -114,12 +114,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
return;
- if (skb_copy_bits(oldskb, oldskb->nh.iph->ihl*4,
- &otcph, sizeof(otcph)) < 0)
+ oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+ sizeof(_otcph), &_otcph);
+ if (oth == NULL)
return;
/* No RST for RST. */
- if (otcph.rst)
+ if (oth->rst)
return;
/* FIXME: Check checksum --RR */
@@ -167,13 +168,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
if (tcph->ack) {
needs_ack = 0;
- tcph->seq = otcph.ack_seq;
+ tcph->seq = oth->ack_seq;
tcph->ack_seq = 0;
} else {
needs_ack = 1;
- tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
+ tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
+ oldskb->len - oldskb->nh.iph->ihl*4
- - (otcph.doff<<2));
+ - (oth->doff<<2));
tcph->seq = 0;
}
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index c352df54aa1f..51d16d33bcbd 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -34,8 +34,8 @@
* by that factor.
*
* flushtimeout:
- * Specify, after how many clock ticks (intel: 100 per second) the queue
- * should be flushed even if it is not full yet.
+ * Specify, after how many hundredths of a second the queue should be
+ * flushed even if it is not full yet.
*
* ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
*/
@@ -50,6 +50,7 @@
#include <linux/netlink.h>
#include <linux/netdevice.h>
#include <linux/mm.h>
+#include <linux/moduleparam.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
@@ -74,15 +75,15 @@ MODULE_DESCRIPTION("iptables userspace logging module");
#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
static unsigned int nlbufsiz = 4096;
-MODULE_PARM(nlbufsiz, "i");
+module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */
MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
-static unsigned int flushtimeout = 10 * HZ;
-MODULE_PARM(flushtimeout, "i");
-MODULE_PARM_DESC(flushtimeout, "buffer flush timeout");
+static unsigned int flushtimeout = 10;
+module_param(flushtimeout, uint, 0600);
+MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
static unsigned int nflog = 1;
-MODULE_PARM(nflog, "i");
+module_param(nflog, uint, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
/* global data structures */
@@ -97,7 +98,6 @@ typedef struct {
static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
static struct sock *nflognl; /* our socket */
-static size_t qlen; /* current length of multipart-nlmsg */
DECLARE_LOCK(ulog_lock); /* spinlock */
/* send one ulog_buff_t to userspace */
@@ -116,7 +116,7 @@ static void ulog_send(unsigned int nlgroupnum)
NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum);
DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n",
- ub->qlen, nlgroup);
+ ub->qlen, nlgroupnum);
netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC);
ub->qlen = 0;
@@ -126,7 +126,7 @@ static void ulog_send(unsigned int nlgroupnum)
}
-/* timer function to flush queue in ULOG_FLUSH_INTERVAL time */
+/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
@@ -261,20 +261,21 @@ static void ipt_ulog_packet(unsigned int hooknum,
ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
}
- /* if threshold is reached, send message to userspace */
- if (qlen >= loginfo->qthreshold) {
- if (loginfo->qthreshold > 1)
- nlh->nlmsg_type = NLMSG_DONE;
- }
-
ub->lastnlh = nlh;
/* if timer isn't already running, start it */
if (!timer_pending(&ub->timer)) {
- ub->timer.expires = jiffies + flushtimeout;
+ ub->timer.expires = jiffies + flushtimeout * HZ / 100;
add_timer(&ub->timer);
}
+ /* if threshold is reached, send message to userspace */
+ if (ub->qlen >= loginfo->qthreshold) {
+ if (loginfo->qthreshold > 1)
+ nlh->nlmsg_type = NLMSG_DONE;
+ ulog_send(groupnum);
+ }
+
UNLOCK_BH(&ulog_lock);
return;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 1f0d7652f6dc..a0fea847cb72 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -43,23 +43,26 @@ match(const struct sk_buff *skb,
int offset,
int *hotdrop)
{
- struct ip_auth_hdr ah;
+ struct ip_auth_hdr _ahdr, *ah;
const struct ipt_ah *ahinfo = matchinfo;
/* Must not be a fragment. */
if (offset)
return 0;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &ah, sizeof(ah)) < 0) {
+ ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
/* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
+ * can't. Hence, no choice but to drop.
+ */
duprintf("Dropping evil AH tinygram.\n");
*hotdrop = 1;
return 0;
}
return spi_match(ahinfo->spis[0], ahinfo->spis[1],
- ntohl(ah.spi),
+ ntohl(ah->spi),
!!(ahinfo->invflags & IPT_AH_INV_SPI));
}
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 0e1efd764fc6..b6f7181e89cc 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -30,31 +30,34 @@ static inline int match_tcp(const struct sk_buff *skb,
const struct ipt_ecn_info *einfo,
int *hotdrop)
{
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
/* In practice, TCP match does this, so can't fail. But let's
- be good citizens. */
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
+ * be good citizens.
+ */
+ th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
*hotdrop = 0;
return 0;
}
if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
- if (tcph.ece == 1)
+ if (th->ece == 1)
return 0;
} else {
- if (tcph.ece == 0)
+ if (th->ece == 0)
return 0;
}
}
if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
- if (tcph.cwr == 1)
+ if (th->cwr == 1)
return 0;
} else {
- if (tcph.cwr == 0)
+ if (th->cwr == 0)
return 0;
}
}
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
index c3b889378844..e1d0dd31e117 100644
--- a/net/ipv4/netfilter/ipt_esp.c
+++ b/net/ipv4/netfilter/ipt_esp.c
@@ -44,23 +44,26 @@ match(const struct sk_buff *skb,
int offset,
int *hotdrop)
{
- struct ip_esp_hdr esp;
+ struct ip_esp_hdr _esp, *eh;
const struct ipt_esp *espinfo = matchinfo;
/* Must not be a fragment. */
if (offset)
return 0;
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &esp, sizeof(esp)) < 0) {
+ eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_esp), &_esp);
+ if (eh == NULL) {
/* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
+ * can't. Hence, no choice but to drop.
+ */
duprintf("Dropping evil ESP tinygram.\n");
*hotdrop = 1;
return 0;
}
return spi_match(espinfo->spis[0], espinfo->spis[1],
- ntohl(esp.spi),
+ ntohl(eh->spi),
!!(espinfo->invflags & IPT_ESP_INV_SPI));
}
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 64e7999b049c..7fdf41e22c86 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -54,7 +54,7 @@ match(const struct sk_buff *skb,
int offset,
int *hotdrop)
{
- u16 ports[2];
+ u16 _ports[2], *pptr;
const struct ipt_multiport *multiinfo = matchinfo;
/* Must not be a fragment. */
@@ -63,18 +63,21 @@ match(const struct sk_buff *skb,
/* Must be big enough to read ports (both UDP and TCP have
them at the start). */
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, ports, sizeof(ports)) < 0) {
+ pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_ports), &_ports[0]);
+ if (pptr == NULL) {
/* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
- duprintf("ipt_multiport:"
- " Dropping evil offset=0 tinygram.\n");
- *hotdrop = 1;
- return 0;
+ * can't. Hence, no choice but to drop.
+ */
+ duprintf("ipt_multiport:"
+ " Dropping evil offset=0 tinygram.\n");
+ *hotdrop = 1;
+ return 0;
}
return ports_match(multiinfo->ports,
multiinfo->flags, multiinfo->count,
- ntohs(ports[0]), ntohs(ports[1]));
+ ntohs(pptr[0]), ntohs(pptr[1]));
}
/* Called when user tries to insert an entry of this type. */
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 08b786ac34dd..15472b3e9e56 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -15,6 +15,7 @@
#include <linux/ctype.h>
#include <linux/ip.h>
#include <linux/vmalloc.h>
+#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_recent.h>
@@ -37,12 +38,12 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. htt
MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
MODULE_LICENSE("GPL");
-MODULE_PARM(ip_list_tot,"i");
-MODULE_PARM(ip_pkt_list_tot,"i");
-MODULE_PARM(ip_list_hash_size,"i");
-MODULE_PARM(ip_list_perms,"i");
+module_param(ip_list_tot, int, 0400);
+module_param(ip_pkt_list_tot, int, 0400);
+module_param(ip_list_hash_size, int, 0400);
+module_param(ip_list_perms, int, 0400);
#ifdef DEBUG
-MODULE_PARM(debug,"i");
+module_param(debug, int, 0600);
MODULE_PARM_DESC(debug,"debugging level, defaults to 1");
#endif
MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
diff --git a/net/ipv4/netfilter/ipt_sctp.c b/net/ipv4/netfilter/ipt_sctp.c
new file mode 100644
index 000000000000..8f875940b8dc
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_sctp.c
@@ -0,0 +1,201 @@
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <linux/sctp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_sctp.h>
+
+/* Debug tracing: expands to nothing unless DEBUG_SCTP is defined. */
+#ifndef DEBUG_SCTP
+#define duprintf(format, args...)
+#else
+#define duprintf(format, args...) printk(format , ## args)
+#endif
+
+/*
+ * SCCHECK - evaluate one option of a rule.
+ *
+ * True when @option is not selected in @flag.  Otherwise the result is
+ * @cond, inverted when @option is also set in @invflag.  @cond is only
+ * evaluated when @option is selected in @flag.
+ */
+#define SCCHECK(cond, option, flag, invflag) \
+	(((flag) & (option)) == 0 \
+	 || ((((invflag) & (option)) != 0) ^ (cond)))
+
+/*
+ * Compare a chunk's flags with the per-chunk-type flag table.
+ *
+ * The first table entry whose chunktype equals @chunktype decides the
+ * result: the chunk matches when its flags, masked with the entry's
+ * flag_mask, equal the entry's flag.  A chunk type that has no entry
+ * in the table always matches.
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+static int
+match_flags(const struct ipt_sctp_flag_info *flag_info,
+	    const int flag_count,
+	    u_int8_t chunktype,
+	    u_int8_t chunkflags)
+{
+	int idx = 0;
+
+	while (idx < flag_count) {
+		const struct ipt_sctp_flag_info *entry = &flag_info[idx++];
+
+		if (entry->chunktype != chunktype)
+			continue;
+
+		return (chunkflags & entry->flag_mask) == entry->flag;
+	}
+
+	/* No entry constrains this chunk type. */
+	return 1;
+}
+
+/*
+ * Walk the chunk headers that follow the SCTP common header and
+ * evaluate them against @chunkmap according to @chunk_match_type:
+ *
+ *   SCTP_CHUNK_MATCH_ANY  - match as soon as one chunk whose type is set
+ *                           in the map passes match_flags();
+ *   SCTP_CHUNK_MATCH_ALL  - match only if every type set in the map was
+ *                           seen with acceptable flags;
+ *   SCTP_CHUNK_MATCH_ONLY - fail on the first chunk whose type is not in
+ *                           the map or whose flags are rejected.
+ *
+ * Returns 1 on match, 0 otherwise.  When a chunk header cannot be read,
+ * or carries a zero length, *hotdrop is set and 0 is returned.
+ */
+static int
+match_packet(const struct sk_buff *skb,
+	     const u_int32_t *chunkmap,
+	     int chunk_match_type,
+	     const struct ipt_sctp_flag_info *flag_info,
+	     const int flag_count,
+	     int *hotdrop)
+{
+	int offset;
+	/*
+	 * Working copy of the map for MATCH_ALL; seen chunk types are
+	 * cleared from it.  Zero-filled so that no word is indeterminate
+	 * when it is tested after the loop.
+	 * NOTE(review): the SCTP_CHUNKMAP_* helpers come from ipt_sctp.h,
+	 * which is not visible here - confirm how many words they touch.
+	 */
+	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)] = { 0 };
+	sctp_chunkhdr_t sch;
+
+#ifdef DEBUG_SCTP
+	int i = 0;
+#endif
+
+	if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) {
+		SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
+	}
+
+	offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t);
+	do {
+		/*
+		 * A zero chunk length would leave 'offset' unchanged and
+		 * the loop would never terminate, so treat it exactly like
+		 * an unreadable header.
+		 */
+		if (skb_copy_bits(skb, offset, &sch, sizeof(sch)) < 0
+		    || sch.length == 0) {
+			duprintf("Dropping invalid SCTP packet.\n");
+			*hotdrop = 1;
+			return 0;
+		}
+
+		duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
+			 ++i, offset, sch.type, ntohs(sch.length), sch.flags);
+
+		/* Advance by the chunk length rounded up to 4 bytes. */
+		offset += (ntohs(sch.length) + 3) & ~3;
+
+		duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
+
+		if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch.type)) {
+			switch (chunk_match_type) {
+			case SCTP_CHUNK_MATCH_ANY:
+				if (match_flags(flag_info, flag_count,
+						sch.type, sch.flags)) {
+					return 1;
+				}
+				break;
+
+			case SCTP_CHUNK_MATCH_ALL:
+				if (match_flags(flag_info, flag_count,
+						sch.type, sch.flags)) {
+					SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch.type);
+				}
+				break;
+
+			case SCTP_CHUNK_MATCH_ONLY:
+				if (!match_flags(flag_info, flag_count,
+						 sch.type, sch.flags)) {
+					return 0;
+				}
+				break;
+			}
+		} else {
+			switch (chunk_match_type) {
+			case SCTP_CHUNK_MATCH_ONLY:
+				return 0;
+			}
+		}
+	} while (offset < skb->len);
+
+	switch (chunk_match_type) {
+	case SCTP_CHUNK_MATCH_ALL:
+		/*
+		 * Test the working copy: the rule's own map is never
+		 * modified, so checking it would ignore what was seen.
+		 */
+		return SCTP_CHUNKMAP_IS_CLEAR(chunkmapcopy);
+	case SCTP_CHUNK_MATCH_ANY:
+		return 0;
+	case SCTP_CHUNK_MATCH_ONLY:
+		return 1;
+	}
+
+	/* Only reached for a chunk_match_type none of the cases handle. */
+	return 0;
+}
+
+/*
+ * Match callback for the "sctp" match.
+ *
+ * Reads the SCTP common header that follows the IP header and checks
+ * the source port range, destination port range and chunk types that
+ * the rule selects in info->flags, each optionally inverted through
+ * info->invflags (see SCCHECK).
+ *
+ * Returns non-zero on match.  A non-zero fragment @offset never
+ * matches.  When the common header cannot be read, *hotdrop is set
+ * and 0 is returned.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_sctp_info *info = matchinfo;
+	sctp_sctphdr_t sh;
+	u_int16_t sport, dport;
+
+	if (offset) {
+		duprintf("Dropping non-first fragment.. FIXME\n");
+		return 0;
+	}
+
+	if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &sh, sizeof(sh)) < 0) {
+		duprintf("Dropping evil SCTP offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	/* Convert the ports once; both range checks use host order. */
+	sport = ntohs(sh.source);
+	dport = ntohs(sh.dest);
+	duprintf("spt: %d\tdpt: %d\n", sport, dport);
+
+	return SCCHECK((sport >= info->spts[0] && sport <= info->spts[1]),
+		       IPT_SCTP_SRC_PORTS, info->flags, info->invflags)
+		&& SCCHECK((dport >= info->dpts[0] && dport <= info->dpts[1]),
+			   IPT_SCTP_DEST_PORTS, info->flags, info->invflags)
+		&& SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type,
+					info->flag_info, info->flag_count,
+					hotdrop),
+			   IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+}
+
+/*
+ * Validate a rule that uses the "sctp" match before it is inserted.
+ *
+ * Accepts the rule (returns non-zero) only when:
+ *  - the rule is restricted to IPPROTO_SCTP, without inversion;
+ *  - @matchsize is the aligned size of struct ipt_sctp_info;
+ *  - flags/invflags contain only IPT_SCTP_VALID_FLAGS bits, and every
+ *    inverted option is also selected;
+ *  - flag_count does not exceed the capacity of flag_info;
+ *  - when chunk types are selected, chunk_match_type has at least one
+ *    of the ALL/ANY/ONLY bits set.
+ */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct ipt_sctp_info *info = matchinfo;
+
+	/*
+	 * 'info' is only dereferenced after the matchsize test has
+	 * passed; the && chain below must keep that order.
+	 *
+	 * flag_count is supplied with the rule and is later used by
+	 * match_flags() to index flag_info, so it must be bounded here.
+	 * The unsigned cast also rejects negative counts.
+	 * NOTE(review): assumes flag_info is a fixed-size array member of
+	 * struct ipt_sctp_info (declared in ipt_sctp.h) - confirm.
+	 */
+	return ip->proto == IPPROTO_SCTP
+		&& !(ip->invflags & IPT_INV_PROTO)
+		&& matchsize == IPT_ALIGN(sizeof(struct ipt_sctp_info))
+		&& !(info->flags & ~IPT_SCTP_VALID_FLAGS)
+		&& !(info->invflags & ~IPT_SCTP_VALID_FLAGS)
+		&& !(info->invflags & ~info->flags)
+		&& (unsigned int)info->flag_count
+			<= sizeof(info->flag_info) / sizeof(info->flag_info[0])
+		&& ((!(info->flags & IPT_SCTP_CHUNK_TYPES)) ||
+			(info->chunk_match_type &
+				(SCTP_CHUNK_MATCH_ALL
+				| SCTP_CHUNK_MATCH_ANY
+				| SCTP_CHUNK_MATCH_ONLY)));
+}
+
+/*
+ * Descriptor handed to ipt_register_match().  Members that are not
+ * named here (list, destroy) are zero-initialised.
+ */
+static struct ipt_match sctp_match = {
+	.name		= "sctp",
+	.match		= match,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+/* Module load: register sctp_match; the registration result is returned. */
+static int __init init(void)
+{
+	int err = ipt_register_match(&sctp_match);
+
+	return err;
+}
+module_init(init);
+
+/* Module unload: unregister the match that init() registered. */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&sctp_match);
+}
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Match for SCTP protocol packets");
+
diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/ipv4/netfilter/ipt_tcpmss.c
index c7cb62ade3f4..5cda547e011e 100644
--- a/net/ipv4/netfilter/ipt_tcpmss.c
+++ b/net/ipv4/netfilter/ipt_tcpmss.c
@@ -27,37 +27,45 @@ mssoption_match(u_int16_t min, u_int16_t max,
int invert,
int *hotdrop)
{
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
- u8 opt[15 * 4 - sizeof(tcph)];
+ u8 _opt[15 * 4 - sizeof(_tcph)], *op;
unsigned int i, optlen;
/* If we don't have the whole header, drop packet. */
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0)
+ th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
goto dropit;
/* Malformed. */
- if (tcph.doff*4 < sizeof(tcph))
+ if (th->doff*4 < sizeof(*th))
goto dropit;
- optlen = tcph.doff*4 - sizeof(tcph);
+ optlen = th->doff*4 - sizeof(*th);
+ if (!optlen)
+ goto out;
+
/* Truncated options. */
- if (skb_copy_bits(skb, skb->nh.iph->ihl*4+sizeof(tcph), opt, optlen)<0)
+ op = skb_header_pointer(skb, skb->nh.iph->ihl * 4 + sizeof(*th),
+ optlen, _opt);
+ if (op == NULL)
goto dropit;
for (i = 0; i < optlen; ) {
- if (opt[i] == TCPOPT_MSS
+ if (op[i] == TCPOPT_MSS
&& (optlen - i) >= TCPOLEN_MSS
- && opt[i+1] == TCPOLEN_MSS) {
+ && op[i+1] == TCPOLEN_MSS) {
u_int16_t mssval;
- mssval = (opt[i+2] << 8) | opt[i+3];
+ mssval = (op[i+2] << 8) | op[i+3];
return (mssval >= min && mssval <= max) ^ invert;
}
- if (opt[i] < 2) i++;
- else i += opt[i+1]?:1;
+ if (op[i] < 2) i++;
+ else i += op[i+1]?:1;
}
+out:
return invert;
dropit:
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 8fb2ed9d1f9a..6b291da92656 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -11,6 +11,7 @@
*/
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
MODULE_LICENSE("GPL");
@@ -155,7 +156,7 @@ static struct nf_hook_ops ipt_ops[] = {
/* Default to forward because I got too much mail already. */
static int forward = NF_ACCEPT;
-MODULE_PARM(forward, "i");
+module_param(forward, bool, 0000);
static int __init init(void)
{
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1cfd749d651e..c17f8716ecdd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -323,6 +323,51 @@ error:
return err;
}
+/*
+ * Peek at the first bytes of the user-supplied payload and copy
+ * protocol-specific fields into the flow key @fl.
+ *
+ * Only IPPROTO_ICMP is handled: the first payload byte is stored in
+ * fl->fl_icmp_type and the second in fl->fl_icmp_code.  The two bytes
+ * may live in different iovec segments.  @fl is left untouched when
+ * the payload holds fewer than two bytes or when either byte cannot be
+ * read from user space.
+ */
+static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+{
+	u8 __user *type = NULL;
+	u8 __user *code = NULL;
+	int i;
+
+	if (!msg->msg_iov)
+		return;
+
+	/* No other protocol has fields to probe. */
+	if (fl->proto != IPPROTO_ICMP)
+		return;
+
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		struct iovec *iov = &msg->msg_iov[i];
+
+		/* Skip segments that cannot supply even one byte. */
+		if (!iov->iov_base || iov->iov_len < 1)
+			continue;
+
+		if (!type) {
+			type = iov->iov_base;
+			/* The code byte follows if this segment is long enough. */
+			if (iov->iov_len > 1)
+				code = type + 1;
+		} else if (!code)
+			code = iov->iov_base;
+
+		if (type && code) {
+			u8 icmp_type, icmp_code;
+
+			/*
+			 * Both pointers are checked by get_user(): 'code'
+			 * may come from a different segment than 'type',
+			 * so the check on 'type' does not cover it.
+			 */
+			if (get_user(icmp_type, type) ||
+			    get_user(icmp_code, code))
+				return;
+
+			fl->fl_icmp_type = icmp_type;
+			fl->fl_icmp_code = icmp_code;
+			return;
+		}
+	}
+}
+
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
@@ -429,6 +474,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.proto = inet->hdrincl ? IPPROTO_RAW :
sk->sk_protocol,
};
+ if (!inet->hdrincl)
+ raw_probe_proto_opt(&fl, msg);
+
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
}
if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92c79da5f297..aa8581413cc6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1339,9 +1339,12 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, int how)
{
struct rtable *rt = (struct rtable *) dst;
struct in_device *idev = rt->idev;
- if (idev) {
- rt->idev = NULL;
- in_dev_put(idev);
+ if (idev && idev->dev != &loopback_dev) {
+ struct in_device *loopback_idev = in_dev_get(&loopback_dev);
+ if (loopback_idev) {
+ rt->idev = loopback_idev;
+ in_dev_put(idev);
+ }
}
}
@@ -1384,13 +1387,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
if (rt->fl.iif == 0)
src = rt->rt_src;
else if (fib_lookup(&rt->fl, &res) == 0) {
-#ifdef CONFIG_IP_ROUTE_NAT
- if (res.type == RTN_NAT)
- src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
- RT_SCOPE_UNIVERSE);
- else
-#endif
- src = FIB_RES_PREFSRC(res);
+ src = FIB_RES_PREFSRC(res);
fib_res_put(&res);
} else
src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
@@ -1494,10 +1491,6 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
#endif
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
-#ifdef CONFIG_IP_ROUTE_NAT
- rth->rt_dst_map = daddr;
- rth->rt_src_map = saddr;
-#endif
#ifdef CONFIG_NET_CLS_ROUTE
rth->u.dst.tclassid = itag;
#endif
@@ -1607,31 +1600,6 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
RT_CACHE_STAT_INC(in_slow_tot);
-#ifdef CONFIG_IP_ROUTE_NAT
- /* Policy is applied before mapping destination,
- but rerouting after map should be made with old source.
- */
-
- if (1) {
- u32 src_map = saddr;
- if (res.r)
- src_map = fib_rules_policy(saddr, &res, &flags);
-
- if (res.type == RTN_NAT) {
- fl.fl4_dst = fib_rules_map_destination(daddr, &res);
- fib_res_put(&res);
- free_res = 0;
- if (fib_lookup(&fl, &res))
- goto e_inval;
- free_res = 1;
- if (res.type != RTN_UNICAST)
- goto e_inval;
- flags |= RTCF_DNAT;
- }
- fl.fl4_src = src_map;
- }
-#endif
-
if (res.type == RTN_BROADCAST)
goto brd_input;
@@ -1705,12 +1673,6 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
rth->rt_gateway = daddr;
-#ifdef CONFIG_IP_ROUTE_NAT
- rth->rt_src_map = fl.fl4_src;
- rth->rt_dst_map = fl.fl4_dst;
- if (flags&RTCF_DNAT)
- rth->rt_gateway = fl.fl4_dst;
-#endif
rth->rt_iif =
rth->fl.iif = dev->ifindex;
rth->u.dst.dev = out_dev->dev;
@@ -1773,10 +1735,6 @@ local_input:
#endif
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
-#ifdef CONFIG_IP_ROUTE_NAT
- rth->rt_dst_map = fl.fl4_dst;
- rth->rt_src_map = fl.fl4_src;
-#endif
#ifdef CONFIG_NET_CLS_ROUTE
rth->u.dst.tclassid = itag;
#endif
@@ -1897,7 +1855,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
if (MULTICAST(daddr)) {
struct in_device *in_dev;
- read_lock(&inetdev_lock);
+ rcu_read_lock();
if ((in_dev = __in_dev_get(dev)) != NULL) {
int our = ip_check_mc(in_dev, daddr, saddr,
skb->nh.iph->protocol);
@@ -1906,12 +1864,12 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
|| (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
#endif
) {
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
return ip_route_input_mc(skb, daddr, saddr,
tos, dev, our);
}
}
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
return -EINVAL;
}
return ip_route_input_slow(skb, daddr, saddr, tos, dev);
@@ -2069,9 +2027,6 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
}
free_res = 1;
- if (res.type == RTN_NAT)
- goto e_inval;
-
if (res.type == RTN_LOCAL) {
if (!fl.fl4_src)
fl.fl4_src = fl.fl4_dst;
@@ -2161,10 +2116,6 @@ make_route:
#endif
rth->rt_dst = fl.fl4_dst;
rth->rt_src = fl.fl4_src;
-#ifdef CONFIG_IP_ROUTE_NAT
- rth->rt_dst_map = fl.fl4_dst;
- rth->rt_src_map = fl.fl4_src;
-#endif
rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
rth->u.dst.dev = dev_out;
dev_hold(dev_out);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5e7f70f1c940..85643472b84d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -852,8 +852,10 @@ static void tcp_init_metrics(struct sock *sk)
* to low value, and then abruptly stops to do it and starts to delay
* ACKs, wait for troubles.
*/
- if (dst_metric(dst, RTAX_RTT) > tp->srtt)
+ if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
tp->srtt = dst_metric(dst, RTAX_RTT);
+ tp->rtt_seq = tp->snd_nxt;
+ }
if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
tp->mdev = dst_metric(dst, RTAX_RTTVAR);
tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 191cec718e95..1bf740e71c64 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -119,8 +119,7 @@ int xfrm4_output(struct sk_buff **pskb)
xfrm4_encap(skb);
- err = x->type->output(pskb);
- skb = *pskb;
+ err = x->type->output(skb);
if (err)
goto error;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3aacce604561..3ce69883bcc4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -12,8 +12,8 @@
#include <net/xfrm.h>
#include <net/ip.h>
-extern struct dst_ops xfrm4_dst_ops;
-extern struct xfrm_policy_afinfo xfrm4_policy_afinfo;
+static struct dst_ops xfrm4_dst_ops;
+static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
static struct xfrm_type_map xfrm4_type_map = { .lock = RW_LOCK_UNLOCKED };
@@ -183,6 +183,15 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
}
break;
+ case IPPROTO_ICMP:
+ if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
+ u8 *icmp = xprth;
+
+ fl->fl_icmp_type = icmp[0];
+ fl->fl_icmp_code = icmp[1];
+ }
+ break;
+
case IPPROTO_ESP:
if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
u32 *ehdr = (u32 *)xprth;
@@ -234,7 +243,7 @@ static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
path->ops->update_pmtu(path, mtu);
}
-struct dst_ops xfrm4_dst_ops = {
+static struct dst_ops xfrm4_dst_ops = {
.family = AF_INET,
.protocol = __constant_htons(ETH_P_IP),
.gc = xfrm4_garbage_collect,
@@ -243,7 +252,7 @@ struct dst_ops xfrm4_dst_ops = {
.entry_size = sizeof(struct xfrm_dst),
};
-struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
+static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.family = AF_INET,
.lock = RW_LOCK_UNLOCKED,
.type_map = &xfrm4_type_map,
@@ -254,12 +263,12 @@ struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.decode_session = _decode_session4,
};
-void __init xfrm4_policy_init(void)
+static void __init xfrm4_policy_init(void)
{
xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
}
-void __exit xfrm4_policy_fini(void)
+static void __exit xfrm4_policy_fini(void)
{
xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
}
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 0d1a0b0c7901..dcc04644ccec 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -4,13 +4,13 @@
*/
#include <linux/skbuff.h>
+#include <linux/module.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/protocol.h>
-static int ipip_output(struct sk_buff **pskb)
+static int ipip_output(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
iph = skb->nh.iph;
@@ -43,6 +43,8 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler)
return ret;
}
+EXPORT_SYMBOL(xfrm4_tunnel_register);
+
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler)
{
int ret;
@@ -60,6 +62,8 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler)
return ret;
}
+EXPORT_SYMBOL(xfrm4_tunnel_deregister);
+
static int ipip_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler = ipip_handler;
@@ -68,7 +72,7 @@ static int ipip_rcv(struct sk_buff *skb)
if (handler && handler->handler(skb) == 0)
return 0;
- return xfrm4_rcv_encap(skb, 0);
+ return xfrm4_rcv(skb);
}
static void ipip_err(struct sk_buff *skb, u32 info)
@@ -84,6 +88,10 @@ static int ipip_init_state(struct xfrm_state *x, void *args)
{
if (!x->props.mode)
return -EINVAL;
+
+ if (x->encap)
+ return -EINVAL;
+
x->props.header_len = sizeof(struct iphdr);
return 0;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 23c5759c022d..fd87a5a192da 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -48,6 +48,7 @@ config INET6_IPCOMP
tristate "IPv6: IPComp transformation"
depends on IPV6
select XFRM
+ select INET6_TUNNEL
select CRYPTO
select CRYPTO_DEFLATE
---help---
@@ -56,9 +57,21 @@ config INET6_IPCOMP
If unsure, say Y.
+config INET6_TUNNEL
+ tristate "IPv6: tunnel transformation"
+ depends on IPV6
+ select XFRM
+ ---help---
+ Support for generic IPv6-in-IPv6 tunnel transformation, which is
+ required by the IPv6-in-IPv6 tunneling module as well as tunnel mode
+ IPComp.
+
+ If unsure, say Y.
+
config IPV6_TUNNEL
tristate "IPv6: IPv6-in-IPv6 tunnel"
depends on IPV6
+ select INET6_TUNNEL
---help---
Support for IPv6-in-IPv6 tunnels described in RFC 2473.
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index d9e309fe8490..b39e04940590 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -11,12 +11,13 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
ip6_flowlabel.o ipv6_syms.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_tunnel.o xfrm6_output.o
+ xfrm6_output.o
ipv6-objs += $(ipv6-y)
obj-$(CONFIG_INET6_AH) += ah6.o
obj-$(CONFIG_INET6_ESP) += esp6.o
obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
+obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 40ad73c5cbb7..7150375908a8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -472,6 +472,8 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
printk("Freeing alive inet6 address %p\n", ifp);
return;
}
+ dst_release(&ifp->rt->u.dst);
+
inet6_ifa_count--;
kfree(ifp);
}
@@ -482,25 +484,33 @@ static struct inet6_ifaddr *
ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
int scope, unsigned flags)
{
- struct inet6_ifaddr *ifa;
+ struct inet6_ifaddr *ifa = NULL;
+ struct rt6_info *rt;
int hash;
static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+ int err = 0;
spin_lock_bh(&lock);
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(addr, idev->dev)) {
- spin_unlock_bh(&lock);
ADBG(("ipv6_add_addr: already assigned\n"));
- return ERR_PTR(-EEXIST);
+ err = -EEXIST;
+ goto out;
}
ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
if (ifa == NULL) {
- spin_unlock_bh(&lock);
ADBG(("ipv6_add_addr: malloc failed\n"));
- return ERR_PTR(-ENOBUFS);
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ rt = addrconf_dst_alloc(idev, addr, 0);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto out;
}
memset(ifa, 0, sizeof(struct inet6_ifaddr));
@@ -517,9 +527,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
read_lock(&addrconf_lock);
if (idev->dead) {
read_unlock(&addrconf_lock);
- spin_unlock_bh(&lock);
- kfree(ifa);
- return ERR_PTR(-ENODEV); /*XXX*/
+ err = -ENODEV; /*XXX*/
+ goto out;
}
inet6_ifa_count++;
@@ -553,12 +562,20 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
}
#endif
+ ifa->rt = rt;
+
in6_ifa_hold(ifa);
write_unlock_bh(&idev->lock);
read_unlock(&addrconf_lock);
+out:
spin_unlock_bh(&lock);
- notifier_call_chain(&inet6addr_chain,NETDEV_UP,ifa);
+ if (unlikely(err == 0))
+ notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+ else {
+ kfree(ifa);
+ ifa = ERR_PTR(err);
+ }
return ifa;
}
@@ -1457,8 +1474,7 @@ ok:
spin_unlock(&ifp->lock);
if (!(flags&IFA_F_TENTATIVE))
- ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ?
- 0 : RTM_NEWADDR, ifp);
+ ipv6_ifa_notify(0, ifp);
} else
spin_unlock(&ifp->lock);
@@ -2982,7 +2998,9 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
switch (event) {
case RTM_NEWADDR:
- ip6_rt_addr_add(&ifp->addr, ifp->idev->dev, 0);
+ dst_hold(&ifp->rt->u.dst);
+ if (ip6_ins_rt(ifp->rt, NULL, NULL))
+ dst_release(&ifp->rt->u.dst);
break;
case RTM_DELADDR:
addrconf_leave_solict(ifp->idev->dev, &ifp->addr);
@@ -2993,8 +3011,11 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (!ipv6_addr_any(&addr))
ipv6_dev_ac_dec(ifp->idev->dev, &addr);
}
- if (!ipv6_chk_addr(&ifp->addr, ifp->idev->dev, 1))
- ip6_rt_addr_del(&ifp->addr, ifp->idev->dev);
+ dst_hold(&ifp->rt->u.dst);
+ if (ip6_del_rt(ifp->rt, NULL, NULL))
+ dst_free(&ifp->rt->u.dst);
+ else
+ dst_release(&ifp->rt->u.dst);
break;
}
}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index eda2737e572b..32ecedb04abd 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -154,11 +154,11 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
return 0;
}
-int ah6_output(struct sk_buff **pskb)
+static int ah6_output(struct sk_buff *skb)
{
int err;
int extlen;
- struct dst_entry *dst = (*pskb)->dst;
+ struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
struct ipv6hdr *top_iph;
struct ip_auth_hdr *ah;
@@ -170,11 +170,11 @@ int ah6_output(struct sk_buff **pskb)
char hdrs[0];
} *tmp_ext;
- top_iph = (struct ipv6hdr *)(*pskb)->data;
- top_iph->payload_len = htons((*pskb)->len - sizeof(*top_iph));
+ top_iph = (struct ipv6hdr *)skb->data;
+ top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
- nexthdr = *(*pskb)->nh.raw;
- *(*pskb)->nh.raw = IPPROTO_AH;
+ nexthdr = *skb->nh.raw;
+ *skb->nh.raw = IPPROTO_AH;
/* When there are no extension headers, we only need to save the first
* 8 bytes of the base IP header.
@@ -182,7 +182,7 @@ int ah6_output(struct sk_buff **pskb)
memcpy(tmp_base, top_iph, sizeof(tmp_base));
tmp_ext = NULL;
- extlen = (*pskb)->h.raw - (unsigned char *)(top_iph + 1);
+ extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
if (extlen) {
extlen += sizeof(*tmp_ext);
tmp_ext = kmalloc(extlen, GFP_ATOMIC);
@@ -198,7 +198,7 @@ int ah6_output(struct sk_buff **pskb)
goto error_free_iph;
}
- ah = (struct ip_auth_hdr *)(*pskb)->h.raw;
+ ah = (struct ip_auth_hdr *)skb->h.raw;
ah->nexthdr = nexthdr;
top_iph->priority = 0;
@@ -214,7 +214,7 @@ int ah6_output(struct sk_buff **pskb)
ah->reserved = 0;
ah->spi = x->id.spi;
ah->seq_no = htonl(++x->replay.oseq);
- ahp->icv(ahp, *pskb, ah->auth_data);
+ ahp->icv(ahp, skb, ah->auth_data);
err = 0;
@@ -229,7 +229,7 @@ error:
return err;
}
-int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
{
/*
* Before process AH
@@ -319,8 +319,8 @@ out:
return -EINVAL;
}
-void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __u32 info)
{
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
@@ -353,6 +353,9 @@ static int ah6_init_state(struct xfrm_state *x, void *args)
if (x->aalg->alg_key_len > 512)
goto error;
+ if (x->encap)
+ goto error;
+
ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
if (ahp == NULL)
return -ENOMEM;
@@ -445,7 +448,7 @@ static struct inet6_protocol ah6_protocol = {
.flags = INET6_PROTO_NOPOLICY,
};
-int __init ah6_init(void)
+static int __init ah6_init(void)
{
if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 5b1e4d959f4a..537dc37be239 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -293,6 +293,7 @@ static void aca_put(struct ifacaddr6 *ac)
{
if (atomic_dec_and_test(&ac->aca_refcnt)) {
in6_dev_put(ac->aca_idev);
+ dst_release(&ac->aca_rt->u.dst);
kfree(ac);
}
}
@@ -304,6 +305,8 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
{
struct ifacaddr6 *aca;
struct inet6_dev *idev;
+ struct rt6_info *rt;
+ int err;
idev = in6_dev_get(dev);
@@ -312,17 +315,15 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
write_lock_bh(&idev->lock);
if (idev->dead) {
- write_unlock_bh(&idev->lock);
- in6_dev_put(idev);
- return -ENODEV;
+ err = -ENODEV;
+ goto out;
}
for (aca = idev->ac_list; aca; aca = aca->aca_next) {
if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) {
aca->aca_users++;
- write_unlock_bh(&idev->lock);
- in6_dev_put(idev);
- return 0;
+ err = 0;
+ goto out;
}
}
@@ -333,15 +334,22 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
if (aca == NULL) {
- write_unlock_bh(&idev->lock);
- in6_dev_put(idev);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rt = addrconf_dst_alloc(idev, addr, 1);
+ if (IS_ERR(rt)) {
+ kfree(aca);
+ err = PTR_ERR(rt);
+ goto out;
}
memset(aca, 0, sizeof(struct ifacaddr6));
ipv6_addr_copy(&aca->aca_addr, addr);
aca->aca_idev = idev;
+ aca->aca_rt = rt;
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -352,12 +360,18 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
idev->ac_list = aca;
write_unlock_bh(&idev->lock);
- ip6_rt_addr_add(&aca->aca_addr, dev, 1);
+ dst_hold(&rt->u.dst);
+ if (ip6_ins_rt(rt, NULL, NULL))
+ dst_release(&rt->u.dst);
addrconf_join_solict(dev, &aca->aca_addr);
aca_put(aca);
return 0;
+out:
+ write_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return err;
}
/*
@@ -396,7 +410,11 @@ int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
write_unlock_bh(&idev->lock);
addrconf_leave_solict(dev, &aca->aca_addr);
- ip6_rt_addr_del(&aca->aca_addr, dev);
+ dst_hold(&aca->aca_rt->u.dst);
+ if (ip6_del_rt(aca->aca_rt, NULL, NULL))
+ dst_free(&aca->aca_rt->u.dst);
+ else
+ dst_release(&aca->aca_rt->u.dst);
aca_put(aca);
in6_dev_put(idev);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 497727195c98..77b4ba6f8016 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -37,11 +37,11 @@
#include <net/ipv6.h>
#include <linux/icmpv6.h>
-int esp6_output(struct sk_buff **pskb)
+static int esp6_output(struct sk_buff *skb)
{
int err;
int hdr_len;
- struct dst_entry *dst = (*pskb)->dst;
+ struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
struct ipv6hdr *top_iph;
struct ipv6_esp_hdr *esph;
@@ -54,17 +54,17 @@ int esp6_output(struct sk_buff **pskb)
int nfrags;
esp = x->data;
- hdr_len = (*pskb)->h.raw - (*pskb)->data +
+ hdr_len = skb->h.raw - skb->data +
sizeof(*esph) + esp->conf.ivlen;
/* Strip IP+ESP header. */
- __skb_pull(*pskb, hdr_len);
+ __skb_pull(skb, hdr_len);
/* Now skb is pure payload to encrypt */
err = -ENOMEM;
/* Round to block size */
- clen = (*pskb)->len;
+ clen = skb->len;
alen = esp->auth.icv_trunc_len;
tfm = esp->conf.tfm;
@@ -73,24 +73,24 @@ int esp6_output(struct sk_buff **pskb)
if (esp->conf.padlen)
clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
- if ((nfrags = skb_cow_data(*pskb, clen-(*pskb)->len+alen, &trailer)) < 0) {
+ if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) {
goto error;
}
/* Fill padding... */
do {
int i;
- for (i=0; i<clen-(*pskb)->len - 2; i++)
+ for (i=0; i<clen-skb->len - 2; i++)
*(u8*)(trailer->tail + i) = i+1;
} while (0);
- *(u8*)(trailer->tail + clen-(*pskb)->len - 2) = (clen - (*pskb)->len)-2;
- pskb_put(*pskb, trailer, clen - (*pskb)->len);
+ *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+ pskb_put(skb, trailer, clen - skb->len);
- top_iph = (struct ipv6hdr *)__skb_push(*pskb, hdr_len);
- esph = (struct ipv6_esp_hdr *)(*pskb)->h.raw;
- top_iph->payload_len = htons((*pskb)->len + alen - sizeof(*top_iph));
- *(u8*)(trailer->tail - 1) = *(*pskb)->nh.raw;
- *(*pskb)->nh.raw = IPPROTO_ESP;
+ top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
+ esph = (struct ipv6_esp_hdr *)skb->h.raw;
+ top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
+ *(u8*)(trailer->tail - 1) = *skb->nh.raw;
+ *skb->nh.raw = IPPROTO_ESP;
esph->spi = x->id.spi;
esph->seq_no = htonl(++x->replay.oseq);
@@ -106,7 +106,7 @@ int esp6_output(struct sk_buff **pskb)
if (!sg)
goto error;
}
- skb_to_sgvec(*pskb, sg, esph->enc_data+esp->conf.ivlen-(*pskb)->data, clen);
+ skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
crypto_cipher_encrypt(tfm, sg, sg, clen);
if (unlikely(sg != &esp->sgbuf[0]))
kfree(sg);
@@ -118,9 +118,9 @@ int esp6_output(struct sk_buff **pskb)
}
if (esp->auth.icv_full_len) {
- esp->auth.icv(esp, *pskb, (u8*)esph-(*pskb)->data,
+ esp->auth.icv(esp, skb, (u8*)esph-skb->data,
sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
- pskb_put(*pskb, trailer, alen);
+ pskb_put(skb, trailer, alen);
}
err = 0;
@@ -129,7 +129,7 @@ error:
return err;
}
-int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
{
struct ipv6hdr *iph;
struct ipv6_esp_hdr *esph;
@@ -252,8 +252,8 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
return mtu + x->props.header_len + esp->auth.icv_full_len;
}
-void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __u32 info)
{
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
@@ -272,7 +272,7 @@ void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
xfrm_state_put(x);
}
-void esp6_destroy(struct xfrm_state *x)
+static void esp6_destroy(struct xfrm_state *x)
{
struct esp_data *esp = x->data;
@@ -298,17 +298,21 @@ void esp6_destroy(struct xfrm_state *x)
kfree(esp);
}
-int esp6_init_state(struct xfrm_state *x, void *args)
+static int esp6_init_state(struct xfrm_state *x, void *args)
{
struct esp_data *esp = NULL;
+ /* null auth and encryption can have zero length keys */
if (x->aalg) {
- if (x->aalg->alg_key_len == 0 || x->aalg->alg_key_len > 512)
+ if (x->aalg->alg_key_len > 512)
goto error;
}
if (x->ealg == NULL)
goto error;
+ if (x->encap)
+ goto error;
+
esp = kmalloc(sizeof(*esp), GFP_KERNEL);
if (esp == NULL)
return -ENOMEM;
@@ -398,7 +402,7 @@ static struct inet6_protocol esp6_protocol = {
.flags = INET6_PROTO_NOPOLICY,
};
-int __init esp6_init(void)
+static int __init esp6_init(void)
{
if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 07151a6c354d..6dda815c013f 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -68,34 +68,35 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len
u8 nexthdr = *nexthdrp;
while (ipv6_ext_hdr(nexthdr)) {
- struct ipv6_opt_hdr hdr;
+ struct ipv6_opt_hdr _hdr, *hp;
int hdrlen;
if (len < (int)sizeof(struct ipv6_opt_hdr))
return -1;
if (nexthdr == NEXTHDR_NONE)
return -1;
- if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
BUG();
if (nexthdr == NEXTHDR_FRAGMENT) {
- unsigned short frag_off;
- if (skb_copy_bits(skb,
- start+offsetof(struct frag_hdr,
- frag_off),
- &frag_off,
- sizeof(frag_off))) {
+ unsigned short _frag_off, *fp;
+ fp = skb_header_pointer(skb,
+ start+offsetof(struct frag_hdr,
+ frag_off),
+ sizeof(_frag_off),
+ &_frag_off);
+ if (fp == NULL)
return -1;
- }
- if (ntohs(frag_off) & ~0x7)
+ if (ntohs(*fp) & ~0x7)
break;
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hdr.hdrlen+2)<<2;
+ hdrlen = (hp->hdrlen+2)<<2;
else
- hdrlen = ipv6_optlen(&hdr);
+ hdrlen = ipv6_optlen(hp);
- nexthdr = hdr.nexthdr;
+ nexthdr = hp->nexthdr;
len -= hdrlen;
start += hdrlen;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9a676aaf6184..a6d435f4f2e3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -139,10 +139,12 @@ static int is_ineligible(struct sk_buff *skb)
if (ptr < 0)
return 0;
if (nexthdr == IPPROTO_ICMPV6) {
- u8 type;
- if (skb_copy_bits(skb, ptr+offsetof(struct icmp6hdr, icmp6_type),
- &type, 1)
- || !(type & ICMPV6_INFOMSG_MASK))
+ u8 _type, *tp;
+ tp = skb_header_pointer(skb,
+ ptr+offsetof(struct icmp6hdr, icmp6_type),
+ sizeof(_type), &_type);
+ if (tp == NULL ||
+ !(*tp & ICMPV6_INFOMSG_MASK))
return 1;
}
return 0;
@@ -200,12 +202,13 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
{
- u8 optval;
+ u8 _optval, *op;
offset += skb->nh.raw - skb->data;
- if (skb_copy_bits(skb, offset, &optval, 1))
+ op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
+ if (op == NULL)
return 1;
- return (optval&0xC0) == 0x80;
+ return (*op & 0xC0) == 0x80;
}
int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 65a137241777..ff6bd80f7b1b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -449,9 +449,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
* Same priority level
*/
- if ((iter->rt6i_dev == rt->rt6i_dev) &&
- (ipv6_addr_cmp(&iter->rt6i_gateway,
- &rt->rt6i_gateway) == 0)) {
+ if (iter->rt6i_dev == rt->rt6i_dev &&
+ iter->rt6i_idev == rt->rt6i_idev &&
+ ipv6_addr_cmp(&iter->rt6i_gateway,
+ &rt->rt6i_gateway) == 0) {
if (!(iter->rt6i_flags&RTF_EXPIRES))
return -EEXIST;
iter->rt6i_expires = rt->rt6i_expires;
@@ -514,7 +515,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh,
int err = -ENOMEM;
fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
- rt->rt6i_dst.plen, (u8*) &rt->rt6i_dst - (u8*) rt);
+ rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
if (fn == NULL)
goto out;
@@ -550,7 +551,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh,
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
sizeof(struct in6_addr), rt->rt6i_src.plen,
- (u8*) &rt->rt6i_src - (u8*) rt);
+ offsetof(struct rt6_info, rt6i_src));
if (sn == NULL) {
/* If it is failed, discard just allocated
@@ -571,7 +572,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh,
} else {
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
sizeof(struct in6_addr), rt->rt6i_src.plen,
- (u8*) &rt->rt6i_src - (u8*) rt);
+ offsetof(struct rt6_info, rt6i_src));
if (sn == NULL)
goto st_failure;
@@ -680,14 +681,13 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct lookup_args args[2];
- struct rt6_info *rt = NULL;
struct fib6_node *fn;
- args[0].offset = (u8*) &rt->rt6i_dst - (u8*) rt;
+ args[0].offset = offsetof(struct rt6_info, rt6i_dst);
args[0].addr = daddr;
#ifdef CONFIG_IPV6_SUBTREES
- args[1].offset = (u8*) &rt->rt6i_src - (u8*) rt;
+ args[1].offset = offsetof(struct rt6_info, rt6i_src);
args[1].addr = saddr;
#endif
@@ -739,11 +739,10 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
struct in6_addr *daddr, int dst_len,
struct in6_addr *saddr, int src_len)
{
- struct rt6_info *rt = NULL;
struct fib6_node *fn;
fn = fib6_locate_1(root, daddr, dst_len,
- (u8*) &rt->rt6i_dst - (u8*) rt);
+ offsetof(struct rt6_info, rt6i_dst));
#ifdef CONFIG_IPV6_SUBTREES
if (src_len) {
@@ -752,7 +751,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
fn = fn->subtree;
if (fn)
fn = fib6_locate_1(fn, saddr, src_len,
- (u8*) &rt->rt6i_src - (u8*) rt);
+ offsetof(struct rt6_info, rt6i_src));
}
#endif
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 2c5aab0894d2..15f341adc74c 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -538,7 +538,8 @@ release:
/* Do not check for fault */
if (!freq.flr_label)
- copy_to_user(optval + ((u8*)&freq.flr_label - (u8*)&freq), &fl->label, sizeof(fl->label));
+ copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
+ &fl->label, sizeof(fl->label));
sfl1->fl = fl;
sfl1->next = np->ipv6_fl_list;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 04303769d36b..8f5296e3f9d0 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -114,10 +114,10 @@ error_out:
return err;
}
-static int ipcomp6_output(struct sk_buff **pskb)
+static int ipcomp6_output(struct sk_buff *skb)
{
int err;
- struct dst_entry *dst = (*pskb)->dst;
+ struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
struct ipv6hdr *top_iph;
int hdr_len;
@@ -126,23 +126,23 @@ static int ipcomp6_output(struct sk_buff **pskb)
int plen, dlen;
u8 *start, *scratch = ipcd->scratch;
- hdr_len = (*pskb)->h.raw - (*pskb)->data;
+ hdr_len = skb->h.raw - skb->data;
/* check whether datagram len is larger than threshold */
- if (((*pskb)->len - hdr_len) < ipcd->threshold) {
+ if ((skb->len - hdr_len) < ipcd->threshold) {
goto out_ok;
}
- if ((skb_is_nonlinear(*pskb) || skb_cloned(*pskb)) &&
- skb_linearize(*pskb, GFP_ATOMIC) != 0) {
+ if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+ skb_linearize(skb, GFP_ATOMIC) != 0) {
err = -ENOMEM;
goto error;
}
/* compression */
- plen = (*pskb)->len - hdr_len;
+ plen = skb->len - hdr_len;
dlen = IPCOMP_SCRATCH_SIZE;
- start = (*pskb)->h.raw;
+ start = skb->h.raw;
err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen);
if (err) {
@@ -152,18 +152,18 @@ static int ipcomp6_output(struct sk_buff **pskb)
goto out_ok;
}
memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
- pskb_trim(*pskb, hdr_len + dlen + sizeof(struct ip_comp_hdr));
+ pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr));
/* insert ipcomp header and replace datagram */
- top_iph = (struct ipv6hdr *)(*pskb)->data;
+ top_iph = (struct ipv6hdr *)skb->data;
- top_iph->payload_len = htons((*pskb)->len - sizeof(struct ipv6hdr));
+ top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
ipch = (struct ipv6_comp_hdr *)start;
- ipch->nexthdr = *(*pskb)->nh.raw;
+ ipch->nexthdr = *skb->nh.raw;
ipch->flags = 0;
ipch->cpi = htons((u16 )ntohl(x->id.spi));
- *(*pskb)->nh.raw = IPPROTO_COMP;
+ *skb->nh.raw = IPPROTO_COMP;
out_ok:
err = 0;
@@ -284,6 +284,9 @@ static int ipcomp6_init_state(struct xfrm_state *x, void *args)
if (!x->calg)
goto out;
+ if (x->encap)
+ goto out;
+
err = -ENOMEM;
ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
if (!ipcd)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d2ce00d81d4c..0cef15b866f5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -66,6 +66,7 @@ do { \
#endif
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+static DECLARE_MUTEX(ip6t_mutex);
/* Must have mutex */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index bb8590bdd605..acc673ce9591 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -11,6 +11,7 @@
*/
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/spinlock.h>
@@ -26,7 +27,7 @@ MODULE_DESCRIPTION("IP6 tables LOG target module");
MODULE_LICENSE("GPL");
static unsigned int nflog = 1;
-MODULE_PARM(nflog, "i");
+module_param(nflog, int, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
struct in_device;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 46daa79051d1..aca6d21cc588 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -10,6 +10,7 @@
*/
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
MODULE_LICENSE("GPL");
@@ -156,7 +157,7 @@ static struct nf_hook_ops ip6t_ops[] = {
/* Default to forward because I got too much mail already. */
static int forward = NF_ACCEPT;
-MODULE_PARM(forward, "i");
+module_param(forward, bool, 0000);
static int __init init(void)
{
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 031989611932..91fbcfee87ac 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -555,6 +555,52 @@ error:
IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
return err;
}
+
+/*
+ * rawv6_probe_proto_opt - best-effort probe of protocol fields for flow lookup
+ * @fl: flow being built; fl->proto selects what is probed
+ * @msg: message whose iovec array holds the user payload
+ *
+ * For IPPROTO_ICMPV6, reads the first two payload bytes (type, then code)
+ * from user space into fl->fl_icmp_type / fl->fl_icmp_code.  The two bytes
+ * may sit in different iovec segments.  Every other protocol is a no-op.
+ * There is no return value, so a faulting read simply leaves @fl untouched.
+ */
+static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+{
+	u8 __user *type = NULL;
+	u8 __user *code = NULL;
+	u8 icmp_type, icmp_code;
+	int i;
+
+	if (fl->proto != IPPROTO_ICMPV6 || !msg->msg_iov)
+		return;
+
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		struct iovec *iov = &msg->msg_iov[i];
+
+		/* Skip segments that cannot supply even one byte. */
+		if (!iov->iov_base || iov->iov_len < 1)
+			continue;
+
+		if (!type) {
+			type = iov->iov_base;
+			/* code directly follows type if this segment has room */
+			if (iov->iov_len > 1)
+				code = type + 1;
+		} else {
+			code = iov->iov_base;
+		}
+
+		if (code)
+			break;
+	}
+
+	if (!type || !code)
+		return;
+
+	/*
+	 * code may come from a different segment than type and nothing here
+	 * has range-checked it, so use the checking get_user() for both.
+	 */
+	if (get_user(icmp_type, type) || get_user(icmp_code, code))
+		return;
+
+	fl->fl_icmp_type = icmp_type;
+	fl->fl_icmp_code = icmp_code;
+}
+
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
@@ -674,6 +720,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
opt = fl6_merge_options(&opt_space, flowlabel, opt);
fl.proto = proto;
+ rawv6_probe_proto_opt(&fl, msg);
+
ipv6_addr_copy(&fl.fl6_dst, daddr);
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 836d2ae8464e..e07da9ee8990 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -195,14 +195,18 @@ static void ip6_frag_secret_rebuild(unsigned long dummy)
atomic_t ip6_frag_mem = ATOMIC_INIT(0);
/* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct sk_buff *skb)
+static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
{
+ if (work)
+ *work -= skb->truesize;
atomic_sub(skb->truesize, &ip6_frag_mem);
kfree_skb(skb);
}
-static inline void frag_free_queue(struct frag_queue *fq)
+static inline void frag_free_queue(struct frag_queue *fq, int *work)
{
+ if (work)
+ *work -= sizeof(struct frag_queue);
atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
kfree(fq);
}
@@ -220,7 +224,7 @@ static inline struct frag_queue *frag_alloc_queue(void)
/* Destruction primitives. */
/* Complete destruction of fq. */
-static void ip6_frag_destroy(struct frag_queue *fq)
+static void ip6_frag_destroy(struct frag_queue *fq, int *work)
{
struct sk_buff *fp;
@@ -232,17 +236,17 @@ static void ip6_frag_destroy(struct frag_queue *fq)
while (fp) {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(fp);
+ frag_kfree_skb(fp, work);
fp = xp;
}
- frag_free_queue(fq);
+ frag_free_queue(fq, work);
}
-static __inline__ void fq_put(struct frag_queue *fq)
+static __inline__ void fq_put(struct frag_queue *fq, int *work)
{
if (atomic_dec_and_test(&fq->refcnt))
- ip6_frag_destroy(fq);
+ ip6_frag_destroy(fq, work);
}
/* Kill fq entry. It is not destroyed immediately,
@@ -264,10 +268,13 @@ static void ip6_evictor(void)
{
struct frag_queue *fq;
struct list_head *tmp;
+ int work;
- for(;;) {
- if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh)
- return;
+ work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
+ if (work <= 0)
+ return;
+
+ while(work > 0) {
read_lock(&ip6_frag_lock);
if (list_empty(&ip6_frag_lru_list)) {
read_unlock(&ip6_frag_lock);
@@ -283,7 +290,7 @@ static void ip6_evictor(void)
fq_kill(fq);
spin_unlock(&fq->lock);
- fq_put(fq);
+ fq_put(fq, &work);
IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
}
}
@@ -320,7 +327,7 @@ static void ip6_frag_expire(unsigned long data)
}
out:
spin_unlock(&fq->lock);
- fq_put(fq);
+ fq_put(fq, NULL);
}
/* Creation primitives. */
@@ -340,7 +347,7 @@ static struct frag_queue *ip6_frag_intern(unsigned int hash,
atomic_inc(&fq->refcnt);
write_unlock(&ip6_frag_lock);
fq_in->last_in |= COMPLETE;
- fq_put(fq_in);
+ fq_put(fq_in, NULL);
return fq;
}
}
@@ -539,7 +546,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->fragments = next;
fq->meat -= free_it->len;
- frag_kfree_skb(free_it);
+ frag_kfree_skb(free_it, NULL);
}
}
@@ -734,7 +741,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
spin_unlock(&fq->lock);
- fq_put(fq);
+ fq_put(fq, NULL);
return ret;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2017c69dc9f3..77e9de707e96 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -155,7 +155,16 @@ static void ip6_dst_destroy(struct dst_entry *dst)
static void ip6_dst_ifdown(struct dst_entry *dst, int how)
{
- ip6_dst_destroy(dst);
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ struct inet6_dev *idev = rt->rt6i_idev;
+
+ if (idev != NULL && idev->dev != &loopback_dev) {
+ struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
+ if (loopback_idev != NULL) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+ }
+ }
}
/*
@@ -174,8 +183,16 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
struct net_device *dev = sprt->rt6i_dev;
if (dev->ifindex == oif)
return sprt;
- if (dev->flags&IFF_LOOPBACK)
+ if (dev->flags & IFF_LOOPBACK) {
+ if (sprt->rt6i_idev->dev->ifindex != oif) {
+ if (strict && oif)
+ continue;
+ if (local && (!oif ||
+ local->rt6i_idev->dev->ifindex == oif))
+ continue;
+ }
local = sprt;
+ }
}
if (local)
@@ -336,13 +353,13 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
return NULL;
}
-/* rt6_ins is called with FREE rt6_lock.
+/* ip6_ins_rt is called with FREE rt6_lock.
It takes new route entry, the addition fails by any reason the
route is freed. In any case, if caller does not hold it, it may
be destroyed.
*/
-static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
{
int err;
@@ -390,7 +407,7 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
dst_hold(&rt->u.dst);
- err = rt6_ins(rt, NULL, NULL);
+ err = ip6_ins_rt(rt, NULL, NULL);
if (err == 0)
return rt;
@@ -608,8 +625,13 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
struct in6_addr *addr,
int (*output)(struct sk_buff **))
{
- struct rt6_info *rt = ip6_dst_alloc();
+ struct rt6_info *rt;
+ struct inet6_dev *idev = in6_dev_get(dev);
+ if (unlikely(idev == NULL))
+ return NULL;
+
+ rt = ip6_dst_alloc();
if (unlikely(rt == NULL))
goto out;
@@ -620,7 +642,7 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
neigh = ndisc_get_neigh(dev, addr);
rt->rt6i_dev = dev;
- rt->rt6i_idev = in6_dev_get(dev);
+ rt->rt6i_idev = idev;
rt->rt6i_nexthop = neigh;
atomic_set(&rt->u.dst.__refcnt, 1);
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
@@ -731,8 +753,9 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
int err;
struct rtmsg *r;
struct rtattr **rta;
- struct rt6_info *rt;
+ struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
+ struct inet6_dev *idev = NULL;
int addr_type;
rta = (struct rtattr **) _rtattr;
@@ -744,9 +767,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
return -EINVAL;
#endif
if (rtmsg->rtmsg_ifindex) {
+ err = -ENODEV;
dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
if (!dev)
- return -ENODEV;
+ goto out;
+ idev = in6_dev_get(dev);
+ if (!idev)
+ goto out;
}
if (rtmsg->rtmsg_metric == 0)
@@ -793,10 +820,17 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
*/
if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
(dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
- if (dev)
+ if (dev && dev != &loopback_dev) {
dev_put(dev);
- dev = &loopback_dev;
- dev_hold(dev);
+ in6_dev_put(idev);
+ dev = &loopback_dev;
+ dev_hold(dev);
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ err = -ENODEV;
+ goto out;
+ }
+ }
rt->u.dst.output = ip6_pkt_discard_out;
rt->u.dst.input = ip6_pkt_discard;
rt->u.dst.error = -ENETUNREACH;
@@ -838,7 +872,9 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
}
} else {
dev = grt->rt6i_dev;
+ idev = grt->rt6i_idev;
dev_hold(dev);
+ in6_dev_hold(grt->rt6i_idev);
}
if (!(grt->rt6i_flags&RTF_GATEWAY))
err = 0;
@@ -900,8 +936,8 @@ install_route:
if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
rt->u.dst.dev = dev;
- rt->rt6i_idev = in6_dev_get(dev);
- return rt6_ins(rt, nlh, _rtattr);
+ rt->rt6i_idev = idev;
+ return ip6_ins_rt(rt, nlh, _rtattr);
out:
if (dev)
@@ -1054,7 +1090,7 @@ source_ok:
nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
- if (rt6_ins(nrt, NULL, NULL))
+ if (ip6_ins_rt(nrt, NULL, NULL))
goto out;
if (rt->rt6i_flags&RTF_CACHE) {
@@ -1144,7 +1180,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
- rt6_ins(nrt, NULL, NULL);
+ ip6_ins_rt(nrt, NULL, NULL);
}
out:
@@ -1303,23 +1339,26 @@ int ip6_pkt_discard_out(struct sk_buff **pskb)
}
/*
- * Add address
+ * Allocate a dst for local (unicast / anycast) address.
*/
-int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
+struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
+ const struct in6_addr *addr,
+ int anycast)
{
struct rt6_info *rt = ip6_dst_alloc();
if (rt == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
dev_hold(&loopback_dev);
+ in6_dev_hold(idev);
rt->u.dst.flags = DST_HOST;
rt->u.dst.input = ip6_input;
rt->u.dst.output = ip6_output;
rt->rt6i_dev = &loopback_dev;
- rt->rt6i_idev = in6_dev_get(&loopback_dev);
+ rt->rt6i_idev = idev;
rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
@@ -1331,34 +1370,15 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
if (rt->rt6i_nexthop == NULL) {
dst_free((struct dst_entry *) rt);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
- rt6_ins(rt, NULL, NULL);
-
- return 0;
-}
-
-/* Delete address. Warning: you should check that this address
- disappeared before calling this function.
- */
-
-int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
-{
- struct rt6_info *rt;
- int err = -ENOENT;
- rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
- if (rt) {
- if (rt->rt6i_dst.plen == 128)
- err = ip6_del_rt(rt, NULL, NULL);
- else
- dst_release(&rt->u.dst);
- }
+ atomic_set(&rt->u.dst.__refcnt, 1);
- return err;
+ return rt;
}
static int fib6_ifdown(struct rt6_info *rt, void *arg)
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0791594f8878..92e74233fcdb 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -9,6 +9,7 @@
* IPv6 support
*/
+#include <linux/module.h>
#include <linux/string.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
@@ -25,11 +26,11 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
IP6_ECN_set_ce(inner_iph);
}
-int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
{
struct sk_buff *skb = *pskb;
int err;
- u32 spi, seq;
+ u32 seq;
struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
struct xfrm_state *x;
int xfrm_nr = 0;
@@ -40,7 +41,8 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
nhoff = *nhoffp;
nexthdr = skb->nh.raw[nhoff];
- if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
+ seq = 0;
+ if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
goto drop;
do {
@@ -137,3 +139,10 @@ drop:
kfree_skb(skb);
return -1;
}
+
+EXPORT_SYMBOL(xfrm6_rcv_spi);
+
+int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+ return xfrm6_rcv_spi(pskb, nhoffp, 0); /* spi 0: xfrm6_rcv_spi() parses the SPI from the packet */
+}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 712856f0f356..786de7d912bb 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -113,8 +113,7 @@ int xfrm6_output(struct sk_buff **pskb)
xfrm6_encap(skb);
- err = x->type->output(pskb);
- skb = *pskb;
+ err = x->type->output(skb);
if (err)
goto error;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ab4e40b0ab76..a0715e2f05d7 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -17,12 +17,12 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
-extern struct dst_ops xfrm6_dst_ops;
-extern struct xfrm_policy_afinfo xfrm6_policy_afinfo;
+static struct dst_ops xfrm6_dst_ops;
+static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED };
-int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
+static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
{
int err = 0;
*dst = (struct xfrm_dst*)ip6_route_output(NULL, fl);
@@ -213,6 +213,16 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
fl->proto = nexthdr;
return;
+ case IPPROTO_ICMPV6:
+ if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+ u8 *icmp = (u8 *)exthdr;
+
+ fl->fl_icmp_type = icmp[0];
+ fl->fl_icmp_code = icmp[1];
+ }
+ fl->proto = nexthdr;
+ return;
+
/* XXX Why are there these headers? */
case IPPROTO_AH:
case IPPROTO_ESP:
@@ -243,7 +253,7 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
return;
}
-struct dst_ops xfrm6_dst_ops = {
+static struct dst_ops xfrm6_dst_ops = {
.family = AF_INET6,
.protocol = __constant_htons(ETH_P_IPV6),
.gc = xfrm6_garbage_collect,
@@ -252,7 +262,7 @@ struct dst_ops xfrm6_dst_ops = {
.entry_size = sizeof(struct xfrm_dst),
};
-struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
+static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
.lock = RW_LOCK_UNLOCKED,
.type_map = &xfrm6_type_map,
@@ -263,12 +273,12 @@ struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.decode_session = _decode_session6,
};
-void __init xfrm6_policy_init(void)
+static void __init xfrm6_policy_init(void)
{
xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
}
-void __exit xfrm6_policy_fini(void)
+static void __exit xfrm6_policy_fini(void)
{
xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
}
@@ -277,12 +287,10 @@ void __init xfrm6_init(void)
{
xfrm6_policy_init();
xfrm6_state_init();
- xfrm6_tunnel_init();
}
void __exit xfrm6_fini(void)
{
- xfrm6_tunnel_fini();
//xfrm6_input_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 5766a133411a..9616a63cc431 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -343,9 +343,8 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
-static int xfrm6_tunnel_output(struct sk_buff **pskb)
+static int xfrm6_tunnel_output(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct ipv6hdr *top_iph;
top_iph = (struct ipv6hdr *)skb->data;
@@ -356,17 +355,6 @@ static int xfrm6_tunnel_output(struct sk_buff **pskb)
static int xfrm6_tunnel_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
{
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- return -EINVAL;
-
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
- dst_release(skb->dst);
- skb->dst = NULL;
- skb->protocol = htons(ETH_P_IPV6);
- skb->pkt_type = PACKET_HOST;
- netif_rx(skb);
-
return 0;
}
@@ -413,49 +401,15 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
struct sk_buff *skb = *pskb;
struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
- struct xfrm_state *x = NULL;
struct ipv6hdr *iph = skb->nh.ipv6h;
- int err = 0;
u32 spi;
/* device-like_ip6ip6_handler() */
- if (handler) {
- err = handler->handler(pskb, nhoffp);
- if (!err)
- goto out;
- }
+ if (handler && handler->handler(pskb, nhoffp) == 0)
+ return 0;
spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
- x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
- spi,
- IPPROTO_IPV6, AF_INET6);
-
- if (!x)
- goto drop;
-
- spin_lock(&x->lock);
-
- if (unlikely(x->km.state != XFRM_STATE_VALID))
- goto drop_unlock;
-
- err = xfrm6_tunnel_input(x, NULL, skb);
- if (err)
- goto drop_unlock;
-
- x->curlft.bytes += skb->len;
- x->curlft.packets++;
- spin_unlock(&x->lock);
- xfrm_state_put(x);
-
-out:
- return 0;
-
-drop_unlock:
- spin_unlock(&x->lock);
- xfrm_state_put(x);
-drop:
- kfree_skb(skb);
- return -1;
+ return xfrm6_rcv_spi(pskb, nhoffp, spi);
}
static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -517,6 +471,9 @@ static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args)
if (!x->props.mode)
return -EINVAL;
+ if (x->encap)
+ return -EINVAL;
+
x->props.header_len = sizeof(struct ipv6hdr);
return 0;
@@ -543,31 +500,32 @@ static struct inet6_protocol xfrm6_tunnel_protocol = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
-void __init xfrm6_tunnel_init(void)
+static int __init xfrm6_tunnel_init(void)
{
X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) {
X6TPRINTK1(KERN_ERR
"xfrm6_tunnel init: can't add xfrm type\n");
- return;
+ return -EAGAIN;
}
if (inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0) {
X6TPRINTK1(KERN_ERR
"xfrm6_tunnel init(): can't add protocol\n");
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
- return;
+ return -EAGAIN;
}
if (xfrm6_tunnel_spi_init() < 0) {
X6TPRINTK1(KERN_ERR
"xfrm6_tunnel init: failed to initialize spi\n");
inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
- return;
+ return -EAGAIN;
}
+ return 0;
}
-void __exit xfrm6_tunnel_fini(void)
+static void __exit xfrm6_tunnel_fini(void)
{
X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
@@ -579,3 +537,7 @@ void __exit xfrm6_tunnel_fini(void)
X6TPRINTK1(KERN_ERR
"xfrm6_tunnel close: can't remove xfrm type\n");
}
+
+module_init(xfrm6_tunnel_init); /* own init hook; no longer called from xfrm6_init() */
+module_exit(xfrm6_tunnel_fini); /* own exit hook; no longer called from xfrm6_fini() */
+MODULE_LICENSE("GPL");
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 11a2955f8e09..04bb8925ac04 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -303,10 +303,10 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev)
*/
#ifdef CONFIG_INET
IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n");
- in_dev = in_dev_get(dev);
+ rcu_read_lock();
+ in_dev = __in_dev_get(dev);
if (in_dev == NULL)
- return;
- read_lock(&in_dev->lock);
+ goto out;
if (in_dev->ifa_list)
arp_send(ARPOP_REQUEST, ETH_P_ARP,
@@ -314,8 +314,8 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev)
dev,
in_dev->ifa_list->ifa_address,
NULL, dev->dev_addr, NULL);
- read_unlock(&in_dev->lock);
- in_dev_put(in_dev);
+out:
+ rcu_read_unlock();
#endif /* CONFIG_INET */
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index fdf75a1ba801..8ca25fd7efe7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1075,15 +1075,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
natt->encap_type = n_type->sadb_x_nat_t_type_type;
- switch (natt->encap_type) {
- case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- break;
- default:
- err = -ENOPROTOOPT;
- goto out;
- }
-
if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
struct sadb_x_nat_t_port* n_port =
ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
index 6c87fcaeca6a..1f0d51a341f3 100644
--- a/net/rxrpc/call.c
+++ b/net/rxrpc/call.c
@@ -929,7 +929,6 @@ static void rxrpc_call_receive_packet(struct rxrpc_call *call)
{
struct rxrpc_message *msg;
struct list_head *_p;
- uint32_t data32;
_enter("%p", call);
@@ -986,22 +985,21 @@ static void rxrpc_call_receive_packet(struct rxrpc_call *call)
break;
/* deal with abort packets */
- case RXRPC_PACKET_TYPE_ABORT:
- data32 = 0;
- if (skb_copy_bits(msg->pkt, msg->offset,
- &data32, sizeof(data32)) < 0) {
+ case RXRPC_PACKET_TYPE_ABORT: {
+ uint32_t _dbuf, *dp;
+
+ dp = skb_header_pointer(msg->pkt, msg->offset,
+ sizeof(_dbuf), &_dbuf);
+ if (dp == NULL)
printk("Rx Received short ABORT packet\n");
- }
- else {
- data32 = ntohl(data32);
- }
- _proto("Rx Received Call ABORT { data=%d }", data32);
+ _proto("Rx Received Call ABORT { data=%d }",
+ (dp ? ntohl(*dp) : 0));
spin_lock(&call->lock);
call->app_call_state = RXRPC_CSTATE_ERROR;
call->app_err_state = RXRPC_ESTATE_PEER_ABORT;
- call->app_abort_code = data32;
+ call->app_abort_code = (dp ? ntohl(*dp) : 0);
call->app_errno = -ECONNABORTED;
call->app_mark = RXRPC_APP_MARK_EOF;
call->app_read_buf = NULL;
@@ -1013,7 +1011,7 @@ static void rxrpc_call_receive_packet(struct rxrpc_call *call)
spin_unlock(&call->lock);
call->app_error_func(call);
break;
-
+ }
default:
/* deal with other packet types */
_proto("Rx Unsupported packet type %u (#%u)",
@@ -1271,7 +1269,7 @@ static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
struct rxrpc_message *msg)
{
- struct rxrpc_ackpacket ack;
+ struct rxrpc_ackpacket _ack, *ap;
rxrpc_serial_t serial;
rxrpc_seq_t seq;
int ret;
@@ -1279,33 +1277,34 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
_enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
/* extract the basic ACK record */
- if (skb_copy_bits(msg->pkt, msg->offset, &ack, sizeof(ack)) < 0) {
+ ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack);
+ if (ap == NULL) {
printk("Rx Received short ACK packet\n");
return;
}
- msg->offset += sizeof(ack);
+ msg->offset += sizeof(_ack);
- serial = ack.serial;
- seq = ntohl(ack.firstPacket);
+ serial = ap->serial;
+ seq = ntohl(ap->firstPacket);
_proto("Rx Received ACK %%%d { b=%hu m=%hu f=%u p=%u s=%u r=%s n=%u }",
ntohl(msg->hdr.serial),
- ntohs(ack.bufferSpace),
- ntohs(ack.maxSkew),
+ ntohs(ap->bufferSpace),
+ ntohs(ap->maxSkew),
seq,
- ntohl(ack.previousPacket),
+ ntohl(ap->previousPacket),
ntohl(serial),
- rxrpc_acks[ack.reason],
+ rxrpc_acks[ap->reason],
call->ackr.nAcks
);
/* check the other side isn't ACK'ing a sequence number I haven't sent
* yet */
- if (ack.nAcks > 0 &&
+ if (ap->nAcks > 0 &&
(seq > call->snd_seq_count ||
- seq + ack.nAcks - 1 > call->snd_seq_count)) {
+ seq + ap->nAcks - 1 > call->snd_seq_count)) {
printk("Received ACK (#%u-#%u) for unsent packet\n",
- seq, seq + ack.nAcks - 1);
+ seq, seq + ap->nAcks - 1);
rxrpc_call_abort(call, -EINVAL);
_leave("");
return;
@@ -1354,7 +1353,7 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
}
}
- switch (ack.reason) {
+ switch (ap->reason) {
/* deal with negative/positive acknowledgement of data
* packets */
case RXRPC_ACK_REQUESTED:
@@ -1366,14 +1365,14 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
case RXRPC_ACK_OUT_OF_SEQUENCE:
case RXRPC_ACK_EXCEEDS_WINDOW:
call->snd_resend_cnt = 0;
- ret = rxrpc_call_record_ACK(call, msg, seq, ack.nAcks);
+ ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks);
if (ret < 0)
rxrpc_call_abort(call, ret);
break;
/* respond to ping packets immediately */
case RXRPC_ACK_PING:
- rxrpc_call_generate_ACK(call, &msg->hdr, &ack);
+ rxrpc_call_generate_ACK(call, &msg->hdr, ap);
break;
/* only record RTT on ping response packets */
@@ -1386,7 +1385,7 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
rttmsg = NULL;
spin_lock(&call->lock);
if (call->snd_ping &&
- call->snd_ping->hdr.serial == ack.serial) {
+ call->snd_ping->hdr.serial == ap->serial) {
rttmsg = call->snd_ping;
call->snd_ping = NULL;
}
@@ -1402,7 +1401,7 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
break;
default:
- printk("Unsupported ACK reason %u\n", ack.reason);
+ printk("Unsupported ACK reason %u\n", ap->reason);
break;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1f9bf9d0834c..ebb9935ab4ca 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -389,7 +389,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
int err;
struct rtattr *kind = tca[TCA_KIND-1];
- struct Qdisc *sch = NULL;
+ void *p = NULL;
+ struct Qdisc *sch;
struct Qdisc_ops *ops;
int size;
@@ -406,21 +407,22 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
err = -EINVAL;
if (ops == NULL)
goto err_out;
-
- size = sizeof(*sch) + ops->priv_size;
-
- sch = kmalloc(size, GFP_KERNEL);
- err = -ENOBUFS;
- if (!sch)
+ err = -EBUSY;
+ if (!try_module_get(ops->owner))
goto err_out;
- /* Grrr... Resolve race condition with module unload */
-
- err = -EINVAL;
- if (ops != qdisc_lookup_ops(kind))
- goto err_out;
+ /* ensure that the Qdisc and the private data are 32-byte aligned */
+ size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
+ size += ops->priv_size + QDISC_ALIGN_CONST;
- memset(sch, 0, size);
+ p = kmalloc(size, GFP_KERNEL);
+ err = -ENOBUFS;
+ if (!p)
+ goto err_out2;
+ memset(p, 0, size);
+ sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
+ & ~QDISC_ALIGN_CONST);
+ sch->padded = (char *)sch - (char *)p;
INIT_LIST_HEAD(&sch->list);
skb_queue_head_init(&sch->q);
@@ -439,7 +441,7 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
handle = qdisc_alloc_handle(dev);
err = -ENOMEM;
if (handle == 0)
- goto err_out;
+ goto err_out3;
}
if (handle == TC_H_INGRESS)
@@ -447,10 +449,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
else
sch->handle = handle;
- err = -EBUSY;
- if (!try_module_get(ops->owner))
- goto err_out;
-
/* enqueue is accessed locklessly - make sure it's visible
* before we set a netdevice's qdisc pointer to sch */
smp_wmb();
@@ -466,12 +464,14 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
#endif
return sch;
}
+err_out3:
+ dev_put(dev);
+err_out2:
module_put(ops->owner);
-
err_out:
*errp = err;
- if (sch)
- kfree(sch);
+ if (p)
+ kfree(p);
return NULL;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ca08449e7b03..fe530156875a 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -573,7 +573,6 @@ static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt)
struct atm_qdisc_data *p = PRIV(sch);
DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
- memset(p,0,sizeof(*p));
p->flows = &p->link;
if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
p->link.q = &noop_qdisc;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 01dfcb1ab832..192ad0a9b904 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1746,15 +1746,18 @@ static void cbq_destroy_filters(struct cbq_class *cl)
}
}
-static void cbq_destroy_class(struct cbq_class *cl)
+static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+
cbq_destroy_filters(cl);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
qdisc_kill_estimator(&cl->stats);
#endif
- kfree(cl);
+ if (cl != &q->link)
+ kfree(cl);
}
static void
@@ -1777,8 +1780,7 @@ cbq_destroy(struct Qdisc* sch)
for (cl = q->classes[h]; cl; cl = next) {
next = cl->next;
- if (cl != &q->link)
- cbq_destroy_class(cl);
+ cbq_destroy_class(sch, cl);
}
}
@@ -1799,7 +1801,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
spin_unlock_bh(&sch->dev->queue_lock);
#endif
- cbq_destroy_class(cl);
+ cbq_destroy_class(sch, cl);
}
}
@@ -2035,7 +2037,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_unlock(sch);
if (--cl->refcnt == 0)
- cbq_destroy_class(cl);
+ cbq_destroy_class(sch, cl);
return 0;
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index e0831a4a4457..28b61f0f87a7 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -331,8 +331,6 @@ int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
!tb[TCA_DSMARK_INDICES-1] ||
RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16))
return -EINVAL;
- memset(p,0,sizeof(*p));
- p->filter_list = NULL;
p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]);
if (!p->indices)
return -EINVAL;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 82117f9ba7d1..2d7e859d0cd8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -415,6 +415,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
if (!ops->init || ops->init(sch, NULL) == 0)
return sch;
+ dev_put(dev);
kfree(p);
return NULL;
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 84ef3ab6a843..fa1a9e5494c8 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -62,6 +62,7 @@
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/list.h>
+#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
@@ -133,9 +134,11 @@ struct hfsc_class
struct list_head children; /* child classes */
struct Qdisc *qdisc; /* leaf qdisc */
- struct list_head actlist; /* active children list */
- struct list_head alist; /* active children list member */
- struct list_head ellist; /* eligible list member */
+ struct rb_node el_node; /* qdisc's eligible tree member */
+ struct rb_root vt_tree; /* active children sorted by cl_vt */
+ struct rb_node vt_node; /* parent's vt_tree member */
+ struct rb_root cf_tree; /* active children sorted by cl_f */
+ struct rb_node cf_node; /* parent's cf_tree member */
struct list_head hlist; /* hash list member */
struct list_head dlist; /* drop list member */
@@ -161,6 +164,9 @@ struct hfsc_class
adjustment */
u64 cl_vtoff; /* inter-period cumulative vt offset */
u64 cl_cvtmax; /* max child's vt in the last period */
+ u64 cl_cvtoff; /* cumulative cvtmax of all periods */
+ u64 cl_pcvtoff; /* parent's cvtoff at initialization
+ time */
struct internal_sc cl_rsc; /* internal real-time service curve */
struct internal_sc cl_fsc; /* internal fair service curve */
@@ -183,7 +189,7 @@ struct hfsc_sched
u16 defcls; /* default class id */
struct hfsc_class root; /* root class */
struct list_head clhash[HFSC_HSIZE]; /* class hash */
- struct list_head eligible; /* eligible list */
+ struct rb_root eligible; /* eligible tree */
struct list_head droplist; /* active leaf class list (for
dropping) */
struct sk_buff_head requeue; /* requeued packet */
@@ -219,82 +225,51 @@ do { \
/*
- * eligible list holds backlogged classes being sorted by their eligible times.
- * there is one eligible list per hfsc instance.
+ * eligible tree holds backlogged classes being sorted by their eligible times.
+ * there is one eligible tree per hfsc instance.
*/
static void
-ellist_insert(struct hfsc_class *cl)
+eltree_insert(struct hfsc_class *cl)
{
- struct list_head *head = &cl->sched->eligible;
- struct hfsc_class *p;
+ struct rb_node **p = &cl->sched->eligible.rb_node;
+ struct rb_node *parent = NULL;
+ struct hfsc_class *cl1;
- /* check the last entry first */
- if (list_empty(head) ||
- ((p = list_entry(head->prev, struct hfsc_class, ellist)) &&
- p->cl_e <= cl->cl_e)) {
- list_add_tail(&cl->ellist, head);
- return;
- }
-
- list_for_each_entry(p, head, ellist) {
- if (cl->cl_e < p->cl_e) {
- /* insert cl before p */
- list_add_tail(&cl->ellist, &p->ellist);
- return;
- }
+ while (*p != NULL) {
+ parent = *p;
+ cl1 = rb_entry(parent, struct hfsc_class, el_node);
+ if (cl->cl_e >= cl1->cl_e)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
}
- ASSERT(0); /* should not reach here */
+ rb_link_node(&cl->el_node, parent, p);
+ rb_insert_color(&cl->el_node, &cl->sched->eligible);
}
static inline void
-ellist_remove(struct hfsc_class *cl)
+eltree_remove(struct hfsc_class *cl)
{
- list_del(&cl->ellist);
+ rb_erase(&cl->el_node, &cl->sched->eligible);
}
-static void
-ellist_update(struct hfsc_class *cl)
+static inline void
+eltree_update(struct hfsc_class *cl)
{
- struct list_head *head = &cl->sched->eligible;
- struct hfsc_class *p, *last;
-
- /*
- * the eligible time of a class increases monotonically.
- * if the next entry has a larger eligible time, nothing to do.
- */
- if (cl->ellist.next == head ||
- ((p = list_entry(cl->ellist.next, struct hfsc_class, ellist)) &&
- cl->cl_e <= p->cl_e))
- return;
-
- /* check the last entry */
- last = list_entry(head->prev, struct hfsc_class, ellist);
- if (last->cl_e <= cl->cl_e) {
- list_move_tail(&cl->ellist, head);
- return;
- }
-
- /*
- * the new position must be between the next entry
- * and the last entry
- */
- list_for_each_entry_continue(p, head, ellist) {
- if (cl->cl_e < p->cl_e) {
- list_move_tail(&cl->ellist, &p->ellist);
- return;
- }
- }
- ASSERT(0); /* should not reach here */
+ eltree_remove(cl);
+ eltree_insert(cl);
}
/* find the class with the minimum deadline among the eligible classes */
static inline struct hfsc_class *
-ellist_get_mindl(struct list_head *head, u64 cur_time)
+eltree_get_mindl(struct hfsc_sched *q, u64 cur_time)
{
struct hfsc_class *p, *cl = NULL;
+ struct rb_node *n;
- list_for_each_entry(p, head, ellist) {
+ for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) {
+ p = rb_entry(n, struct hfsc_class, el_node);
if (p->cl_e > cur_time)
break;
if (cl == NULL || p->cl_d < cl->cl_d)
@@ -305,92 +280,62 @@ ellist_get_mindl(struct list_head *head, u64 cur_time)
/* find the class with minimum eligible time among the eligible classes */
static inline struct hfsc_class *
-ellist_get_minel(struct list_head *head)
+eltree_get_minel(struct hfsc_sched *q)
{
- if (list_empty(head))
+ struct rb_node *n;
+
+ n = rb_first(&q->eligible);
+ if (n == NULL)
return NULL;
- return list_entry(head->next, struct hfsc_class, ellist);
+ return rb_entry(n, struct hfsc_class, el_node);
}
/*
- * active children list holds backlogged child classes being sorted
- * by their virtual time. each intermediate class has one active
- * children list.
+ * vttree holds backlogged child classes being sorted by their virtual
+ * time. each intermediate class has one vttree.
*/
static void
-actlist_insert(struct hfsc_class *cl)
+vttree_insert(struct hfsc_class *cl)
{
- struct list_head *head = &cl->cl_parent->actlist;
- struct hfsc_class *p;
+ struct rb_node **p = &cl->cl_parent->vt_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct hfsc_class *cl1;
- /* check the last entry first */
- if (list_empty(head) ||
- ((p = list_entry(head->prev, struct hfsc_class, alist)) &&
- p->cl_vt <= cl->cl_vt)) {
- list_add_tail(&cl->alist, head);
- return;
- }
-
- list_for_each_entry(p, head, alist) {
- if (cl->cl_vt < p->cl_vt) {
- /* insert cl before p */
- list_add_tail(&cl->alist, &p->alist);
- return;
- }
+ while (*p != NULL) {
+ parent = *p;
+ cl1 = rb_entry(parent, struct hfsc_class, vt_node);
+ if (cl->cl_vt >= cl1->cl_vt)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
}
- ASSERT(0); /* should not reach here */
+ rb_link_node(&cl->vt_node, parent, p);
+ rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree);
}
static inline void
-actlist_remove(struct hfsc_class *cl)
+vttree_remove(struct hfsc_class *cl)
{
- list_del(&cl->alist);
+ rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree);
}
-static void
-actlist_update(struct hfsc_class *cl)
+static inline void
+vttree_update(struct hfsc_class *cl)
{
- struct list_head *head = &cl->cl_parent->actlist;
- struct hfsc_class *p, *last;
-
- /*
- * the virtual time of a class increases monotonically.
- * if the next entry has a larger virtual time, nothing to do.
- */
- if (cl->alist.next == head ||
- ((p = list_entry(cl->alist.next, struct hfsc_class, alist)) &&
- cl->cl_vt <= p->cl_vt))
- return;
-
- /* check the last entry */
- last = list_entry(head->prev, struct hfsc_class, alist);
- if (last->cl_vt <= cl->cl_vt) {
- list_move_tail(&cl->alist, head);
- return;
- }
-
- /*
- * the new position must be between the next entry
- * and the last entry
- */
- list_for_each_entry_continue(p, head, alist) {
- if (cl->cl_vt < p->cl_vt) {
- list_move_tail(&cl->alist, &p->alist);
- return;
- }
- }
- ASSERT(0); /* should not reach here */
+ vttree_remove(cl);
+ vttree_insert(cl);
}
static inline struct hfsc_class *
-actlist_firstfit(struct hfsc_class *cl, u64 cur_time)
+vttree_firstfit(struct hfsc_class *cl, u64 cur_time)
{
struct hfsc_class *p;
+ struct rb_node *n;
- list_for_each_entry(p, &cl->actlist, alist) {
- if (p->cl_f <= cur_time) {
+ for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) {
+ p = rb_entry(n, struct hfsc_class, vt_node);
+ if (p->cl_f <= cur_time)
return p;
- }
}
return NULL;
}
@@ -399,14 +344,14 @@ actlist_firstfit(struct hfsc_class *cl, u64 cur_time)
* get the leaf class with the minimum vt in the hierarchy
*/
static struct hfsc_class *
-actlist_get_minvt(struct hfsc_class *cl, u64 cur_time)
+vttree_get_minvt(struct hfsc_class *cl, u64 cur_time)
{
/* if root-class's cfmin is bigger than cur_time nothing to do */
if (cl->cl_cfmin > cur_time)
return NULL;
while (cl->level > 0) {
- cl = actlist_firstfit(cl, cur_time);
+ cl = vttree_firstfit(cl, cur_time);
if (cl == NULL)
return NULL;
/*
@@ -418,6 +363,38 @@ actlist_get_minvt(struct hfsc_class *cl, u64 cur_time)
return cl;
}
+static void
+cftree_insert(struct hfsc_class *cl)
+{
+ struct rb_node **p = &cl->cl_parent->cf_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct hfsc_class *cl1;
+
+ while (*p != NULL) {
+ parent = *p;
+ cl1 = rb_entry(parent, struct hfsc_class, cf_node);
+ if (cl->cl_f >= cl1->cl_f)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&cl->cf_node, parent, p);
+ rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_remove(struct hfsc_class *cl)
+{
+ rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_update(struct hfsc_class *cl)
+{
+ cftree_remove(cl);
+ cftree_insert(cl);
+}
+
/*
* service curve support functions
*
@@ -711,7 +688,7 @@ init_ed(struct hfsc_class *cl, unsigned int next_len)
cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
- ellist_insert(cl);
+ eltree_insert(cl);
}
static void
@@ -720,7 +697,7 @@ update_ed(struct hfsc_class *cl, unsigned int next_len)
cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
- ellist_update(cl);
+ eltree_update(cl);
}
static inline void
@@ -729,32 +706,25 @@ update_d(struct hfsc_class *cl, unsigned int next_len)
cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}
-static void
+static inline void
update_cfmin(struct hfsc_class *cl)
{
+ struct rb_node *n = rb_first(&cl->cf_tree);
struct hfsc_class *p;
- u64 cfmin;
- if (list_empty(&cl->actlist)) {
+ if (n == NULL) {
cl->cl_cfmin = 0;
return;
}
- cfmin = HT_INFINITY;
- list_for_each_entry(p, &cl->actlist, alist) {
- if (p->cl_f == 0) {
- cl->cl_cfmin = 0;
- return;
- }
- if (p->cl_f < cfmin)
- cfmin = p->cl_f;
- }
- cl->cl_cfmin = cfmin;
+ p = rb_entry(n, struct hfsc_class, cf_node);
+ cl->cl_cfmin = p->cl_f;
}
static void
init_vf(struct hfsc_class *cl, unsigned int len)
{
- struct hfsc_class *max_cl, *p;
+ struct hfsc_class *max_cl;
+ struct rb_node *n;
u64 vt, f, cur_time;
int go_active;
@@ -767,9 +737,9 @@ init_vf(struct hfsc_class *cl, unsigned int len)
go_active = 0;
if (go_active) {
- if (!list_empty(&cl->cl_parent->actlist)) {
- max_cl = list_entry(cl->cl_parent->actlist.prev,
- struct hfsc_class, alist);
+ n = rb_last(&cl->cl_parent->vt_tree);
+ if (n != NULL) {
+ max_cl = rb_entry(n, struct hfsc_class,vt_node);
/*
* set vt to the average of the min and max
* classes. if the parent's period didn't
@@ -785,19 +755,20 @@ init_vf(struct hfsc_class *cl, unsigned int len)
} else {
/*
* first child for a new parent backlog period.
- * add parent's cvtmax to vtoff of children
- * to make a new vt (vtoff + vt) larger than
- * the vt in the last period for all children.
+ * add parent's cvtmax to cvtoff to make a new
+ * vt (vtoff + vt) larger than the vt in the
+ * last period for all children.
*/
vt = cl->cl_parent->cl_cvtmax;
- list_for_each_entry(p, &cl->cl_parent->children,
- siblings)
- p->cl_vtoff += vt;
- cl->cl_vt = 0;
+ cl->cl_parent->cl_cvtoff += vt;
cl->cl_parent->cl_cvtmax = 0;
cl->cl_parent->cl_cvtmin = 0;
+ cl->cl_vt = 0;
}
+ cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
+ cl->cl_pcvtoff;
+
/* update the virtual curve */
vt = cl->cl_vt + cl->cl_vtoff;
rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
@@ -814,7 +785,8 @@ init_vf(struct hfsc_class *cl, unsigned int len)
cl->cl_parentperiod++;
cl->cl_f = 0;
- actlist_insert(cl);
+ vttree_insert(cl);
+ cftree_insert(cl);
if (cl->cl_flags & HFSC_USC) {
/* class has upper limit curve */
@@ -834,6 +806,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
f = max(cl->cl_myf, cl->cl_cfmin);
if (f != cl->cl_f) {
cl->cl_f = f;
+ cftree_update(cl);
update_cfmin(cl->cl_parent);
}
}
@@ -866,9 +839,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
cl->cl_parent->cl_cvtmax = cl->cl_vt;
- /* remove this class from the vt list */
- actlist_remove(cl);
+ /* remove this class from the vt tree */
+ vttree_remove(cl);
+ cftree_remove(cl);
update_cfmin(cl->cl_parent);
continue;
@@ -890,8 +864,8 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
cl->cl_vt = cl->cl_parent->cl_cvtmin;
}
- /* update the vt list */
- actlist_update(cl);
+ /* update the vt tree */
+ vttree_update(cl);
if (cl->cl_flags & HFSC_USC) {
cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
@@ -921,6 +895,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
f = max(cl->cl_myf, cl->cl_cfmin);
if (f != cl->cl_f) {
cl->cl_f = f;
+ cftree_update(cl);
update_cfmin(cl->cl_parent);
}
}
@@ -941,13 +916,13 @@ static void
set_passive(struct hfsc_class *cl)
{
if (cl->cl_flags & HFSC_RSC)
- ellist_remove(cl);
+ eltree_remove(cl);
list_del(&cl->dlist);
/*
- * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
- * needs to be called explicitly to remove a class from actlist
+ * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from vttree.
*/
}
@@ -1171,7 +1146,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->qdisc = &noop_qdisc;
cl->stats_lock = &sch->dev->queue_lock;
INIT_LIST_HEAD(&cl->children);
- INIT_LIST_HEAD(&cl->actlist);
+ cl->vt_tree = RB_ROOT;
+ cl->cf_tree = RB_ROOT;
sch_tree_lock(sch);
list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]);
@@ -1179,6 +1155,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (parent->level == 0)
hfsc_purge_queue(sch, parent);
hfsc_adjust_levels(parent);
+ cl->cl_pcvtoff = parent->cl_cvtoff;
sch_tree_unlock(sch);
#ifdef CONFIG_NET_ESTIMATOR
@@ -1528,7 +1505,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
u64 next_time = 0;
long delay;
- if ((cl = ellist_get_minel(&q->eligible)) != NULL)
+ if ((cl = eltree_get_minel(q)) != NULL)
next_time = cl->cl_e;
if (q->root.cl_cfmin != 0) {
if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1553,13 +1530,12 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
return -EINVAL;
qopt = RTA_DATA(opt);
- memset(q, 0, sizeof(struct hfsc_sched));
sch->stats_lock = &sch->dev->queue_lock;
q->defcls = qopt->defcls;
for (i = 0; i < HFSC_HSIZE; i++)
INIT_LIST_HEAD(&q->clhash[i]);
- INIT_LIST_HEAD(&q->eligible);
+ q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
skb_queue_head_init(&q->requeue);
@@ -1571,7 +1547,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
q->root.qdisc = &noop_qdisc;
q->root.stats_lock = &sch->dev->queue_lock;
INIT_LIST_HEAD(&q->root.children);
- INIT_LIST_HEAD(&q->root.actlist);
+ q->root.vt_tree = RB_ROOT;
+ q->root.cf_tree = RB_ROOT;
list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
@@ -1611,6 +1588,8 @@ hfsc_reset_class(struct hfsc_class *cl)
cl->cl_vtoff = 0;
cl->cl_cvtmin = 0;
cl->cl_cvtmax = 0;
+ cl->cl_cvtoff = 0;
+ cl->cl_pcvtoff = 0;
cl->cl_vtperiod = 0;
cl->cl_parentperiod = 0;
cl->cl_f = 0;
@@ -1618,7 +1597,9 @@ hfsc_reset_class(struct hfsc_class *cl)
cl->cl_myfadj = 0;
cl->cl_cfmin = 0;
cl->cl_nactive = 0;
- INIT_LIST_HEAD(&cl->actlist);
+
+ cl->vt_tree = RB_ROOT;
+ cl->cf_tree = RB_ROOT;
qdisc_reset(cl->qdisc);
if (cl->cl_flags & HFSC_RSC)
@@ -1641,7 +1622,7 @@ hfsc_reset_qdisc(struct Qdisc *sch)
hfsc_reset_class(cl);
}
__skb_queue_purge(&q->requeue);
- INIT_LIST_HEAD(&q->eligible);
+ q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
del_timer(&q->wd_timer);
sch->flags &= ~TCQ_F_THROTTLED;
@@ -1749,14 +1730,14 @@ hfsc_dequeue(struct Qdisc *sch)
* find the class with the minimum deadline among
* the eligible classes.
*/
- if ((cl = ellist_get_mindl(&q->eligible, cur_time)) != NULL) {
+ if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
realtime = 1;
} else {
/*
* use link-sharing criteria
* get the class with the minimum vt in the hierarchy
*/
- cl = actlist_get_minvt(&q->root, cur_time);
+ cl = vttree_get_minvt(&q->root, cur_time);
if (cl == NULL) {
sch->stats.overlimits++;
hfsc_schedule_watchdog(sch, cur_time);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d07dfd8b5cf0..61c8fa4db608 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1277,7 +1277,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
return -EINVAL;
}
- memset(q,0,sizeof(*q));
q->debug = gopt->debug;
HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 30f2176b992d..13b5c3414794 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -283,21 +283,18 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt)
#ifndef CONFIG_NET_CLS_ACT
#ifndef CONFIG_NETFILTER
printk("You MUST compile classifier actions into the kernel\n");
- goto error;
+ return -EINVAL;
#else
printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
#endif
#endif
- if (NULL == p)
- goto error;
-
#ifndef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NETFILTER
if (!nf_registered) {
if (nf_register_hook(&ing_ops) < 0) {
printk("ingress qdisc registration error \n");
- goto error;
+ return -EINVAL;
}
nf_registered++;
}
@@ -305,12 +302,8 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt)
#endif
DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
- memset(p, 0, sizeof(*p));
- p->filter_list = NULL;
p->q = &noop_qdisc;
return 0;
-error:
- return -EINVAL;
}
@@ -346,9 +339,6 @@ static void ingress_destroy(struct Qdisc *sch)
p->filter_list = tp->next;
tcf_destroy(tp);
}
- memset(p, 0, sizeof(*p));
- p->filter_list = NULL;
-
#if 0
/* for future use */
qdisc_destroy(p->q);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 16a57cf9bcca..bae07708eb01 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -148,13 +148,12 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
struct in_ifaddr *ifa;
struct sctp_sockaddr_entry *addr;
- read_lock(&inetdev_lock);
+ rcu_read_lock();
if ((in_dev = __in_dev_get(dev)) == NULL) {
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
return;
}
- read_lock(&in_dev->lock);
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
/* Add the address to the local list. */
addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
@@ -166,8 +165,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
}
}
- read_unlock(&in_dev->lock);
- read_unlock(&inetdev_lock);
+ rcu_read_unlock();
}
/* Extract our IP addresses from the system and stash them in the
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a7ab07fe5cc0..de4bccc1c25d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -753,7 +753,7 @@ udp_data_ready(struct sock *sk, int len)
struct rpc_rqst *rovr;
struct sk_buff *skb;
int err, repsize, copied;
- u32 xid;
+ u32 _xid, *xp;
read_lock(&sk->sk_callback_lock);
dprintk("RPC: udp_data_ready...\n");
@@ -777,12 +777,14 @@ udp_data_ready(struct sock *sk, int len)
}
/* Copy the XID from the skb... */
- if (skb_copy_bits(skb, sizeof(struct udphdr), &xid, sizeof(xid)) < 0)
+ xp = skb_header_pointer(skb, sizeof(struct udphdr),
+ sizeof(_xid), &_xid);
+ if (xp == NULL)
goto dropit;
/* Look up and lock the request corresponding to the given XID */
spin_lock(&xprt->sock_lock);
- rovr = xprt_lookup_rqst(xprt, xid);
+ rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
task = rovr->rq_task;
diff --git a/net/xfrm/xfrm_export.c b/net/xfrm/xfrm_export.c
index f72754953d29..9f335640d254 100644
--- a/net/xfrm/xfrm_export.c
+++ b/net/xfrm/xfrm_export.c
@@ -33,8 +33,6 @@ EXPORT_SYMBOL(secpath_dup);
EXPORT_SYMBOL(xfrm_get_acqseq);
EXPORT_SYMBOL(xfrm_parse_spi);
EXPORT_SYMBOL(xfrm4_rcv);
-EXPORT_SYMBOL(xfrm4_tunnel_register);
-EXPORT_SYMBOL(xfrm4_tunnel_deregister);
EXPORT_SYMBOL(xfrm_register_type);
EXPORT_SYMBOL(xfrm_unregister_type);
EXPORT_SYMBOL(xfrm_get_type);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index eccc0231faeb..be298cde3022 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -78,15 +78,6 @@ static int verify_encap_tmpl(struct rtattr **xfrma)
if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap))
return -EINVAL;
- encap = RTA_DATA(rt);
- switch (encap->encap_type) {
- case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- break;
- default:
- return -ENOPROTOOPT;
- }
-
return 0;
}
@@ -164,15 +155,24 @@ out:
return err;
}
-static int attach_one_algo(struct xfrm_algo **algpp, struct rtattr *u_arg)
+static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
+ struct xfrm_algo_desc *(*get_byname)(char *),
+ struct rtattr *u_arg)
{
struct rtattr *rta = u_arg;
struct xfrm_algo *p, *ualg;
+ struct xfrm_algo_desc *algo;
if (!rta)
return 0;
ualg = RTA_DATA(rta);
+
+ algo = get_byname(ualg->alg_name);
+ if (!algo)
+ return -ENOSYS;
+ *props = algo->desc.sadb_alg_id;
+
p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
if (!p)
return -ENOMEM;
@@ -225,11 +225,17 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
copy_from_user_state(x, p);
- if ((err = attach_one_algo(&x->aalg, xfrma[XFRMA_ALG_AUTH-1])))
+ if ((err = attach_one_algo(&x->aalg, &x->props.aalgo,
+ xfrm_aalg_get_byname,
+ xfrma[XFRMA_ALG_AUTH-1])))
goto error;
- if ((err = attach_one_algo(&x->ealg, xfrma[XFRMA_ALG_CRYPT-1])))
+ if ((err = attach_one_algo(&x->ealg, &x->props.ealgo,
+ xfrm_ealg_get_byname,
+ xfrma[XFRMA_ALG_CRYPT-1])))
goto error;
- if ((err = attach_one_algo(&x->calg, xfrma[XFRMA_ALG_COMP-1])))
+ if ((err = attach_one_algo(&x->calg, &x->props.calgo,
+ xfrm_calg_get_byname,
+ xfrma[XFRMA_ALG_COMP-1])))
goto error;
if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
goto error;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 52fa3cfdfd5b..587d63bd6861 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2823,48 +2823,50 @@ static void selinux_task_to_inode(struct task_struct *p,
static int selinux_parse_skb_ipv4(struct sk_buff *skb, struct avc_audit_data *ad)
{
int offset, ihlen, ret;
- struct iphdr iph;
+ struct iphdr _iph, *ih;
offset = skb->nh.raw - skb->data;
- ret = skb_copy_bits(skb, offset, &iph, sizeof(iph));
- if (ret)
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
goto out;
- ihlen = iph.ihl * 4;
- if (ihlen < sizeof(iph))
+ ihlen = ih->ihl * 4;
+ if (ihlen < sizeof(_iph))
goto out;
- ad->u.net.v4info.saddr = iph.saddr;
- ad->u.net.v4info.daddr = iph.daddr;
+ ad->u.net.v4info.saddr = ih->saddr;
+ ad->u.net.v4info.daddr = ih->daddr;
- switch (iph.protocol) {
+ switch (ih->protocol) {
case IPPROTO_TCP: {
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
- if (ntohs(iph.frag_off) & IP_OFFSET)
+ if (ntohs(ih->frag_off) & IP_OFFSET)
break;
offset += ihlen;
- if (skb_copy_bits(skb, offset, &tcph, sizeof(tcph)) < 0)
+ th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+ if (th == NULL)
break;
- ad->u.net.sport = tcph.source;
- ad->u.net.dport = tcph.dest;
+ ad->u.net.sport = th->source;
+ ad->u.net.dport = th->dest;
break;
}
case IPPROTO_UDP: {
- struct udphdr udph;
+ struct udphdr _udph, *uh;
- if (ntohs(iph.frag_off) & IP_OFFSET)
+ if (ntohs(ih->frag_off) & IP_OFFSET)
break;
offset += ihlen;
- if (skb_copy_bits(skb, offset, &udph, sizeof(udph)) < 0)
- break;
+ uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (uh == NULL)
+ break;
- ad->u.net.sport = udph.source;
- ad->u.net.dport = udph.dest;
+ ad->u.net.sport = uh->source;
+ ad->u.net.dport = uh->dest;
break;
}
@@ -2882,18 +2884,18 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb, struct avc_audit_data *ad
{
u8 nexthdr;
int ret, offset;
- struct ipv6hdr ipv6h;
+ struct ipv6hdr _ipv6h, *ip6;
offset = skb->nh.raw - skb->data;
- ret = skb_copy_bits(skb, offset, &ipv6h, sizeof(ipv6h));
- if (ret)
+ ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+ if (ip6 == NULL)
goto out;
- ipv6_addr_copy(&ad->u.net.v6info.saddr, &ipv6h.saddr);
- ipv6_addr_copy(&ad->u.net.v6info.daddr, &ipv6h.daddr);
+ ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
+ ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
- nexthdr = ipv6h.nexthdr;
- offset += sizeof(ipv6h);
+ nexthdr = ip6->nexthdr;
+ offset += sizeof(_ipv6h);
offset = ipv6_skip_exthdr(skb, offset, &nexthdr,
skb->tail - skb->head - offset);
if (offset < 0)
@@ -2901,24 +2903,26 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb, struct avc_audit_data *ad
switch (nexthdr) {
case IPPROTO_TCP: {
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th;
- if (skb_copy_bits(skb, offset, &tcph, sizeof(tcph)) < 0)
+ th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+ if (th == NULL)
break;
- ad->u.net.sport = tcph.source;
- ad->u.net.dport = tcph.dest;
+ ad->u.net.sport = th->source;
+ ad->u.net.dport = th->dest;
break;
}
case IPPROTO_UDP: {
- struct udphdr udph;
+ struct udphdr _udph, *uh;
- if (skb_copy_bits(skb, offset, &udph, sizeof(udph)) < 0)
+ uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (uh == NULL)
break;
- ad->u.net.sport = udph.source;
- ad->u.net.dport = udph.dest;
+ ad->u.net.sport = uh->source;
+ ad->u.net.dport = uh->dest;
break;
}