summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@penguin.transmeta.com> 2002-04-03 01:03:45 -0800
committer Linus Torvalds <torvalds@penguin.transmeta.com> 2002-04-03 01:03:45 -0800
commit 5e4b50795ee8c7659a1181cea4c98712e02ea63e (patch)
tree b4e0f1a19c549bc54e93f4ada5c3b0a0953e58cb /net
parent 893d709002e952c54cce2e140854207a5f7fdcca (diff)
parent 6a1a68c8febfc71c895a05a10afe6812b7c77584 (diff)
Merge bk://linuxusb.bkbits.net/linus-2.5
into penguin.transmeta.com:/home/penguin/torvalds/repositories/kernel/linux
Diffstat (limited to 'net')
-rw-r--r-- net/atm/pppoatm.c 1
-rw-r--r-- net/atm/resources.c 144
-rw-r--r-- net/ax25/af_ax25.c 1
-rw-r--r-- net/core/neighbour.c 4
-rw-r--r-- net/core/skbuff.c 2
-rw-r--r-- net/core/sock.c 1
-rw-r--r-- net/econet/af_econet.c 2
-rw-r--r-- net/ipv4/af_inet.c 8
-rw-r--r-- net/ipv4/arp.c 28
-rw-r--r-- net/ipv4/devinet.c 5
-rw-r--r-- net/ipv4/fib_frontend.c 3
-rw-r--r-- net/ipv4/fib_semantics.c 29
-rw-r--r-- net/ipv4/icmp.c 8
-rw-r--r-- net/ipv4/ip_gre.c 5
-rw-r--r-- net/ipv4/ip_input.c 4
-rw-r--r-- net/ipv4/ipip.c 6
-rw-r--r-- net/ipv4/netfilter/Config.help 11
-rw-r--r-- net/ipv4/netfilter/Config.in 2
-rw-r--r-- net/ipv4/netfilter/Makefile 13
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_core.c 372
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_ftp.c 58
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_irc.c 106
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_generic.c 2
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_tcp.c 17
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_udp.c 2
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_standalone.c 14
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat_masq.c 2
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat_redir.c 2
-rw-r--r-- net/ipv4/netfilter/ip_nat_core.c 85
-rw-r--r-- net/ipv4/netfilter/ip_nat_ftp.c 198
-rw-r--r-- net/ipv4/netfilter/ip_nat_helper.c 221
-rw-r--r-- net/ipv4/netfilter/ip_nat_irc.c 191
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_tcp.c 1
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_unknown.c 2
-rw-r--r-- net/ipv4/netfilter/ip_nat_rule.c 44
-rw-r--r-- net/ipv4/netfilter/ip_nat_snmp_basic.c 17
-rw-r--r-- net/ipv4/netfilter/ip_nat_standalone.c 32
-rw-r--r-- net/ipv4/netfilter/ip_queue.c 2
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c 2
-rw-r--r-- net/ipv4/route.c 12
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 2
-rw-r--r-- net/ipv4/tcp.c 4
-rw-r--r-- net/ipv4/tcp_input.c 8
-rw-r--r-- net/ipv4/tcp_ipv4.c 224
-rw-r--r-- net/ipv4/tcp_minisocks.c 65
-rw-r--r-- net/ipv4/tcp_output.c 40
-rw-r--r-- net/ipv4/udp.c 14
-rw-r--r-- net/ipv6/addrconf.c 22
-rw-r--r-- net/ipv6/ndisc.c 54
-rw-r--r-- net/ipv6/netfilter/ip6_queue.c 2
-rw-r--r-- net/ipv6/sit.c 6
-rw-r--r-- net/ipv6/tcp_ipv6.c 77
-rw-r--r-- net/ipv6/udp.c 5
-rw-r--r-- net/khttpd/sockets.c 2
-rw-r--r-- net/lapb/lapb_iface.c 1
-rw-r--r-- net/netlink/netlink_dev.c 2
-rw-r--r-- net/netrom/af_netrom.c 3
-rw-r--r-- net/netsyms.c 1
-rw-r--r-- net/packet/af_packet.c 2
-rw-r--r-- net/rose/af_rose.c 1
-rw-r--r-- net/sched/sch_prio.c 1
-rw-r--r-- net/sched/sch_sfq.c 15
-rw-r--r-- net/sunrpc/stats.c 1
-rw-r--r-- net/sunrpc/xprt.c 2
-rw-r--r-- net/x25/af_x25.c 1
66 files changed, 1472 insertions, 744 deletions
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 0671938f5781..48c41db8bf3e 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -363,3 +363,4 @@ module_exit(pppoatm_exit);
MODULE_AUTHOR("Mitchell Blank Jr <mitch@sfgoth.com>");
MODULE_DESCRIPTION("RFC2364 PPP over ATM/AAL5");
+MODULE_LICENSE("GPL");
diff --git a/net/atm/resources.c b/net/atm/resources.c
index c04f8b10ee22..3857176cc8bc 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -31,95 +31,107 @@ static struct atm_dev *last_dev = NULL;
struct atm_vcc *nodev_vccs = NULL;
extern spinlock_t atm_dev_lock;
-
-static struct atm_dev *alloc_atm_dev(const char *type)
+/* Caller must hold atm_dev_lock. */
+static struct atm_dev *__alloc_atm_dev(const char *type)
{
struct atm_dev *dev;
- dev = kmalloc(sizeof(*dev),GFP_KERNEL);
- if (!dev) return NULL;
- memset(dev,0,sizeof(*dev));
+ dev = kmalloc(sizeof(*dev), GFP_ATOMIC);
+ if (!dev)
+ return NULL;
+ memset(dev, 0, sizeof(*dev));
dev->type = type;
dev->signal = ATM_PHY_SIG_UNKNOWN;
dev->link_rate = ATM_OC3_PCR;
dev->next = NULL;
- spin_lock(&atm_dev_lock);
-
dev->prev = last_dev;
- if (atm_devs) last_dev->next = dev;
- else atm_devs = dev;
+ if (atm_devs)
+ last_dev->next = dev;
+ else
+ atm_devs = dev;
last_dev = dev;
- spin_unlock(&atm_dev_lock);
+
return dev;
}
-
-static void free_atm_dev(struct atm_dev *dev)
+/* Caller must hold atm_dev_lock. */
+static void __free_atm_dev(struct atm_dev *dev)
{
- spin_lock (&atm_dev_lock);
-
- if (dev->prev) dev->prev->next = dev->next;
- else atm_devs = dev->next;
- if (dev->next) dev->next->prev = dev->prev;
- else last_dev = dev->prev;
+ if (dev->prev)
+ dev->prev->next = dev->next;
+ else
+ atm_devs = dev->next;
+ if (dev->next)
+ dev->next->prev = dev->prev;
+ else
+ last_dev = dev->prev;
kfree(dev);
-
- spin_unlock (&atm_dev_lock);
}
-
+/* Caller must hold atm_dev_lock. */
struct atm_dev *atm_find_dev(int number)
{
struct atm_dev *dev;
for (dev = atm_devs; dev; dev = dev->next)
- if (dev->ops && dev->number == number) return dev;
+ if (dev->ops && dev->number == number)
+ return dev;
return NULL;
}
-struct atm_dev *atm_dev_register(const char *type,const struct atmdev_ops *ops,
- int number,atm_dev_flags_t *flags)
+struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
+ int number, atm_dev_flags_t *flags)
{
struct atm_dev *dev;
- dev = alloc_atm_dev(type);
+ spin_lock(&atm_dev_lock);
+
+ dev = __alloc_atm_dev(type);
if (!dev) {
printk(KERN_ERR "atm_dev_register: no space for dev %s\n",
type);
- return NULL;
+ goto done;
}
if (number != -1) {
if (atm_find_dev(number)) {
- free_atm_dev(dev);
- return NULL;
+ __free_atm_dev(dev);
+ dev = NULL;
+ goto done;
}
dev->number = number;
- }
- else {
+ } else {
dev->number = 0;
- while (atm_find_dev(dev->number)) dev->number++;
+ while (atm_find_dev(dev->number))
+ dev->number++;
}
dev->vccs = dev->last = NULL;
dev->dev_data = NULL;
barrier();
dev->ops = ops;
- if (flags) dev->flags = *flags;
- else memset(&dev->flags,0,sizeof(dev->flags));
- memset((void *) &dev->stats,0,sizeof(dev->stats));
+ if (flags)
+ dev->flags = *flags;
+ else
+ memset(&dev->flags, 0, sizeof(dev->flags));
+ memset(&dev->stats, 0, sizeof(dev->stats));
+
#ifdef CONFIG_PROC_FS
- if (ops->proc_read)
+ if (ops->proc_read) {
if (atm_proc_dev_register(dev) < 0) {
printk(KERN_ERR "atm_dev_register: "
- "atm_proc_dev_register failed for dev %s\n",type);
- spin_unlock (&atm_dev_lock);
- free_atm_dev(dev);
- return NULL;
+ "atm_proc_dev_register failed for dev %s\n",
+ type);
+ __free_atm_dev(dev);
+ dev = NULL;
+ goto done;
}
+ }
#endif
- spin_unlock (&atm_dev_lock);
+
+done:
+ spin_unlock(&atm_dev_lock);
return dev;
}
@@ -127,19 +139,22 @@ struct atm_dev *atm_dev_register(const char *type,const struct atmdev_ops *ops,
void atm_dev_deregister(struct atm_dev *dev)
{
#ifdef CONFIG_PROC_FS
- if (dev->ops->proc_read) atm_proc_dev_deregister(dev);
+ if (dev->ops->proc_read)
+ atm_proc_dev_deregister(dev);
#endif
- free_atm_dev(dev);
+ spin_lock(&atm_dev_lock);
+ __free_atm_dev(dev);
+ spin_unlock(&atm_dev_lock);
}
-
void shutdown_atm_dev(struct atm_dev *dev)
{
if (dev->vccs) {
- set_bit(ATM_DF_CLOSE,&dev->flags);
+ set_bit(ATM_DF_CLOSE, &dev->flags);
return;
}
- if (dev->ops->dev_close) dev->ops->dev_close(dev);
+ if (dev->ops->dev_close)
+ dev->ops->dev_close(dev);
atm_dev_deregister(dev);
}
@@ -149,16 +164,18 @@ struct sock *alloc_atm_vcc_sk(int family)
struct atm_vcc *vcc;
sk = sk_alloc(family, GFP_KERNEL, 1, NULL);
- if (!sk) return NULL;
+ if (!sk)
+ return NULL;
vcc = atm_sk(sk) = kmalloc(sizeof(*vcc), GFP_KERNEL);
if (!vcc) {
sk_free(sk);
return NULL;
}
- sock_init_data(NULL,sk);
- memset(vcc,0,sizeof(*vcc));
+ sock_init_data(NULL, sk);
+ memset(vcc, 0, sizeof(*vcc));
vcc->sk = sk;
- if (nodev_vccs) nodev_vccs->prev = vcc;
+ if (nodev_vccs)
+ nodev_vccs->prev = vcc;
vcc->prev = NULL;
vcc->next = nodev_vccs;
nodev_vccs = vcc;
@@ -168,11 +185,16 @@ struct sock *alloc_atm_vcc_sk(int family)
static void unlink_vcc(struct atm_vcc *vcc,struct atm_dev *hold_dev)
{
- if (vcc->prev) vcc->prev->next = vcc->next;
- else if (vcc->dev) vcc->dev->vccs = vcc->next;
- else nodev_vccs = vcc->next;
- if (vcc->next) vcc->next->prev = vcc->prev;
- else if (vcc->dev) vcc->dev->last = vcc->prev;
+ if (vcc->prev)
+ vcc->prev->next = vcc->next;
+ else if (vcc->dev)
+ vcc->dev->vccs = vcc->next;
+ else
+ nodev_vccs = vcc->next;
+ if (vcc->next)
+ vcc->next->prev = vcc->prev;
+ else if (vcc->dev)
+ vcc->dev->last = vcc->prev;
if (vcc->dev && vcc->dev != hold_dev && !vcc->dev->vccs &&
test_bit(ATM_DF_CLOSE,&vcc->dev->flags))
shutdown_atm_dev(vcc->dev);
@@ -185,7 +207,6 @@ void free_atm_vcc_sk(struct sock *sk)
sk_free(sk);
}
-
void bind_vcc(struct atm_vcc *vcc,struct atm_dev *dev)
{
unlink_vcc(vcc,dev);
@@ -193,19 +214,20 @@ void bind_vcc(struct atm_vcc *vcc,struct atm_dev *dev)
if (dev) {
vcc->next = NULL;
vcc->prev = dev->last;
- if (dev->vccs) dev->last->next = vcc;
- else dev->vccs = vcc;
+ if (dev->vccs)
+ dev->last->next = vcc;
+ else
+ dev->vccs = vcc;
dev->last = vcc;
- }
- else {
- if (nodev_vccs) nodev_vccs->prev = vcc;
+ } else {
+ if (nodev_vccs)
+ nodev_vccs->prev = vcc;
vcc->next = nodev_vccs;
vcc->prev = NULL;
nodev_vccs = vcc;
}
}
-
EXPORT_SYMBOL(atm_dev_register);
EXPORT_SYMBOL(atm_dev_deregister);
EXPORT_SYMBOL(atm_find_dev);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 4a950d9ec7b0..8bf321a41698 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1889,6 +1889,7 @@ module_init(ax25_init);
MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
MODULE_DESCRIPTION("The amateur radio AX.25 link layer protocol");
+MODULE_LICENSE("GPL");
static void __exit ax25_exit(void)
{
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 47090dc65fd2..e1329d03182a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1239,6 +1239,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_lock(&neigh_tbl_lock);
for (tbl=neigh_tables; tbl; tbl = tbl->next) {
int err = 0;
+ int override = 1;
struct neighbour *n;
if (tbl->family != ndm->ndm_family)
@@ -1266,6 +1267,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (n) {
if (nlh->nlmsg_flags&NLM_F_EXCL)
err = -EEXIST;
+ override = nlh->nlmsg_flags&NLM_F_REPLACE;
} else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
err = -ENOENT;
else {
@@ -1278,7 +1280,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (err == 0) {
err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
ndm->ndm_state,
- nlh->nlmsg_flags&NLM_F_REPLACE, 0);
+ override, 0);
}
if (n)
neigh_release(n);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3bddba5dbd2e..9c2bc5375880 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -748,7 +748,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
if (skb_cloned(skb)) {
if (!realloc)
BUG();
- if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return -ENOMEM;
}
if (len <= offset) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 9b78c14afa77..23c251f8922c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -108,6 +108,7 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
+#include <linux/tcp.h>
#include <linux/init.h>
#include <asm/uaccess.h>
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 8aeaff1bb471..69b9d72b8fe4 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -1133,3 +1133,5 @@ static int __init econet_proto_init(void)
module_init(econet_proto_init);
module_exit(econet_proto_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b70a21036be..f998e347e432 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -627,7 +627,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
int addr_len, int flags)
{
struct sock *sk=sock->sk;
- struct inet_opt *inet = inet_sk(sk);
int err;
long timeo;
@@ -655,13 +654,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
if (sk->state != TCP_CLOSE)
goto out;
- err = -EAGAIN;
- if (!inet->num) {
- if (sk->prot->get_port(sk, 0) != 0)
- goto out;
- inet->sport = htons(inet->num);
- }
-
err = sk->prot->connect(sk, uaddr, addr_len);
if (err < 0)
goto out;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9e6a18144cbf..dd26ec77a82f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -450,6 +450,32 @@ int arp_bind_neighbour(struct dst_entry *dst)
}
/*
+ * Check if we can use proxy ARP for this path
+ */
+
+static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
+{
+ struct in_device *out_dev;
+ int imi, omi = -1;
+
+ if (!IN_DEV_PROXY_ARP(in_dev))
+ return 0;
+
+ if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0)
+ return 1;
+ if (imi == -1)
+ return 0;
+
+ /* place to check for proxy_arp for routes */
+
+ if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) {
+ omi = IN_DEV_MEDIUM_ID(out_dev);
+ in_dev_put(out_dev);
+ }
+ return (omi != imi && omi != -1);
+}
+
+/*
* Interface to link layer: send routine and receive handler.
*/
@@ -755,7 +781,7 @@ int arp_process(struct sk_buff *skb)
} else if (IN_DEV_FORWARD(in_dev)) {
if ((rt->rt_flags&RTCF_DNAT) ||
(addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
- (IN_DEV_PROXY_ARP(in_dev) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
+ (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 8acc73398857..80d1031f0a72 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1032,7 +1032,7 @@ int devinet_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
static struct devinet_sysctl_table
{
struct ctl_table_header *sysctl_header;
- ctl_table devinet_vars[14];
+ ctl_table devinet_vars[15];
ctl_table devinet_dev[2];
ctl_table devinet_conf_dir[2];
ctl_table devinet_proto_dir[2];
@@ -1066,6 +1066,9 @@ static struct devinet_sysctl_table
{NET_IPV4_CONF_PROXY_ARP, "proxy_arp",
&ipv4_devconf.proxy_arp, sizeof(int), 0644, NULL,
&proc_dointvec},
+ {NET_IPV4_CONF_MEDIUM_ID, "medium_id",
+ &ipv4_devconf.medium_id, sizeof(int), 0644, NULL,
+ &proc_dointvec},
{NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay",
&ipv4_devconf.bootp_relay, sizeof(int), 0644, NULL,
&proc_dointvec},
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c3c48beb995f..958b6218cf93 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -579,6 +579,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_UP:
fib_add_ifaddr(ifa);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ fib_sync_up(ifa->ifa_dev->dev);
+#endif
rt_cache_flush(-1);
break;
case NETDEV_DOWN:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index de25ddc0f658..c6273f1ad2ec 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -56,6 +56,8 @@ int fib_info_cnt;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;
+
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
@@ -869,8 +871,14 @@ int fib_sync_down(u32 local, struct net_device *dev, int force)
nh->nh_scope != scope) {
nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ spin_lock_bh(&fib_multipath_lock);
fi->fib_power -= nh->nh_power;
nh->nh_power = 0;
+ spin_unlock_bh(&fib_multipath_lock);
+ if (force && nh->nh_dev) {
+ dev_put(nh->nh_dev);
+ nh->nh_dev = NULL;
+ }
#endif
dead++;
}
@@ -906,13 +914,19 @@ int fib_sync_up(struct net_device *dev)
alive++;
continue;
}
+ if (nh->nh_dev == NULL && nh->nh_oif == dev->ifindex) {
+ dev_hold(dev);
+ nh->nh_dev = dev;
+ }
if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
continue;
if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
continue;
alive++;
+ spin_lock_bh(&fib_multipath_lock);
nh->nh_power = 0;
nh->nh_flags &= ~RTNH_F_DEAD;
+ spin_unlock_bh(&fib_multipath_lock);
} endfor_nexthops(fi)
if (alive > 0) {
@@ -933,6 +947,7 @@ void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
struct fib_info *fi = res->fi;
int w;
+ spin_lock_bh(&fib_multipath_lock);
if (fi->fib_power <= 0) {
int power = 0;
change_nexthops(fi) {
@@ -942,12 +957,12 @@ void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
}
} endfor_nexthops(fi);
fi->fib_power = power;
-#if 1
if (power <= 0) {
- printk(KERN_CRIT "impossible 777\n");
+ spin_unlock_bh(&fib_multipath_lock);
+ /* Race condition: route has just become dead. */
+ res->nh_sel = 0;
return;
}
-#endif
}
@@ -963,15 +978,15 @@ void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
nh->nh_power--;
fi->fib_power--;
res->nh_sel = nhsel;
+ spin_unlock_bh(&fib_multipath_lock);
return;
}
}
} endfor_nexthops(fi);
-#if 1
- printk(KERN_CRIT "impossible 888\n");
-#endif
- return;
+ /* Race condition: route has just become dead. */
+ res->nh_sel = 0;
+ spin_unlock_bh(&fib_multipath_lock);
}
#endif
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ec1b8e6c091d..a5b7dbb0d0e1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -139,6 +139,8 @@ struct icmp_err icmp_err_convert[] = {
{ EHOSTUNREACH, 1 } /* ICMP_PREC_CUTOFF */
};
+extern int sysctl_ip_default_ttl;
+
/* Control parameters for ECHO replies. */
int sysctl_icmp_echo_ignore_all;
int sysctl_icmp_echo_ignore_broadcasts;
@@ -354,6 +356,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_out_count(icmp_param->data.icmph.type);
inet->tos = skb->nh.iph->tos;
+ inet->ttl = sysctl_ip_default_ttl;
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
if (icmp_param->replyopts.optlen) {
@@ -498,6 +501,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
icmp_param.offset=skb_in->nh.raw - skb_in->data;
icmp_out_count(icmp_param.data.icmph.type);
inet_sk(icmp_socket->sk)->tos = tos;
+ inet_sk(icmp_socket->sk)->ttl = sysctl_ip_default_ttl;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts;
if (icmp_param.replyopts.srr) {
@@ -876,7 +880,7 @@ static void icmp_discard(struct sk_buff *skb)
int icmp_rcv(struct sk_buff *skb)
{
- struct icmphdr *icmph = skb->h.icmph;
+ struct icmphdr *icmph;
struct rtable *rt = (struct rtable*)skb->dst;
ICMP_INC_STATS_BH(IcmpInMsgs);
@@ -895,6 +899,8 @@ int icmp_rcv(struct sk_buff *skb)
if (!pskb_pull(skb, sizeof(struct icmphdr)))
goto error;
+ icmph = skb->h.icmph;
+
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 899bbb02cd54..c1056f760e9d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -760,7 +760,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
}
df = tiph->frag_off;
- mtu = rt->u.dst.pmtu - tunnel->hlen;
+ if (df)
+ mtu = rt->u.dst.pmtu - tunnel->hlen;
+ else
+ mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
if (skb->protocol == __constant_htons(ETH_P_IP)) {
if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3f93680cc5fe..85f756cdd630 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -224,8 +224,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
nf_debug_ip_local_deliver(skb);
#endif /*CONFIG_NETFILTER_DEBUG*/
- if (!pskb_may_pull(skb, ihl))
- goto out;
__skb_pull(skb, ihl);
#ifdef CONFIG_NETFILTER
@@ -279,7 +277,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
sock_put(raw_sk);
} else if (!flag) { /* Free and report errors */
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
-out:
kfree_skb(skb);
}
}
@@ -346,7 +343,6 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
goto drop;
iph = skb->nh.iph;
- skb->ip_summed = 0;
if (ip_options_compile(NULL, skb))
goto inhdr_error;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 87ad2a5a65c6..82634df7c6fe 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -572,7 +572,11 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_error;
}
- mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+ if (tiph->frag_off)
+ mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+ else
+ mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
+
if (mtu < 68) {
tunnel->stat.collisions++;
ip_rt_put(rt);
diff --git a/net/ipv4/netfilter/Config.help b/net/ipv4/netfilter/Config.help
index 5341465c4a2c..8d1c9e23ffeb 100644
--- a/net/ipv4/netfilter/Config.help
+++ b/net/ipv4/netfilter/Config.help
@@ -160,6 +160,17 @@ CONFIG_IP_NF_NAT
If you want to compile it as a module, say M here and read
<file:Documentation/modules.txt>. If unsure, say `N'.
+CONFIG_IP_NF_NAT_LOCAL
+ This option enables support for NAT of locally originated connections.
+ Enable this if you need to use destination NAT on connections
+ originating from local processes on the nat box itself.
+
+ Please note that you will need a recent version (>= 1.2.6a)
+ of the iptables userspace program in order to use this feature.
+ See http://www.iptables.org/ for download instructions.
+
+ If unsure, say 'N'.
+
CONFIG_IP_NF_TARGET_MASQUERADE
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
diff --git a/net/ipv4/netfilter/Config.in b/net/ipv4/netfilter/Config.in
index 7f250e431318..7bf12c2a0de7 100644
--- a/net/ipv4/netfilter/Config.in
+++ b/net/ipv4/netfilter/Config.in
@@ -47,7 +47,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
define_bool CONFIG_IP_NF_NAT_NEEDED y
dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT
dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT
- bool ' NAT of local connections' CONFIG_IP_NF_NAT_LOCAL
+ bool ' NAT of local connections (READ HELP)' CONFIG_IP_NF_NAT_LOCAL
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
dep_tristate ' Basic SNMP-ALG support (EXPERIMENTAL)' CONFIG_IP_NF_NAT_SNMP_BASIC $CONFIG_IP_NF_NAT
fi
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7e1bd4511532..90c33ec64130 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -9,18 +9,18 @@
O_TARGET := netfilter.o
-export-objs = ip_conntrack_standalone.o ip_conntrack_ftp.o ip_fw_compat.o ip_nat_standalone.o ip_tables.o arp_tables.o
+export-objs = ip_conntrack_standalone.o ip_fw_compat.o ip_nat_standalone.o ip_tables.o arp_tables.o
# Multipart objects.
list-multi := ip_conntrack.o iptable_nat.o ipfwadm.o ipchains.o
# objects for the conntrack and NAT core (used by standalone and backw. compat)
ip_nf_conntrack-objs := ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
-ip_nf_nat-objs := ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+ip_nf_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
# objects for the standalone - connection tracking / NAT
ip_conntrack-objs := ip_conntrack_standalone.o $(ip_nf_conntrack-objs)
-iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_helper.o $(ip_nf_nat-objs)
+iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o $(ip_nf_nat-objs)
# objects for backwards compatibility mode
ip_nf_compat-objs := ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(ip_nf_conntrack-objs) $(ip_nf_nat-objs)
@@ -33,7 +33,14 @@ obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
# connection tracking helpers
obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
+ifdef CONFIG_IP_NF_NAT_FTP
+ export-objs += ip_conntrack_ftp.o
+endif
+
obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
+ifdef CONFIG_IP_NF_NAT_IRC
+ export-objs += ip_conntrack_irc.o
+endif
# NAT helpers
obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 41c893ed78bd..eeb5419853ff 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -3,7 +3,12 @@
extension. */
/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General
- Public Licence. */
+ * Public Licence.
+ *
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ * - new API and handling of conntrack/nat helpers
+ * - now capable of multiple expectations for one master
+ * */
#ifdef MODULE
#define __NO_VERSION__
@@ -38,6 +43,8 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
+#define IP_CONNTRACK_VERSION "2.0"
+
#if 0
#define DEBUGP printk
#else
@@ -45,6 +52,7 @@
#endif
DECLARE_RWLOCK(ip_conntrack_lock);
+DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(expect_list);
@@ -77,7 +85,7 @@ struct ip_conntrack_protocol *__find_proto(u_int8_t protocol)
return p;
}
-struct ip_conntrack_protocol *find_proto(u_int8_t protocol)
+struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
{
struct ip_conntrack_protocol *p;
@@ -151,9 +159,58 @@ invert_tuple(struct ip_conntrack_tuple *inverse,
return protocol->invert_tuple(inverse, orig);
}
+/* remove one specific expectation from all lists and free it */
+static void unexpect_related(struct ip_conntrack_expect *expect)
+{
+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+ DEBUGP("unexpect_related(%p)\n", expect);
+ /* delete from global and local lists */
+ list_del(&expect->list);
+ list_del(&expect->expected_list);
+ if (!expect->sibling)
+ expect->expectant->expecting--;
+ kfree(expect);
+}
+
+/* delete all expectations for this conntrack */
+static void destroy_expectations(struct ip_conntrack *ct)
+{
+ struct list_head *exp_entry, *next;
+ struct ip_conntrack_expect *exp;
+
+ DEBUGP("destroy_expectations(%p)\n", ct);
+
+ for (exp_entry = ct->sibling_list.next;
+ exp_entry != &ct->sibling_list; exp_entry = next) {
+ next = exp_entry->next;
+ exp = list_entry(exp_entry, struct ip_conntrack_expect,
+ expected_list);
+
+ /* we skip established expectations, as we want to delete
+ * the un-established ones only */
+ if (exp->sibling) {
+ DEBUGP("destroy_expectations: skipping established %p of %p\n", exp->sibling, ct);
+ continue;
+ }
+
+ IP_NF_ASSERT(list_inlist(&expect_list, exp));
+ IP_NF_ASSERT(exp->expectant == ct);
+
+ if (exp->expectant->helper->timeout
+ && ! del_timer(&exp->timeout)) {
+ DEBUGP("destroy_expectations: skipping dying expectation %p of %p\n", exp, ct);
+ continue;
+ }
+
+ /* delete expectation from global and private lists */
+ unexpect_related(exp);
+ }
+}
+
static void
clean_from_lists(struct ip_conntrack *ct)
{
+ DEBUGP("clean_from_lists(%p)\n", ct);
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
/* Remove from both hash lists: must not NULL out next ptrs,
otherwise we'll look unconfirmed. Fortunately, LIST_DELETE
@@ -164,27 +221,45 @@ clean_from_lists(struct ip_conntrack *ct)
LIST_DELETE(&ip_conntrack_hash
[hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)],
&ct->tuplehash[IP_CT_DIR_REPLY]);
- /* If our expected is in the list, take it out. */
- if (ct->expected.expectant) {
- IP_NF_ASSERT(list_inlist(&expect_list, &ct->expected));
- IP_NF_ASSERT(ct->expected.expectant == ct);
- LIST_DELETE(&expect_list, &ct->expected);
- }
+
+ /* Destroy all un-established, pending expectations */
+ destroy_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
+ struct ip_conntrack_protocol *proto;
+ DEBUGP("destroy_conntrack(%p)\n", ct);
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
IP_NF_ASSERT(!timer_pending(&ct->timeout));
- if (ct->master.master)
- nf_conntrack_put(&ct->master);
+ if (ct->master && master_ct(ct))
+ ip_conntrack_put(master_ct(ct));
+
+ /* To make sure we don't get any weird locking issues here:
+ * destroy_conntrack() MUST NOT be called with a write lock
+ * to ip_conntrack_lock!!! -HW */
+ proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+ if (proto && proto->destroy)
+ proto->destroy(ct);
if (ip_conntrack_destroyed)
ip_conntrack_destroyed(ct);
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Delete our master expectation from the local list
+ * and destroy it, if we've been expected */
+ if (ct->master) {
+ list_del(&ct->master->expected_list);
+ kfree(ct->master);
+ }
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+
+ DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
kmem_cache_free(ip_conntrack_cachep, ct);
atomic_dec(&ip_conntrack_count);
}
@@ -382,7 +457,7 @@ icmp_error_track(struct sk_buff *skb,
return NULL;
}
- innerproto = find_proto(inner->protocol);
+ innerproto = ip_ct_find_proto(inner->protocol);
/* Are they talking about one of our connections? */
if (inner->ihl * 4 + 8 > datalen
|| !get_tuple(inner, datalen, &origtuple, innerproto)) {
@@ -462,10 +537,18 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i,
return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}
+struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
+{
+ return LIST_FIND(&helpers, helper_cmp,
+ struct ip_conntrack_helper *,
+ tuple);
+}
+
/* Compare parts depending on mask. */
static inline int expect_cmp(const struct ip_conntrack_expect *i,
const struct ip_conntrack_tuple *tuple)
{
+ MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
}
@@ -514,7 +597,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
return ERR_PTR(-ENOMEM);
}
- memset(conntrack, 0, sizeof(struct ip_conntrack));
+ memset(conntrack, 0, sizeof(*conntrack));
atomic_set(&conntrack->ct_general.use, 1);
conntrack->ct_general.destroy = destroy_conntrack;
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
@@ -533,31 +616,44 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
+ INIT_LIST_HEAD(&conntrack->sibling_list);
+
/* Mark clearly that it's not in the hash table. */
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list.next = NULL;
- /* Write lock required for deletion of expected. Without
- this, a read-lock would do. */
WRITE_LOCK(&ip_conntrack_lock);
- conntrack->helper = LIST_FIND(&helpers, helper_cmp,
- struct ip_conntrack_helper *,
- &repl_tuple);
/* Need finding and deleting of expected ONLY if we win race */
+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
expected = LIST_FIND(&expect_list, expect_cmp,
struct ip_conntrack_expect *, tuple);
+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
+
+ /* Look up the conntrack helper for master connections only */
+ if (!expected)
+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
+
+ /* If the expectation is dying, then this is a loser. */
+ if (expected
+ && expected->expectant->helper->timeout
+ && ! del_timer(&expected->timeout))
+ expected = NULL;
+
/* If master is not in hash table yet (ie. packet hasn't left
this machine yet), how can other end know about expected?
Hence these are not the droids you are looking for (if
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (expected && is_confirmed(expected->expectant)) {
+ DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
+ conntrack, expected);
/* Welcome, Mr. Bond. We've been expecting you... */
+ IP_NF_ASSERT(master_ct(conntrack));
conntrack->status = IPS_EXPECTED;
- conntrack->master.master = &expected->expectant->ct_general;
- IP_NF_ASSERT(conntrack->master.master);
+ conntrack->master = expected;
+ expected->sibling = conntrack;
LIST_DELETE(&expect_list, expected);
- expected->expectant = NULL;
- nf_conntrack_get(&conntrack->master);
+ expected->expectant->expecting--;
+ nf_conntrack_get(&master_ct(conntrack)->infos[0]);
}
atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
@@ -662,7 +758,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
return NF_STOLEN;
}
- proto = find_proto((*pskb)->nh.iph->protocol);
+ proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
/* It may be an icmp error... */
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
@@ -706,66 +802,210 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
/* Invert 'orig' into 'inverse'.  The protocol handler is looked up from
 * orig->dst.protonum and handed to invert_tuple(), whose return value is
 * passed straight back to the caller. */
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig)
{
- return invert_tuple(inverse, orig, find_proto(orig->dst.protonum));
+ return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
}
-static void unexpect_related(struct ip_conntrack *related_to)
/* List-search predicate: is expectation 'i' the one a helper is asking
 * for again with (tuple, mask)?
 *
 * When i->ct_tuple.dst.protonum is 0 the comparison is made against
 * i->tuple; otherwise it is made against i->ct_tuple (the copy that
 * ip_conntrack_change_expect() saves before replacing i->tuple).  In
 * both cases i->mask must also equal 'mask'.  Returns non-zero on match. */
+static inline int resent_expect(const struct ip_conntrack_expect *i,
+ const struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *mask)
{
- MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
- list_del(&related_to->expected.list);
- related_to->expected.expectant = NULL;
+ DEBUGP("resent_expect\n");
+ DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
+ DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
+ DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
+ return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
+ || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
+ && ip_ct_tuple_equal(&i->mask, mask));
}
/* Would two expected things clash?
 *
 * List-search predicate: builds the field-by-field bitwise AND of
 * i->mask and 'mask', then compares i->tuple with 'tuple' under that
 * combined mask.  Returns the result of ip_ct_tuple_mask_cmp(). */
static inline int expect_clash(const struct ip_conntrack_expect *i,
- const struct ip_conntrack_expect *new)
+ const struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *mask)
{
/* Part covered by intersection of masks must be unequal,
otherwise they clash */
struct ip_conntrack_tuple intersect_mask
- = { { i->mask.src.ip & new->mask.src.ip,
- { i->mask.src.u.all & new->mask.src.u.all } },
- { i->mask.dst.ip & new->mask.dst.ip,
- { i->mask.dst.u.all & new->mask.dst.u.all },
- i->mask.dst.protonum & new->mask.dst.protonum } };
+ = { { i->mask.src.ip & mask->src.ip,
+ { i->mask.src.u.all & mask->src.u.all } },
+ { i->mask.dst.ip & mask->dst.ip,
+ { i->mask.dst.u.all & mask->dst.u.all },
+ i->mask.dst.protonum & mask->dst.protonum } };
- return ip_ct_tuple_mask_cmp(&i->tuple, &new->tuple, &intersect_mask);
+ return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
}
-/* Add a related connection. */
-int ip_conntrack_expect_related(struct ip_conntrack *related_to,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *mask,
- int (*expectfn)(struct ip_conntrack *))
/* Drop a single expectation: takes ip_conntrack_lock for writing and
 * hands 'expect' to unexpect_related(), which does the actual removal. */
+void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
{
WRITE_LOCK(&ip_conntrack_lock);
- if (related_to->expected.expectant)
- unexpect_related(related_to);
+ unexpect_related(expect);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+}
+
/* Timer callback for an expectation's 'timeout' timer (installed as
 * timeout.function by ip_conntrack_expect_related()).  'ul_expect' is
 * the expectation pointer that was stored in timeout.data; the
 * expectation is removed via ip_conntrack_unexpect_related(). */
+static void expectation_timed_out(unsigned long ul_expect)
+{
+ struct ip_conntrack_expect *expect = (void *) ul_expect;
- related_to->expected.tuple = *tuple;
- related_to->expected.mask = *mask;
- related_to->expected.expectfn = expectfn;
+ DEBUGP("expectation %p timed out\n", expect);
+ ip_conntrack_unexpect_related(expect);
+}
+
+/* Add a related connection.
+ *
+ * Registers a copy of the caller-filled 'expect' as an expectation of
+ * the master connection 'related_to'.  The whole operation runs under
+ * the ip_conntrack_lock write lock.
+ *
+ * Returns:
+ *   0        a new expectation was queued;
+ *   -EEXIST  an expectation matching resent_expect() already exists
+ *            (its timer is re-armed when the helper uses timeouts and
+ *            the timer was still pending);
+ *   -EPERM   the helper's max_expected limit was reached.  Without
+ *            IP_CT_HELPER_F_REUSE_EXPECT nothing is added; with the
+ *            flag an existing unanswered expectation is overwritten
+ *            and -EPERM is still the value returned;
+ *   -EBUSY   the tuple/mask clashes with another expectation;
+ *   -ENOMEM  the GFP_ATOMIC allocation failed.
+ *
+ * related_to->helper is dereferenced unconditionally, so the caller
+ * must pass a connection that has a helper attached. */
+int ip_conntrack_expect_related(struct ip_conntrack *related_to,
+ struct ip_conntrack_expect *expect)
+{
+ struct ip_conntrack_expect *new;
+ int ret = 0;
- if (LIST_FIND(&expect_list, expect_clash,
- struct ip_conntrack_expect *, &related_to->expected)) {
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Because of the write lock, no reader can walk the lists,
+ * so there is no need to use the tuple lock too */
+
+ DEBUGP("ip_conntrack_expect_related %p\n", related_to);
+ DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
+ DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
+
+ new = LIST_FIND(&expect_list, resent_expect,
+ struct ip_conntrack_expect *, &expect->tuple, &expect->mask);
+ if (new) {
+ /* Helper private data may contain offsets but no pointers
+ pointing into the payload - otherwise we should have to copy
+ the data filled out by the helper over the old one */
+ DEBUGP("expect_related: resent packet\n");
+ if (related_to->helper->timeout) {
+ /* Refresh timer, if possible... */
+ if (del_timer(&new->timeout)) {
+ new->timeout.expires = jiffies + related_to->helper->timeout * HZ;
+ add_timer(&new->timeout);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return -EEXIST;
+ }
+ /* ... otherwise expectation is dying. Fall through and create a
+ * new one; note that this path skips both the max_expected
+ * and the expect_clash checks of the else-if chain below. */
+ new = NULL;
+ } else {
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return -EEXIST;
+ }
+ } else if (related_to->helper->max_expected
+ && related_to->expecting >= related_to->helper->max_expected) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "ip_conntrack: max number of expected connections %i of %s reached for %u.%u.%u.%u->%u.%u.%u.%u%s\n",
+ related_to->helper->max_expected,
+ related_to->helper->name,
+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip),
+ related_to->helper->flags & IP_CT_HELPER_F_REUSE_EXPECT ?
+ ", reusing" : "");
+ if (related_to->helper->flags & IP_CT_HELPER_F_REUSE_EXPECT) {
+ struct list_head *cur_item;
+
+ /* Let's choose the oldest expectation to overwrite.
+ * The walk stops at the first entry whose 'sibling' is still
+ * NULL, i.e. one that has not been answered yet.
+ * NOTE(review): entries are inserted with list_add() below,
+ * so a head-first walk presumably meets the most recently
+ * added entry first - confirm that "oldest" is really what
+ * gets picked.  If no entry has sibling == NULL the loop
+ * leaves 'new' at the last entry visited. */
+ list_for_each(cur_item, &related_to->sibling_list) {
+ new = list_entry(cur_item, struct ip_conntrack_expect,
+ expected_list);
+ if (new->sibling == NULL)
+ break;
+ }
+ IP_NF_ASSERT(new);
+ if (related_to->helper->timeout
+ && !del_timer(&new->timeout)) {
+ /* Expectation is dying. Fall through and create a new one */
+ new = NULL;
+ } else {
+ list_del(&new->list);
+ list_del(&new->expected_list);
+ related_to->expecting--;
+ ret = -EPERM;
+ }
+ } else {
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return -EPERM;
+ }
+ } else if (LIST_FIND(&expect_list, expect_clash,
+ struct ip_conntrack_expect *, &expect->tuple, &expect->mask)) {
WRITE_UNLOCK(&ip_conntrack_lock);
+ DEBUGP("expect_related: busy!\n");
return -EBUSY;
}
+
+ if (!new) {
+ new = (struct ip_conntrack_expect *)
+ kmalloc(sizeof(*expect), GFP_ATOMIC);
+ if (!new) {
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ DEBUGP("expect_relaed: OOM allocating expect\n");
+ return -ENOMEM;
+ }
+ }
+
+ /* Zero out the new structure, then fill it in with the data.
+ * The memcpy() copies the caller's whole struct, so it also
+ * overwrites the two list heads initialised just before it; they
+ * are linked again by list_add()/list_prepend() further down. */
+ DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
+ memset(new, 0, sizeof(*expect));
+ INIT_LIST_HEAD(&new->list);
+ INIT_LIST_HEAD(&new->expected_list);
+ memcpy(new, expect, sizeof(*expect));
+ new->expectant = related_to;
+ new->sibling = NULL;
+
+ /* add to expected list for this connection */
+ list_add(&new->expected_list, &related_to->sibling_list);
+ /* add to global list of expectations */
+ list_prepend(&expect_list, &new->list);
+ /* add and start timer if required */
+ if (related_to->helper->timeout) {
+ init_timer(&new->timeout);
+ new->timeout.data = (unsigned long)new;
+ new->timeout.function = expectation_timed_out;
+ new->timeout.expires = jiffies + related_to->helper->timeout * HZ;
+ add_timer(&new->timeout);
+ }
+ related_to->expecting++;
- list_prepend(&expect_list, &related_to->expected);
- related_to->expected.expectant = related_to;
WRITE_UNLOCK(&ip_conntrack_lock);
- return 0;
+ return ret;
}
-void ip_conntrack_unexpect_related(struct ip_conntrack *related_to)
+/* Change tuple in an existing expectation.
+ *
+ * Must be called with ip_conntrack_lock held for reading (asserted
+ * below).  Returns 0 when 'newtuple' is accepted and -1 when the caller
+ * has to pick a different tuple.
+ *
+ * First call for an expectation (expect->ct_tuple.dst.protonum == 0):
+ * if 'newtuple' differs from the current tuple and clashes with another
+ * expectation under expect->mask, -1 is returned.  Otherwise the current
+ * tuple is saved in expect->ct_tuple and replaced by 'newtuple'; both
+ * copies are done under the ip_conntrack_expect_tuple_lock write lock.
+ *
+ * Later calls (ct_tuple already filled in): 0 only if 'newtuple' equals
+ * the tuple already installed, -1 otherwise.
+ *
+ * Every branch returns, so the final "return -1" is not reached. */
+int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
+ struct ip_conntrack_tuple *newtuple)
{
- WRITE_LOCK(&ip_conntrack_lock);
- unexpect_related(related_to);
- WRITE_UNLOCK(&ip_conntrack_lock);
-}
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+
+ DEBUGP("change_expect:\n");
+ DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
+ DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask);
+ DEBUGP("newtuple: "); DUMP_TUPLE(newtuple);
+ if (expect->ct_tuple.dst.protonum == 0) {
+ /* Never seen before */
+ DEBUGP("change expect: never seen before\n");
+ if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
+ && LIST_FIND(&expect_list, expect_clash,
+ struct ip_conntrack_expect *, newtuple, &expect->mask)) {
+ /* Force NAT to find an unused tuple */
+ return -1;
+ } else {
+ WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
+ memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
+ memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
+ WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
+ return 0;
+ }
+ } else {
+ /* Resent packet */
+ DEBUGP("change expect: resent packet\n");
+ if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
+ return 0;
+ } else {
+ /* Force NAT to choose again the same port */
+ return -1;
+ }
+ }
+ return -1;
+}
+
/* Alter reply tuple (maybe alter helper). If it's already taken,
return 0 and don't do alteration. */
int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
@@ -783,10 +1023,12 @@ int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
DUMP_TUPLE(newreply);
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- conntrack->helper = LIST_FIND(&helpers, helper_cmp,
- struct ip_conntrack_helper *,
- newreply);
+ if (!conntrack->master)
+ conntrack->helper = LIST_FIND(&helpers, helper_cmp,
+ struct ip_conntrack_helper *,
+ newreply);
WRITE_UNLOCK(&ip_conntrack_lock);
+
return 1;
}
@@ -805,14 +1047,10 @@ static inline int unhelp(struct ip_conntrack_tuple_hash *i,
const struct ip_conntrack_helper *me)
{
if (i->ctrack->helper == me) {
- i->ctrack->helper = NULL;
/* Get rid of any expected. */
- if (i->ctrack->expected.expectant) {
- IP_NF_ASSERT(i->ctrack->expected.expectant
- == i->ctrack);
- LIST_DELETE(&expect_list, &i->ctrack->expected);
- i->ctrack->expected.expectant = NULL;
- }
+ destroy_expectations(i->ctrack);
+ /* And *then* set helper to NULL */
+ i->ctrack->helper = NULL;
}
return 0;
}
@@ -1096,8 +1334,10 @@ int __init ip_conntrack_init(void)
}
ip_conntrack_max = 8 * ip_conntrack_htable_size;
- printk("ip_conntrack (%u buckets, %d max)\n",
- ip_conntrack_htable_size, ip_conntrack_max);
+ printk("ip_conntrack version %s (%u buckets, %d max)"
+ " - %d bytes per conntrack\n", IP_CONNTRACK_VERSION,
+ ip_conntrack_htable_size, ip_conntrack_max,
+ sizeof(struct ip_conntrack));
ret = nf_register_sockopt(&so_getorigdst);
if (ret != 0)
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 3de1af16dd75..7dba1565d063 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -1,4 +1,5 @@
/* FTP extension for IP connection tracking. */
+#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
@@ -242,8 +243,10 @@ static int help(const struct iphdr *iph, size_t len,
u_int32_t array[6] = { 0 };
int dir = CTINFO2DIR(ctinfo);
unsigned int matchlen, matchoff;
- struct ip_conntrack_tuple t, mask;
- struct ip_ct_ftp *info = &ct->help.ct_ftp_info;
+ struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
+ struct ip_conntrack_expect expect, *exp = &expect;
+ struct ip_ct_ftp_expect *exp_ftp_info = &exp->help.exp_ftp_info;
+
unsigned int i;
int found = 0;
@@ -271,8 +274,8 @@ static int help(const struct iphdr *iph, size_t len,
}
LOCK_BH(&ip_ftp_lock);
- old_seq_aft_nl_set = info->seq_aft_nl_set[dir];
- old_seq_aft_nl = info->seq_aft_nl[dir];
+ old_seq_aft_nl_set = ct_ftp_info->seq_aft_nl_set[dir];
+ old_seq_aft_nl = ct_ftp_info->seq_aft_nl[dir];
DEBUGP("conntrack_ftp: datalen %u\n", datalen);
if ((datalen > 0) && (data[datalen-1] == '\n')) {
@@ -281,8 +284,9 @@ static int help(const struct iphdr *iph, size_t len,
|| after(ntohl(tcph->seq) + datalen, old_seq_aft_nl)) {
DEBUGP("conntrack_ftp: updating nl to %u\n",
ntohl(tcph->seq) + datalen);
- info->seq_aft_nl[dir] = ntohl(tcph->seq) + datalen;
- info->seq_aft_nl_set[dir] = 1;
+ ct_ftp_info->seq_aft_nl[dir] =
+ ntohl(tcph->seq) + datalen;
+ ct_ftp_info->seq_aft_nl_set[dir] = 1;
}
}
UNLOCK_BH(&ip_ftp_lock);
@@ -330,16 +334,17 @@ static int help(const struct iphdr *iph, size_t len,
DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
(int)matchlen, data + matchoff,
matchlen, ntohl(tcph->seq) + matchoff);
+
+ memset(&expect, 0, sizeof(expect));
/* Update the ftp info */
LOCK_BH(&ip_ftp_lock);
if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
== ct->tuplehash[dir].tuple.src.ip) {
- info->is_ftp = 21;
- info->seq = ntohl(tcph->seq) + matchoff;
- info->len = matchlen;
- info->ftptype = search[i].ftptype;
- info->port = array[4] << 8 | array[5];
+ exp->seq = ntohl(tcph->seq) + matchoff;
+ exp_ftp_info->len = matchlen;
+ exp_ftp_info->ftptype = search[i].ftptype;
+ exp_ftp_info->port = array[4] << 8 | array[5];
} else {
/* Enrico Scholz's passive FTP to partially RNAT'd ftp
server: it really wants us to connect to a
@@ -356,18 +361,21 @@ static int help(const struct iphdr *iph, size_t len,
if (!loose) goto out;
}
- t = ((struct ip_conntrack_tuple)
+ exp->tuple = ((struct ip_conntrack_tuple)
{ { ct->tuplehash[!dir].tuple.src.ip,
{ 0 } },
{ htonl((array[0] << 24) | (array[1] << 16)
| (array[2] << 8) | array[3]),
{ htons(array[4] << 8 | array[5]) },
IPPROTO_TCP }});
- mask = ((struct ip_conntrack_tuple)
+ exp->mask = ((struct ip_conntrack_tuple)
{ { 0xFFFFFFFF, { 0 } },
{ 0xFFFFFFFF, { 0xFFFF }, 0xFFFF }});
+
+ exp->expectfn = NULL;
+
/* Ignore failure; should only happen with NAT */
- ip_conntrack_expect_related(ct, &t, &mask, NULL);
+ ip_conntrack_expect_related(ct, &expect);
out:
UNLOCK_BH(&ip_ftp_lock);
@@ -375,6 +383,7 @@ static int help(const struct iphdr *iph, size_t len,
}
static struct ip_conntrack_helper ftp[MAX_PORTS];
+static char ftp_names[MAX_PORTS][10];
/* Not __exit: called from init() */
static void fini(void)
@@ -390,9 +399,10 @@ static void fini(void)
static int __init init(void)
{
int i, ret;
+ char *tmpname;
if (ports[0] == 0)
- ports[0] = 21;
+ ports[0] = FTP_PORT;
for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
memset(&ftp[i], 0, sizeof(struct ip_conntrack_helper));
@@ -400,7 +410,19 @@ static int __init init(void)
ftp[i].tuple.dst.protonum = IPPROTO_TCP;
ftp[i].mask.src.u.tcp.port = 0xFFFF;
ftp[i].mask.dst.protonum = 0xFFFF;
+ ftp[i].max_expected = 1;
+ ftp[i].timeout = 0;
+ ftp[i].flags = IP_CT_HELPER_F_REUSE_EXPECT;
+ ftp[i].me = ip_conntrack_ftp;
ftp[i].help = help;
+
+ tmpname = &ftp_names[i][0];
+ if (ports[i] == FTP_PORT)
+ sprintf(tmpname, "ftp");
+ else
+ sprintf(tmpname, "ftp-%d", ports[i]);
+ ftp[i].name = tmpname;
+
DEBUGP("ip_ct_ftp: registering helper for port %d\n",
ports[i]);
ret = ip_conntrack_helper_register(&ftp[i]);
@@ -414,10 +436,10 @@ static int __init init(void)
return 0;
}
-
+#ifdef CONFIG_IP_NF_NAT_NEEDED
EXPORT_SYMBOL(ip_ftp_lock);
-EXPORT_SYMBOL(ip_conntrack_ftp);
-MODULE_LICENSE("GPL");
+#endif
+MODULE_LICENSE("GPL");
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index a92087b6daf8..39d31f60cb02 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -11,12 +11,18 @@
**
* Module load syntax:
* insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
+ * max_dcc_channels=n dcc_timeout=secs
*
* please give the ports of all IRC servers You wish to connect to.
- * If You don't specify ports, the default will be port 6667
+ * If You don't specify ports, the default will be port 6667.
+ * With max_dcc_channels you can define the maximum number of not
+ * yet answered DCC channels per IRC session (default 8).
+ * With dcc_timeout you can specify how long the system waits for
+ * an expected DCC channel (default 300 seconds).
*
*/
+#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
@@ -30,6 +36,8 @@
#define MAX_PORTS 8
static int ports[MAX_PORTS];
static int ports_n_c = 0;
+static int max_dcc_channels = 8;
+static unsigned int dcc_timeout = 300;
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking module");
@@ -37,6 +45,10 @@ MODULE_LICENSE("GPL");
#ifdef MODULE_PARM
MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
+MODULE_PARM(max_dcc_channels, "i");
+MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
+MODULE_PARM(dcc_timeout, "i");
+MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
#endif
#define NUM_DCCPROTO 5
@@ -103,23 +115,15 @@ static int help(const struct iphdr *iph, size_t len,
u_int32_t tcplen = len - iph->ihl * 4;
u_int32_t datalen = tcplen - tcph->doff * 4;
int dir = CTINFO2DIR(ctinfo);
- struct ip_conntrack_tuple t, mask;
+ struct ip_conntrack_expect expect, *exp = &expect;
+ struct ip_ct_irc_expect *exp_irc_info = &exp->help.exp_irc_info;
u_int32_t dcc_ip;
u_int16_t dcc_port;
int i;
char *addr_beg_p, *addr_end_p;
- struct ip_ct_irc *info = &ct->help.ct_irc_info;
-
- mask = ((struct ip_conntrack_tuple)
- { { 0, { 0 } },
- { 0xFFFFFFFF, { 0xFFFF }, 0xFFFF }});
-
DEBUGP("entered\n");
- /* Can't track connections formed before we registered */
- if (!info)
- return NF_ACCEPT;
/* If packet is coming from IRC server */
if (dir == IP_CT_DIR_REPLY)
@@ -189,33 +193,37 @@ static int help(const struct iphdr *iph, size_t len,
continue;
}
+
+ memset(&expect, 0, sizeof(expect));
LOCK_BH(&ip_irc_lock);
/* save position of address in dcc string,
* necessary for NAT */
- info->is_irc = IP_CONNTR_IRC;
DEBUGP("tcph->seq = %u\n", tcph->seq);
- info->seq = ntohl(tcph->seq) + (addr_beg_p - _data);
- info->len = (addr_end_p - addr_beg_p);
- info->port = dcc_port;
+ exp->seq = ntohl(tcph->seq) + (addr_beg_p - _data);
+ exp_irc_info->len = (addr_end_p - addr_beg_p);
+ exp_irc_info->port = dcc_port;
DEBUGP("wrote info seq=%u (ofs=%u), len=%d\n",
- info->seq, (addr_end_p - _data), info->len);
+ exp->seq, (addr_end_p - _data), exp_irc_info->len);
+
+ exp->tuple = ((struct ip_conntrack_tuple)
+ { { 0, { 0 } },
+ { htonl(dcc_ip), { htons(dcc_port) },
+ IPPROTO_TCP }});
+ exp->mask = ((struct ip_conntrack_tuple)
+ { { 0, { 0 } },
+ { 0xFFFFFFFF, { 0xFFFF }, 0xFFFF }});
- memset(&t, 0, sizeof(t));
- t.src.ip = 0;
- t.src.u.tcp.port = 0;
- t.dst.ip = htonl(dcc_ip);
- t.dst.u.tcp.port = htons(info->port);
- t.dst.protonum = IPPROTO_TCP;
+ exp->expectfn = NULL;
DEBUGP("expect_related %u.%u.%u.%u:%u-%u.%u.%u.%u:%u\n",
- NIPQUAD(t.src.ip),
- ntohs(t.src.u.tcp.port),
- NIPQUAD(t.dst.ip),
- ntohs(t.dst.u.tcp.port));
+ NIPQUAD(exp->tuple.src.ip),
+ ntohs(exp->tuple.src.u.tcp.port),
+ NIPQUAD(exp->tuple.dst.ip),
+ ntohs(exp->tuple.dst.u.tcp.port));
- ip_conntrack_expect_related(ct, &t, &mask, NULL);
+ ip_conntrack_expect_related(ct, &expect);
UNLOCK_BH(&ip_irc_lock);
return NF_ACCEPT;
@@ -226,29 +234,53 @@ static int help(const struct iphdr *iph, size_t len,
}
static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
+static char irc_names[MAX_PORTS][10];
static void fini(void);
static int __init init(void)
{
int i, ret;
+ struct ip_conntrack_helper *hlpr;
+ char *tmpname;
+ if (max_dcc_channels < 1) {
+ printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
+ return -EBUSY;
+ }
+ if (dcc_timeout < 0) {
+ printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
+ return -EBUSY;
+ }
+
/* If no port given, default to standard irc port */
if (ports[0] == 0)
- ports[0] = 6667;
+ ports[0] = IRC_PORT;
for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
- memset(&irc_helpers[i], 0,
+ hlpr = &irc_helpers[i];
+ memset(hlpr, 0,
sizeof(struct ip_conntrack_helper));
- irc_helpers[i].tuple.src.u.tcp.port = htons(ports[i]);
- irc_helpers[i].tuple.dst.protonum = IPPROTO_TCP;
- irc_helpers[i].mask.src.u.tcp.port = 0xFFFF;
- irc_helpers[i].mask.dst.protonum = 0xFFFF;
- irc_helpers[i].help = help;
+ hlpr->tuple.src.u.tcp.port = htons(ports[i]);
+ hlpr->tuple.dst.protonum = IPPROTO_TCP;
+ hlpr->mask.src.u.tcp.port = 0xFFFF;
+ hlpr->mask.dst.protonum = 0xFFFF;
+ hlpr->max_expected = max_dcc_channels;
+ hlpr->timeout = dcc_timeout;
+ hlpr->flags = IP_CT_HELPER_F_REUSE_EXPECT;
+ hlpr->me = ip_conntrack_irc;
+ hlpr->help = help;
+
+ tmpname = &irc_names[i][0];
+ if (ports[i] == IRC_PORT)
+ sprintf(tmpname, "irc");
+ else
+ sprintf(tmpname, "irc-%d", i);
+ hlpr->name = tmpname;
DEBUGP("port #%d: %d\n", i, ports[i]);
- ret = ip_conntrack_helper_register(&irc_helpers[i]);
+ ret = ip_conntrack_helper_register(hlpr);
if (ret) {
printk("ip_conntrack_irc: ERROR registering port %d\n",
@@ -273,5 +305,9 @@ static void fini(void)
}
}
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+EXPORT_SYMBOL(ip_irc_lock);
+#endif
+
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index fcc0eed71a0f..e5b399b26eba 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -57,5 +57,5 @@ new(struct ip_conntrack *conntrack, struct iphdr *iph, size_t len)
struct ip_conntrack_protocol ip_conntrack_generic_protocol
= { { NULL, NULL }, 0, "unknown",
generic_pkt_to_tuple, generic_invert_tuple, generic_print_tuple,
- generic_print_conntrack, established, new, NULL };
+ generic_print_conntrack, established, new, NULL, NULL, NULL };
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index b0eb65891d5f..b72e1a8eeea6 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -113,4 +113,4 @@ static int icmp_new(struct ip_conntrack *conntrack,
struct ip_conntrack_protocol ip_conntrack_protocol_icmp
= { { NULL, NULL }, IPPROTO_ICMP, "icmp",
icmp_pkt_to_tuple, icmp_invert_tuple, icmp_print_tuple,
- icmp_print_conntrack, icmp_packet, icmp_new, NULL };
+ icmp_print_conntrack, icmp_packet, icmp_new, NULL, NULL, NULL };
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 4f52a027fb3c..e22ba471b914 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -7,6 +7,9 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
+
+#include <net/tcp.h>
+
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/lockhelp.h>
@@ -227,7 +230,19 @@ static int tcp_new(struct ip_conntrack *conntrack,
return 1;
}
/* TCP hook for the exp_matches_pkt slot of ip_conntrack_protocol_tcp:
 * does expectation 'exp' belong to the segment in *pskb?
 *
 * datalen is the skb length minus the IP header (ihl * 4) and the TCP
 * header (doff * 4).  The result is between() applied to exp->seq, the
 * segment's sequence number, and that sequence number plus datalen. */
+static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
+ struct sk_buff **pskb)
+{
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl);
+ unsigned int datalen;
+
+ datalen = (*pskb)->len - iph->ihl*4 - tcph->doff*4;
+
+ return between(exp->seq, ntohl(tcph->seq), ntohl(tcph->seq) + datalen);
+}
+
struct ip_conntrack_protocol ip_conntrack_protocol_tcp
= { { NULL, NULL }, IPPROTO_TCP, "tcp",
tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
- tcp_packet, tcp_new, NULL };
+ tcp_packet, tcp_new, NULL, tcp_exp_matches_pkt, NULL };
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 86544b03d2ce..57e807026c18 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -71,4 +71,4 @@ static int udp_new(struct ip_conntrack *conntrack,
struct ip_conntrack_protocol ip_conntrack_protocol_udp
= { { NULL, NULL }, IPPROTO_UDP, "udp",
udp_pkt_to_tuple, udp_invert_tuple, udp_print_tuple, udp_print_conntrack,
- udp_packet, udp_new, NULL };
+ udp_packet, udp_new, NULL, NULL, NULL };
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index b93dd0167679..12480c0d9139 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -62,7 +62,13 @@ print_expect(char *buffer, const struct ip_conntrack_expect *expect)
{
unsigned int len;
- len = sprintf(buffer, "EXPECTING: proto=%u ",
+ if (expect->expectant->helper->timeout)
+ len = sprintf(buffer, "EXPECTING: %lu ",
+ timer_pending(&expect->timeout)
+ ? (expect->timeout.expires - jiffies)/HZ : 0);
+ else
+ len = sprintf(buffer, "EXPECTING: - ");
+ len += sprintf(buffer + len, "proto=%u ",
expect->tuple.dst.protonum);
len += print_tuple(buffer + len, &expect->tuple,
__find_proto(expect->tuple.dst.protonum));
@@ -314,7 +320,7 @@ void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
{
WRITE_LOCK(&ip_conntrack_lock);
- /* find_proto() returns proto_generic in case there is no protocol
+ /* ip_ct_find_proto() returns proto_generic in case there is no protocol
* helper. So this should be enough - HW */
LIST_DELETE(&protocol_list, proto);
WRITE_UNLOCK(&ip_conntrack_lock);
@@ -353,8 +359,12 @@ EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_selective_cleanup);
EXPORT_SYMBOL(ip_ct_refresh);
+EXPORT_SYMBOL(ip_ct_find_proto);
+EXPORT_SYMBOL(ip_ct_find_helper);
EXPORT_SYMBOL(ip_conntrack_expect_related);
+EXPORT_SYMBOL(ip_conntrack_change_expect);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
EXPORT_SYMBOL(ip_conntrack_htable_size);
+EXPORT_SYMBOL(ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
index 1d462ac6d7a0..708dff11745b 100644
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
@@ -130,7 +130,7 @@ check_for_demasq(struct sk_buff **pskb)
struct ip_conntrack *ct;
int ret;
- protocol = find_proto(iph->protocol);
+ protocol = ip_ct_find_proto(iph->protocol);
/* We don't feed packets to conntrack system unless we know
they're part of an connection already established by an
diff --git a/net/ipv4/netfilter/ip_fw_compat_redir.c b/net/ipv4/netfilter/ip_fw_compat_redir.c
index 1b11391683e1..0540d87e1134 100644
--- a/net/ipv4/netfilter/ip_fw_compat_redir.c
+++ b/net/ipv4/netfilter/ip_fw_compat_redir.c
@@ -43,7 +43,7 @@ do { \
netplay... */ \
printk("ASSERT: %s:%i(%s)\n", \
__FILE__, __LINE__, __FUNCTION__); \
-} while(0);
+} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index ebbbde93ead4..50873f1890e7 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -21,10 +21,14 @@
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/listhelp.h>
#if 0
@@ -34,6 +38,7 @@
#endif
DECLARE_RWLOCK(ip_nat_lock);
+DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
/* Calculated at init based on memory size */
static unsigned int ip_nat_htable_size;
@@ -628,8 +633,9 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
}
/* If there's a helper, assign it; based on new tuple. */
- info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
- &reply);
+ if (!conntrack->master)
+ info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
+ &reply);
/* It's done. */
info->initialized |= (1 << HOOK2MANIP(hooknum));
@@ -724,6 +730,21 @@ manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len,
#endif
}
/* Ask the conntrack protocol of the packet in *pskb whether expectation
 * 'exp' belongs to this packet.  The protocol is looked up from the IP
 * header's protocol number; if it provides no exp_matches_pkt hook the
 * answer defaults to 1.  The lookup and the hook call are both done
 * under the ip_conntrack_lock read lock taken here. */
+static inline int exp_for_packet(struct ip_conntrack_expect *exp,
+ struct sk_buff **pskb)
+{
+ struct ip_conntrack_protocol *proto;
+ int ret = 1;
+
+ READ_LOCK(&ip_conntrack_lock);
+ proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
+ if (proto->exp_matches_pkt)
+ ret = proto->exp_matches_pkt(exp, pskb);
+ READ_UNLOCK(&ip_conntrack_lock);
+
+ return ret;
+}
+
/* Do packet manipulations according to binding. */
unsigned int
do_bindings(struct ip_conntrack *ct,
@@ -735,6 +756,7 @@ do_bindings(struct ip_conntrack *ct,
unsigned int i;
struct ip_nat_helper *helper;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ int is_tcp = (*pskb)->nh.iph->protocol == IPPROTO_TCP;
/* Need nat lock to protect against modification, but neither
conntrack (referenced) and helper (deleted with
@@ -773,11 +795,66 @@ do_bindings(struct ip_conntrack *ct,
READ_UNLOCK(&ip_nat_lock);
if (helper) {
+ struct ip_conntrack_expect *exp = NULL;
+ struct list_head *cur_item;
+ int ret = NF_ACCEPT;
+
+ DEBUGP("do_bindings: helper existing for (%p)\n", ct);
+
/* Always defragged for helpers */
IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
& __constant_htons(IP_MF|IP_OFFSET)));
- return helper->help(ct, info, ctinfo, hooknum, pskb);
- } else return NF_ACCEPT;
+
+ /* Have to grab read lock before sibling_list traversal */
+ READ_LOCK(&ip_conntrack_lock);
+ list_for_each(cur_item, &ct->sibling_list) {
+ exp = list_entry(cur_item, struct ip_conntrack_expect,
+ expected_list);
+
+ /* if this expectation is already established, skip */
+ if (exp->sibling)
+ continue;
+
+ if (exp_for_packet(exp, pskb)) {
+ /* FIXME: May be true multiple times in the case of UDP!! */
+ DEBUGP("calling nat helper (exp=%p) for packet\n",
+ exp);
+ ret = helper->help(ct, exp, info, ctinfo,
+ hooknum, pskb);
+ if (ret != NF_ACCEPT) {
+ READ_UNLOCK(&ip_conntrack_lock);
+ return ret;
+ }
+ }
+ }
+ /* Helper might want to manip the packet even when there is no expectation */
+ if (!exp && helper->flags & IP_NAT_HELPER_F_ALWAYS) {
+ DEBUGP("calling nat helper for packet without expectation\n");
+ ret = helper->help(ct, NULL, info, ctinfo,
+ hooknum, pskb);
+ if (ret != NF_ACCEPT) {
+ READ_UNLOCK(&ip_conntrack_lock);
+ return ret;
+ }
+ }
+ READ_UNLOCK(&ip_conntrack_lock);
+
+ /* Adjust sequence number only once per packet
+ * (helper is called at all hooks) */
+ if (is_tcp && (hooknum == NF_IP_POST_ROUTING
+ || hooknum == NF_IP_LOCAL_IN)) {
+ DEBUGP("ip_nat_core: adjusting sequence number\n");
+ /* future: put this in a l4-proto specific function,
+ * and call this function here. */
+ ip_nat_seq_adjust(*pskb, ct, ctinfo);
+ }
+
+ return ret;
+
+ } else
+ return NF_ACCEPT;
+
+ /* not reached */
}
unsigned int
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index 788a6412d88f..a539e979ae67 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -28,38 +28,30 @@ DECLARE_LOCK_EXTERN(ip_ftp_lock);
/* FIXME: Time out? --RR */
-static int
+static unsigned int
ftp_nat_expected(struct sk_buff **pskb,
unsigned int hooknum,
struct ip_conntrack *ct,
- struct ip_nat_info *info,
- struct ip_conntrack *master,
- struct ip_nat_info *masterinfo,
- unsigned int *verdict)
+ struct ip_nat_info *info)
{
struct ip_nat_multi_range mr;
u_int32_t newdstip, newsrcip, newip;
- struct ip_ct_ftp *ftpinfo;
+ struct ip_ct_ftp_expect *exp_ftp_info;
+ struct ip_conntrack *master = master_ct(ct);
+
IP_NF_ASSERT(info);
IP_NF_ASSERT(master);
- IP_NF_ASSERT(masterinfo);
IP_NF_ASSERT(!(info->initialized & (1<<HOOK2MANIP(hooknum))));
DEBUGP("nat_expected: We have a connection!\n");
- /* Master must be an ftp connection */
- ftpinfo = &master->help.ct_ftp_info;
+ exp_ftp_info = &ct->master->help.exp_ftp_info;
LOCK_BH(&ip_ftp_lock);
- if (ftpinfo->is_ftp != 21) {
- UNLOCK_BH(&ip_ftp_lock);
- DEBUGP("nat_expected: master not ftp\n");
- return 0;
- }
- if (ftpinfo->ftptype == IP_CT_FTP_PORT
- || ftpinfo->ftptype == IP_CT_FTP_EPRT) {
+ if (exp_ftp_info->ftptype == IP_CT_FTP_PORT
+ || exp_ftp_info->ftptype == IP_CT_FTP_EPRT) {
/* PORT command: make connection go to the client. */
newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
@@ -92,11 +84,9 @@ ftp_nat_expected(struct sk_buff **pskb,
mr.range[0].flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
mr.range[0].min = mr.range[0].max
= ((union ip_conntrack_manip_proto)
- { htons(ftpinfo->port) });
+ { htons(exp_ftp_info->port) });
}
- *verdict = ip_nat_setup_info(ct, &mr, hooknum);
-
- return 1;
+ return ip_nat_setup_info(ct, &mr, hooknum);
}
static int
@@ -176,27 +166,22 @@ static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
[IP_CT_FTP_EPSV] mangle_epsv_packet
};
-static int ftp_data_fixup(const struct ip_ct_ftp *ct_ftp_info,
+static int ftp_data_fixup(const struct ip_ct_ftp_expect *ct_ftp_info,
struct ip_conntrack *ct,
- unsigned int datalen,
struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack_expect *expect)
{
u_int32_t newip;
struct iphdr *iph = (*pskb)->nh.iph;
struct tcphdr *tcph = (void *)iph + iph->ihl*4;
u_int16_t port;
- struct ip_conntrack_tuple tuple;
- /* Don't care about source port */
- const struct ip_conntrack_tuple mask
- = { { 0xFFFFFFFF, { 0 } },
- { 0xFFFFFFFF, { 0xFFFF }, 0xFFFF } };
+ struct ip_conntrack_tuple newtuple;
- memset(&tuple, 0, sizeof(tuple));
MUST_BE_LOCKED(&ip_ftp_lock);
- DEBUGP("FTP_NAT: seq %u + %u in %u + %u\n",
- ct_ftp_info->seq, ct_ftp_info->len,
- ntohl(tcph->seq), datalen);
+ DEBUGP("FTP_NAT: seq %u + %u in %u\n",
+ expect->seq, ct_ftp_info->len,
+ ntohl(tcph->seq));
/* Change address inside packet to match way we're mapping
this connection. */
@@ -206,29 +191,34 @@ static int ftp_data_fixup(const struct ip_ct_ftp *ct_ftp_info,
is */
newip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
/* Expect something from client->server */
- tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+ newtuple.src.ip =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+ newtuple.dst.ip =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
} else {
/* PORT command: must be where server thinks client is */
newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
/* Expect something from server->client */
- tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+ newtuple.src.ip =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+ newtuple.dst.ip =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
}
- tuple.dst.protonum = IPPROTO_TCP;
+ newtuple.dst.protonum = IPPROTO_TCP;
+ newtuple.src.u.tcp.port = expect->tuple.src.u.tcp.port;
/* Try to get same port: if not, try to change it. */
for (port = ct_ftp_info->port; port != 0; port++) {
- tuple.dst.u.tcp.port = htons(port);
+ newtuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(ct, &tuple, &mask, NULL) == 0)
+ if (ip_conntrack_change_expect(expect, &newtuple) == 0)
break;
}
if (port == 0)
return 0;
if (!mangle[ct_ftp_info->ftptype](pskb, newip, port,
- ct_ftp_info->seq - ntohl(tcph->seq),
+ expect->seq - ntohl(tcph->seq),
ct_ftp_info->len, ct, ctinfo))
return 0;
@@ -236,6 +226,7 @@ static int ftp_data_fixup(const struct ip_ct_ftp *ct_ftp_info,
}
static unsigned int help(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp,
struct ip_nat_info *info,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
@@ -245,13 +236,12 @@ static unsigned int help(struct ip_conntrack *ct,
struct tcphdr *tcph = (void *)iph + iph->ihl*4;
unsigned int datalen;
int dir;
- int score;
- struct ip_ct_ftp *ct_ftp_info
- = &ct->help.ct_ftp_info;
+ struct ip_ct_ftp_expect *ct_ftp_info;
- /* Delete SACK_OK on initial TCP SYNs. */
- if (tcph->syn && !tcph->ack)
- ip_nat_delete_sack(*pskb, tcph);
+ if (!exp)
+ DEBUGP("ip_nat_ftp: no exp!!");
+
+ ct_ftp_info = &exp->help.exp_ftp_info;
/* Only mangle things once: original direction in POST_ROUTING
and reply direction on PRE_ROUTING. */
@@ -267,50 +257,34 @@ static unsigned int help(struct ip_conntrack *ct,
}
datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4;
- score = 0;
LOCK_BH(&ip_ftp_lock);
- if (ct_ftp_info->len) {
- /* If it's in the right range... */
- score += between(ct_ftp_info->seq, ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- score += between(ct_ftp_info->seq + ct_ftp_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- if (score == 1) {
- /* Half a match? This means a partial retransmisison.
- It's a cracker being funky. */
- if (net_ratelimit()) {
- printk("FTP_NAT: partial packet %u/%u in %u/%u\n",
- ct_ftp_info->seq, ct_ftp_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- }
+ /* If it's in the right range... */
+ if (between(exp->seq + ct_ftp_info->len,
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen)) {
+ if (!ftp_data_fixup(ct_ftp_info, ct, pskb, ctinfo, exp)) {
UNLOCK_BH(&ip_ftp_lock);
return NF_DROP;
- } else if (score == 2) {
- if (!ftp_data_fixup(ct_ftp_info, ct, datalen,
- pskb, ctinfo)) {
- UNLOCK_BH(&ip_ftp_lock);
- return NF_DROP;
- }
- /* skb may have been reallocated */
- iph = (*pskb)->nh.iph;
- tcph = (void *)iph + iph->ihl*4;
}
+ } else {
+ /* Half a match? This means a partial retransmission.
+ It's a cracker being funky. */
+ if (net_ratelimit()) {
+ printk("FTP_NAT: partial packet %u/%u in %u/%u\n",
+ exp->seq, ct_ftp_info->len,
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen);
+ }
+ UNLOCK_BH(&ip_ftp_lock);
+ return NF_DROP;
}
-
UNLOCK_BH(&ip_ftp_lock);
- ip_nat_seq_adjust(*pskb, ct, ctinfo);
-
return NF_ACCEPT;
}
static struct ip_nat_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][6];
-
-static struct ip_nat_expect ftp_expect
-= { { NULL, NULL }, ftp_nat_expected };
+static char ftp_names[MAX_PORTS][10];
/* Not __exit: called from init() */
static void fini(void)
@@ -321,49 +295,49 @@ static void fini(void)
DEBUGP("ip_nat_ftp: unregistering port %d\n", ports[i]);
ip_nat_helper_unregister(&ftp[i]);
}
-
- ip_nat_expect_unregister(&ftp_expect);
}
static int __init init(void)
{
- int i, ret;
+ int i, ret = 0;
char *tmpname;
- ret = ip_nat_expect_register(&ftp_expect);
- if (ret == 0) {
- if (ports[0] == 0)
- ports[0] = 21;
-
- for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
-
- memset(&ftp[i], 0, sizeof(struct ip_nat_helper));
-
- ftp[i].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
- ftp[i].mask.dst.protonum = 0xFFFF;
- ftp[i].mask.src.u.tcp.port = 0xFFFF;
- ftp[i].help = help;
+ if (ports[0] == 0)
+ ports[0] = FTP_PORT;
- tmpname = &ftp_names[i][0];
- sprintf(tmpname, "ftp%2.2d", i);
- ftp[i].name = tmpname;
-
- DEBUGP("ip_nat_ftp: Trying to register for port %d\n",
- ports[i]);
- ret = ip_nat_helper_register(&ftp[i]);
+ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
- if (ret) {
- printk("ip_nat_ftp: error registering helper for port %d\n", ports[i]);
- fini();
- return ret;
- }
- ports_c++;
+ memset(&ftp[i], 0, sizeof(struct ip_nat_helper));
+
+ ftp[i].tuple.dst.protonum = IPPROTO_TCP;
+ ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
+ ftp[i].mask.dst.protonum = 0xFFFF;
+ ftp[i].mask.src.u.tcp.port = 0xFFFF;
+ ftp[i].help = help;
+ ftp[i].me = THIS_MODULE;
+ ftp[i].flags = 0;
+ ftp[i].expect = ftp_nat_expected;
+
+ tmpname = &ftp_names[i][0];
+ if (ports[i] == FTP_PORT)
+ sprintf(tmpname, "ftp");
+ else
+ sprintf(tmpname, "ftp-%d", i);
+ ftp[i].name = tmpname;
+
+ DEBUGP("ip_nat_ftp: Trying to register for port %d\n",
+ ports[i]);
+ ret = ip_nat_helper_register(&ftp[i]);
+
+ if (ret) {
+ printk("ip_nat_ftp: error registering "
+ "helper for port %d\n", ports[i]);
+ fini();
+ return ret;
}
-
- } else {
- ip_nat_expect_unregister(&ftp_expect);
+ ports_c++;
}
+
return ret;
}
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 183ba3f85356..dd0fab83bc48 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -1,11 +1,18 @@
/* ip_nat_mangle.c - generic support functions for NAT helpers
*
- * (C) 2000 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
*
* distributed under the terms of GNU GPL
+ *
+ * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
+ * - add support for SACK adjustment
+ * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
+ * - merge SACK support into newnat API
*/
#include <linux/version.h>
+#include <linux/config.h>
#include <linux/module.h>
+#include <linux/kmod.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/skbuff.h>
@@ -19,6 +26,8 @@
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
@@ -32,7 +41,7 @@
#define DEBUGP(format, args...)
#define DUMP_OFFSET(x)
#endif
-
+
DECLARE_LOCK(ip_nat_seqofs_lock);
static inline int
@@ -199,6 +208,103 @@ ip_nat_mangle_tcp_packet(struct sk_buff **skb,
return 1;
}
+/* Adjust one found SACK option including checksum correction */
+static void
+sack_adjust(struct tcphdr *tcph,
+ unsigned char *ptr,
+ struct ip_nat_seq *natseq)
+{
+ struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+ int i;
+
+ for (i = 0; i < num_sacks; i++, sp++) {
+ u_int32_t new_start_seq, new_end_seq;
+
+ if (after(ntohl(sp->start_seq) - natseq->offset_before,
+ natseq->correction_pos))
+ new_start_seq = ntohl(sp->start_seq)
+ - natseq->offset_after;
+ else
+ new_start_seq = ntohl(sp->start_seq)
+ - natseq->offset_before;
+ new_start_seq = htonl(new_start_seq);
+
+ if (after(ntohl(sp->end_seq) - natseq->offset_before,
+ natseq->correction_pos))
+ new_end_seq = ntohl(sp->end_seq)
+ - natseq->offset_after;
+ else
+ new_end_seq = ntohl(sp->end_seq)
+ - natseq->offset_before;
+ new_end_seq = htonl(new_end_seq);
+
+ DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+ ntohl(sp->start_seq), new_start_seq,
+ ntohl(sp->end_seq), new_end_seq);
+
+ tcph->check =
+ ip_nat_cheat_check(~sp->start_seq, new_start_seq,
+ ip_nat_cheat_check(~sp->end_seq,
+ new_end_seq,
+ tcph->check));
+
+ sp->start_seq = new_start_seq;
+ sp->end_seq = new_end_seq;
+ }
+}
+
+
+/* TCP SACK sequence number adjustment, return 0 if sack found and adjusted */
+static inline int
+ip_nat_sack_adjust(struct sk_buff *skb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ unsigned char *ptr;
+ int length, dir, sack_adjusted = 0;
+
+ iph = skb->nh.iph;
+ tcph = (void *)iph + iph->ihl*4;
+ length = (tcph->doff*4)-sizeof(struct tcphdr);
+ ptr = (unsigned char *)(tcph+1);
+
+ dir = CTINFO2DIR(ctinfo);
+
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return !sack_adjusted;
+ case TCPOPT_NOP:
+ length--;
+ continue;
+ default:
+ opsize = *ptr++;
+ if (opsize > length) /* no partial opts */
+ return !sack_adjusted;
+ if (opcode == TCPOPT_SACK) {
+ /* found SACK */
+ if((opsize >= (TCPOLEN_SACK_BASE
+ +TCPOLEN_SACK_PERBLOCK)) &&
+ !((opsize - TCPOLEN_SACK_BASE)
+ % TCPOLEN_SACK_PERBLOCK))
+ sack_adjust(tcph, ptr-2,
+ &ct->nat.info.seq[!dir]);
+
+ sack_adjusted = 1;
+ }
+ ptr += opsize-2;
+ length -= opsize;
+ }
+ }
+ return !sack_adjusted;
+}
+
/* TCP sequence number adjustment */
int
ip_nat_seq_adjust(struct sk_buff *skb,
@@ -243,51 +349,9 @@ ip_nat_seq_adjust(struct sk_buff *skb,
tcph->seq = newseq;
tcph->ack_seq = newack;
- return 0;
-}
-
-/* Grrr... SACK. Fuck me even harder. Don't want to fix it on the
- fly, so blow it away. */
-void
-ip_nat_delete_sack(struct sk_buff *skb, struct tcphdr *tcph)
-{
- unsigned int i;
- u_int8_t *opt = (u_int8_t *)tcph;
-
- DEBUGP("Seeking SACKPERM in SYN packet (doff = %u).\n",
- tcph->doff * 4);
- for (i = sizeof(struct tcphdr); i < tcph->doff * 4;) {
- DEBUGP("%u ", opt[i]);
- switch (opt[i]) {
- case TCPOPT_NOP:
- case TCPOPT_EOL:
- i++;
- break;
-
- case TCPOPT_SACK_PERM:
- goto found_opt;
+ ip_nat_sack_adjust(skb, ct, ctinfo);
- default:
- /* Worst that can happen: it will take us over. */
- i += opt[i+1] ?: 1;
- }
- }
- DEBUGP("\n");
- return;
-
- found_opt:
- DEBUGP("\n");
- DEBUGP("Found SACKPERM at offset %u.\n", i);
-
- /* Must be within TCP header, and valid SACK perm. */
- if (i + opt[i+1] <= tcph->doff*4 && opt[i+1] == 2) {
- /* Replace with NOPs. */
- tcph->check
- = ip_nat_cheat_check(*((u_int16_t *)(opt + i))^0xFFFF,
- (TCPOPT_NOP<<8)|TCPOPT_NOP, tcph->check);
- opt[i] = opt[i+1] = TCPOPT_NOP;
- }
- else DEBUGP("Something wrong with SACK_PERM.\n");
+ return 0;
}
static inline int
@@ -297,10 +361,51 @@ helper_cmp(const struct ip_nat_helper *helper,
return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
}
+#define MODULE_MAX_NAMELEN 32
+
int ip_nat_helper_register(struct ip_nat_helper *me)
{
int ret = 0;
+ if (me->me && !(me->flags & IP_NAT_HELPER_F_STANDALONE)) {
+ struct ip_conntrack_helper *ct_helper;
+
+ if ((ct_helper = ip_ct_find_helper(&me->tuple))
+ && ct_helper->me) {
+ __MOD_INC_USE_COUNT(ct_helper->me);
+ } else {
+
+ /* We are a NAT helper for protocol X. If we need
+ * respective conntrack helper for protocol X, compute
+ * conntrack helper name and try to load module */
+ char name[MODULE_MAX_NAMELEN];
+ const char *tmp = me->me->name;
+
+ if (strlen(tmp) + 6 > MODULE_MAX_NAMELEN) {
+ printk(__FUNCTION__ ": unable to "
+ "compute conntrack helper name "
+ "from %s\n", tmp);
+ return -EBUSY;
+ }
+ tmp += 6;
+ sprintf(name, "ip_conntrack%s", tmp);
+#ifdef CONFIG_KMOD
+ if (!request_module(name)
+ && (ct_helper = ip_ct_find_helper(&me->tuple))
+ && ct_helper->me) {
+ __MOD_INC_USE_COUNT(ct_helper->me);
+ } else {
+ printk("unable to load module %s\n", name);
+ return -EBUSY;
+ }
+#else
+ printk("unable to load module %s automatically "
+ "because kernel was compiled without kernel "
+ "module loader support\n", name);
+ return -EBUSY;
+#endif
+ }
+ }
WRITE_LOCK(&ip_nat_lock);
if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple))
ret = -EBUSY;
@@ -327,8 +432,14 @@ kill_helper(const struct ip_conntrack *i, void *helper)
void ip_nat_helper_unregister(struct ip_nat_helper *me)
{
+ int found = 0;
+
WRITE_LOCK(&ip_nat_lock);
- LIST_DELETE(&helpers, me);
+ /* Autoloading conntrack helper might have failed */
+ if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) {
+ LIST_DELETE(&helpers, me);
+ found = 1;
+ }
WRITE_UNLOCK(&ip_nat_lock);
/* Someone could be still looking at the helper in a bh. */
@@ -344,5 +455,19 @@ void ip_nat_helper_unregister(struct ip_nat_helper *me)
worse. --RR */
ip_ct_selective_cleanup(kill_helper, me);
- MOD_DEC_USE_COUNT;
+ if (found)
+ MOD_DEC_USE_COUNT;
+
+ /* If we are not a standalone NAT helper, we need to decrement usage count
+ * on our conntrack helper */
+ if (me->me && !(me->flags & IP_NAT_HELPER_F_STANDALONE)) {
+ struct ip_conntrack_helper *ct_helper;
+
+ if ((ct_helper = ip_ct_find_helper(&me->tuple))
+ && ct_helper->me) {
+ __MOD_DEC_USE_COUNT(ct_helper->me);
+ } else
+ printk(__FUNCTION__ ": unable to decrement usage count"
+ " of conntrack helper %s\n", me->me->name);
+ }
}
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index 2a8b83afaf5b..3edc319f56fb 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -51,42 +51,29 @@ DECLARE_LOCK_EXTERN(ip_irc_lock);
/* FIXME: Time out? --RR */
-static int
+static unsigned int
irc_nat_expected(struct sk_buff **pskb,
unsigned int hooknum,
struct ip_conntrack *ct,
- struct ip_nat_info *info,
- struct ip_conntrack *master,
- struct ip_nat_info *masterinfo, unsigned int *verdict)
+ struct ip_nat_info *info)
{
struct ip_nat_multi_range mr;
u_int32_t newdstip, newsrcip, newip;
- struct ip_ct_irc *ircinfo;
+
+ struct ip_conntrack *master = master_ct(ct);
IP_NF_ASSERT(info);
IP_NF_ASSERT(master);
- IP_NF_ASSERT(masterinfo);
IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
DEBUGP("nat_expected: We have a connection!\n");
- /* Master must be an irc connection */
- ircinfo = &master->help.ct_irc_info;
- LOCK_BH(&ip_irc_lock);
- if (ircinfo->is_irc != IP_CONNTR_IRC) {
- UNLOCK_BH(&ip_irc_lock);
- DEBUGP("nat_expected: master not irc\n");
- return 0;
- }
-
newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
newsrcip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
DEBUGP("nat_expected: DCC cmd. %u.%u.%u.%u->%u.%u.%u.%u\n",
NIPQUAD(newsrcip), NIPQUAD(newdstip));
- UNLOCK_BH(&ip_irc_lock);
-
if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
newip = newsrcip;
else
@@ -99,16 +86,14 @@ irc_nat_expected(struct sk_buff **pskb,
mr.range[0].flags = IP_NAT_RANGE_MAP_IPS;
mr.range[0].min_ip = mr.range[0].max_ip = newip;
- *verdict = ip_nat_setup_info(ct, &mr, hooknum);
-
- return 1;
+ return ip_nat_setup_info(ct, &mr, hooknum);
}
-static int irc_data_fixup(const struct ip_ct_irc *ct_irc_info,
+static int irc_data_fixup(const struct ip_ct_irc_expect *ct_irc_info,
struct ip_conntrack *ct,
- unsigned int datalen,
struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack_expect *expect)
{
u_int32_t newip;
struct ip_conntrack_tuple t;
@@ -121,9 +106,9 @@ static int irc_data_fixup(const struct ip_ct_irc *ct_irc_info,
MUST_BE_LOCKED(&ip_irc_lock);
- DEBUGP("IRC_NAT: info (seq %u + %u) packet(seq %u + %u)\n",
- ct_irc_info->seq, ct_irc_info->len,
- ntohl(tcph->seq), datalen);
+ DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
+ expect->seq, ct_irc_info->len,
+ ntohl(tcph->seq));
newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
@@ -133,13 +118,11 @@ static int irc_data_fixup(const struct ip_ct_irc *ct_irc_info,
only set in ip_conntrack_irc, with ip_irc_lock held
writable */
- t = ct->expected.tuple;
+ t = expect->tuple;
t.dst.ip = newip;
for (port = ct_irc_info->port; port != 0; port++) {
t.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(ct, &t,
- &ct->expected.mask,
- NULL) == 0) {
+ if (ip_conntrack_change_expect(expect, &t) == 0) {
DEBUGP("using port %d", port);
break;
}
@@ -166,26 +149,28 @@ static int irc_data_fixup(const struct ip_ct_irc *ct_irc_info,
buffer, NIPQUAD(newip), port);
return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- ct_irc_info->seq - ntohl(tcph->seq),
+ expect->seq - ntohl(tcph->seq),
ct_irc_info->len, buffer,
strlen(buffer));
}
static unsigned int help(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp,
struct ip_nat_info *info,
enum ip_conntrack_info ctinfo,
- unsigned int hooknum, struct sk_buff **pskb)
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
struct iphdr *iph = (*pskb)->nh.iph;
struct tcphdr *tcph = (void *) iph + iph->ihl * 4;
unsigned int datalen;
int dir;
- int score;
- struct ip_ct_irc *ct_irc_info = &ct->help.ct_irc_info;
+ struct ip_ct_irc_expect *ct_irc_info;
- /* Delete SACK_OK on initial TCP SYNs. */
- if (tcph->syn && !tcph->ack)
- ip_nat_delete_sack(*pskb, tcph);
+ if (!exp)
+ DEBUGP("ip_nat_irc: no exp!!");
+
+ ct_irc_info = &exp->help.exp_irc_info;
/* Only mangle things once: original direction in POST_ROUTING
and reply direction on PRE_ROUTING. */
@@ -202,55 +187,35 @@ static unsigned int help(struct ip_conntrack *ct,
DEBUGP("got beyond not touching\n");
datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4;
- score = 0;
LOCK_BH(&ip_irc_lock);
- if (ct_irc_info->len) {
- DEBUGP("got beyond ct_irc_info->len\n");
-
- /* If it's in the right range... */
- score += between(ct_irc_info->seq, ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- score += between(ct_irc_info->seq + ct_irc_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- if (score == 1) {
- /* Half a match? This means a partial retransmisison.
- It's a cracker being funky. */
- if (net_ratelimit()) {
- printk
- ("IRC_NAT: partial packet %u/%u in %u/%u\n",
- ct_irc_info->seq, ct_irc_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- }
+ /* Check whether the whole IP/address pattern is carried in the payload */
+ if (between(exp->seq + ct_irc_info->len,
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen)) {
+ if (!irc_data_fixup(ct_irc_info, ct, pskb, ctinfo, exp)) {
UNLOCK_BH(&ip_irc_lock);
return NF_DROP;
- } else if (score == 2) {
- DEBUGP("IRC_NAT: score=2, calling fixup\n");
- if (!irc_data_fixup(ct_irc_info, ct, datalen,
- pskb, ctinfo)) {
- UNLOCK_BH(&ip_irc_lock);
- return NF_DROP;
- }
- /* skb may have been reallocated */
- iph = (*pskb)->nh.iph;
- tcph = (void *) iph + iph->ihl * 4;
}
+ } else {
+ /* Half a match? This means a partial retransmission.
+ It's a cracker being funky. */
+ if (net_ratelimit()) {
+ printk
+ ("IRC_NAT: partial packet %u/%u in %u/%u\n",
+ exp->seq, ct_irc_info->len,
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen);
+ }
+ UNLOCK_BH(&ip_irc_lock);
+ return NF_DROP;
}
-
UNLOCK_BH(&ip_irc_lock);
- ip_nat_seq_adjust(*pskb, ct, ctinfo);
-
return NF_ACCEPT;
}
static struct ip_nat_helper ip_nat_irc_helpers[MAX_PORTS];
-static char ip_nih_names[MAX_PORTS][6];
-
-static struct ip_nat_expect irc_expect
- = { {NULL, NULL}, irc_nat_expected };
-
+static char irc_names[MAX_PORTS][10];
/* This function is intentionally _NOT_ defined as __exit, because
* it is needed by init() */
@@ -262,52 +227,54 @@ static void fini(void)
DEBUGP("ip_nat_irc: unregistering helper for port %d\n",
ports[i]);
ip_nat_helper_unregister(&ip_nat_irc_helpers[i]);
- }
- ip_nat_expect_unregister(&irc_expect);
+ }
}
+
static int __init init(void)
{
- int ret;
+ int ret = 0;
int i;
struct ip_nat_helper *hlpr;
char *tmpname;
- ret = ip_nat_expect_register(&irc_expect);
- if (ret == 0) {
-
- if (ports[0] == 0) {
- ports[0] = 6667;
- }
+ if (ports[0] == 0) {
+ ports[0] = IRC_PORT;
+ }
- for (i = 0; (i < MAX_PORTS) && ports[i] != 0; i++) {
- hlpr = &ip_nat_irc_helpers[i];
- memset(hlpr, 0,
- sizeof(struct ip_nat_helper));
-
- hlpr->tuple.dst.protonum = IPPROTO_TCP;
- hlpr->tuple.src.u.tcp.port = htons(ports[i]);
- hlpr->mask.src.u.tcp.port = 0xFFFF;
- hlpr->mask.dst.protonum = 0xFFFF;
- hlpr->help = help;
-
- tmpname = &ip_nih_names[i][0];
- sprintf(tmpname, "irc%2.2d", i);
-
- hlpr->name = tmpname;
- DEBUGP
- ("ip_nat_irc: Trying to register helper for port %d: name %s\n",
- ports[i], hlpr->name);
- ret = ip_nat_helper_register(hlpr);
-
- if (ret) {
- printk
- ("ip_nat_irc: error registering helper for port %d\n",
- ports[i]);
- fini();
- return 1;
- }
- ports_c++;
+ for (i = 0; (i < MAX_PORTS) && ports[i] != 0; i++) {
+ hlpr = &ip_nat_irc_helpers[i];
+ memset(hlpr, 0,
+ sizeof(struct ip_nat_helper));
+
+ hlpr->tuple.dst.protonum = IPPROTO_TCP;
+ hlpr->tuple.src.u.tcp.port = htons(ports[i]);
+ hlpr->mask.src.u.tcp.port = 0xFFFF;
+ hlpr->mask.dst.protonum = 0xFFFF;
+ hlpr->help = help;
+ hlpr->flags = 0;
+ hlpr->me = THIS_MODULE;
+ hlpr->expect = irc_nat_expected;
+
+ tmpname = &irc_names[i][0];
+ if (ports[i] == IRC_PORT)
+ sprintf(tmpname, "irc");
+ else
+ sprintf(tmpname, "irc-%d", i);
+ hlpr->name = tmpname;
+
+ DEBUGP
+ ("ip_nat_irc: Trying to register helper for port %d: name %s\n",
+ ports[i], hlpr->name);
+ ret = ip_nat_helper_register(hlpr);
+
+ if (ret) {
+ printk
+ ("ip_nat_irc: error registering helper for port %d\n",
+ ports[i]);
+ fini();
+ return 1;
}
+ ports_c++;
}
return ret;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index ff25da08cd09..d27959ea3c41 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -4,7 +4,6 @@
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/if.h>
-
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 0e39070368f9..a2b5de683dce 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -1,5 +1,5 @@
/* The "unknown" protocol. This is what is used for protocols we
- * don't understand. It's returned by find_proto().
+ * don't understand. It's returned by ip_ct_find_proto().
*/
#include <linux/types.h>
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 9eacf45d908c..6ee71c840242 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -106,8 +106,6 @@ static struct ipt_table nat_table
= { { NULL, NULL }, "nat", &nat_initial_table.repl,
NAT_VALID_HOOKS, RW_LOCK_UNLOCKED, NULL, THIS_MODULE };
-LIST_HEAD(nat_expect_list);
-
/* Source NAT */
static unsigned int ipt_snat_target(struct sk_buff **pskb,
unsigned int hooknum,
@@ -254,19 +252,6 @@ alloc_null_binding(struct ip_conntrack *conntrack,
return ip_nat_setup_info(conntrack, &mr, hooknum);
}
-static inline int call_expect(const struct ip_nat_expect *i,
- struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info,
- struct ip_conntrack *master,
- struct ip_nat_info *masterinfo,
- unsigned int *verdict)
-{
- return i->expect(pskb, hooknum, ct, info, master, masterinfo,
- verdict);
-}
-
int ip_nat_rule_find(struct sk_buff **pskb,
unsigned int hooknum,
const struct net_device *in,
@@ -276,19 +261,8 @@ int ip_nat_rule_find(struct sk_buff **pskb,
{
int ret;
- /* Master won't vanish while this ctrack still alive */
- if (ct->master.master) {
- struct ip_conntrack *master;
-
- master = (struct ip_conntrack *)ct->master.master;
- if (LIST_FIND(&nat_expect_list,
- call_expect,
- struct ip_nat_expect *,
- pskb, hooknum, ct, info,
- master, &master->nat.info, &ret))
- return ret;
- }
ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+
if (ret == NF_ACCEPT) {
if (!(info->initialized & (1 << HOOK2MANIP(hooknum))))
/* NUL mapping */
@@ -297,22 +271,6 @@ int ip_nat_rule_find(struct sk_buff **pskb,
return ret;
}
-int ip_nat_expect_register(struct ip_nat_expect *expect)
-{
- WRITE_LOCK(&ip_nat_lock);
- list_prepend(&nat_expect_list, expect);
- WRITE_UNLOCK(&ip_nat_lock);
-
- return 0;
-}
-
-void ip_nat_expect_unregister(struct ip_nat_expect *expect)
-{
- WRITE_LOCK(&ip_nat_lock);
- LIST_DELETE(&nat_expect_list, expect);
- WRITE_UNLOCK(&ip_nat_lock);
-}
-
static struct ipt_target ipt_snat_reg
= { { NULL, NULL }, "SNAT", ipt_snat_target, ipt_snat_checkentry, NULL };
static struct ipt_target ipt_dnat_reg
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index eaf18396fd95..f1f22e6ef446 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1244,6 +1244,7 @@ static int snmp_translate(struct ip_conntrack *ct,
* NAT helper function, packets arrive here from NAT code.
*/
static unsigned int nat_help(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp,
struct ip_nat_info *info,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
@@ -1304,19 +1305,27 @@ static unsigned int nat_help(struct ip_conntrack *ct,
return NF_DROP;
}
-static struct ip_nat_helper snmp = { { NULL, NULL },
+static struct ip_nat_helper snmp = {
+ { NULL, NULL },
+ "snmp",
+ IP_NAT_HELPER_F_STANDALONE,
+ THIS_MODULE,
{ { 0, { __constant_htons(SNMP_PORT) } },
{ 0, { 0 }, IPPROTO_UDP } },
{ { 0, { 0xFFFF } },
{ 0, { 0 }, 0xFFFF } },
- nat_help, "snmp" };
+ nat_help, NULL };
-static struct ip_nat_helper snmp_trap = { { NULL, NULL },
+static struct ip_nat_helper snmp_trap = {
+ { NULL, NULL },
+ "snmp_trap",
+ IP_NAT_HELPER_F_STANDALONE,
+ THIS_MODULE,
{ { 0, { __constant_htons(SNMP_TRAP_PORT) } },
{ 0, { 0 }, IPPROTO_UDP } },
{ { 0, { 0xFFFF } },
{ 0, { 0 }, 0xFFFF } },
- nat_help, "snmp_trap" };
+ nat_help, NULL };
/*****************************************************************************
*
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index b0d299703269..f30fa72b0f45 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -5,7 +5,12 @@
*/
/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General
- Public Licence. */
+ * Public Licence.
+ *
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ * - new API and handling of conntrack/nat helpers
+ * - now capable of multiple expectations for one master
+ * */
#include <linux/config.h>
#include <linux/types.h>
@@ -45,6 +50,15 @@
: ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
: "*ERROR*")))
+static inline int call_expect(struct ip_conntrack *master,
+ struct sk_buff **pskb,
+ unsigned int hooknum,
+ struct ip_conntrack *ct,
+ struct ip_nat_info *info)
+{
+ return master->nat.info.helper->expect(pskb, hooknum, ct, info);
+}
+
static unsigned int
ip_nat_fn(unsigned int hooknum,
struct sk_buff **pskb,
@@ -111,8 +125,16 @@ ip_nat_fn(unsigned int hooknum,
int in_hashes = info->initialized;
unsigned int ret;
- ret = ip_nat_rule_find(pskb, hooknum, in, out,
- ct, info);
+ if (ct->master
+ && master_ct(ct)->nat.info.helper
+ && master_ct(ct)->nat.info.helper->expect) {
+ ret = call_expect(master_ct(ct), pskb,
+ hooknum, ct, info);
+ } else {
+ ret = ip_nat_rule_find(pskb, hooknum, in, out,
+ ct, info);
+ }
+
if (ret != NF_ACCEPT) {
WRITE_UNLOCK(&ip_nat_lock);
return ret;
@@ -335,11 +357,7 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
EXPORT_SYMBOL(ip_nat_protocol_unregister);
EXPORT_SYMBOL(ip_nat_helper_register);
EXPORT_SYMBOL(ip_nat_helper_unregister);
-EXPORT_SYMBOL(ip_nat_expect_register);
-EXPORT_SYMBOL(ip_nat_expect_unregister);
EXPORT_SYMBOL(ip_nat_cheat_check);
EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-EXPORT_SYMBOL(ip_nat_seq_adjust);
-EXPORT_SYMBOL(ip_nat_delete_sack);
EXPORT_SYMBOL(ip_nat_used_tuple);
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 0894fc63c7a2..e9932533f87a 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -464,7 +464,7 @@ static int netlink_send_peer(ipq_queue_element_t *e)
return netlink_unicast(nfnl, skb, nlq->peer.pid, MSG_DONTWAIT);
}
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0);
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
static __inline__ void netlink_receive_user_skb(struct sk_buff *skb)
{
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index a978874f4849..3f5e6b074854 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -61,7 +61,7 @@ MODULE_LICENSE("GPL");
#define DEBUGP(format, args...)
#endif
-#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format, ## args); } while (0);
+#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format, ## args); } while (0)
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("IP tables userspace logging module");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 110ba12eb3df..098312a7bf6a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -583,7 +583,7 @@ static int rt_garbage_collect(void)
if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
goto out;
if (net_ratelimit())
- printk("dst cache overflow\n");
+ printk(KERN_WARNING "dst cache overflow\n");
return 1;
work_done:
@@ -657,7 +657,7 @@ restart:
}
if (net_ratelimit())
- printk("Neighbour table overflow.\n");
+ printk(KERN_WARNING "Neighbour table overflow.\n");
rt_drop(rt);
return -ENOBUFS;
}
@@ -667,8 +667,8 @@ restart:
#if RT_CACHE_DEBUG >= 2
if (rt->u.rt_next) {
struct rtable *trt;
- printk("rt_cache @%02x: %u.%u.%u.%u", hash,
- NIPQUAD(rt->rt_dst));
+ printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash,
+ NIPQUAD(rt->rt_dst));
for (trt = rt->u.rt_next; trt; trt = trt->u.rt_next)
printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
printk("\n");
@@ -2454,7 +2454,7 @@ void __init ip_rt_init(void)
#ifdef CONFIG_NET_CLS_ROUTE
for (order = 0;
- (PAGE_SIZE << order) < 256 * sizeof(ip_rt_acct) * NR_CPUS; order++)
+ (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
/* NOTHING */;
ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
if (!ip_rt_acct)
@@ -2487,7 +2487,7 @@ void __init ip_rt_init(void)
if (!rt_hash_table)
panic("Failed to allocate IP route cache hash table\n");
- printk("IP: routing cache hash table of %u buckets, %ldKbytes\n",
+ printk(KERN_INFO "IP: routing cache hash table of %u buckets, %ldKbytes\n",
rt_hash_mask,
(long) (rt_hash_mask * sizeof(struct rt_hash_bucket)) / 1024);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1e0fb441c74c..b3b60b30c182 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -219,6 +219,8 @@ ctl_table ipv4_table[] = {
&sysctl_icmp_ratelimit, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_ICMP_RATEMASK, "icmp_ratemask",
&sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_TCP_TW_REUSE, "tcp_tw_reuse",
+ &sysctl_tcp_tw_reuse, sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 286c18ca2ee4..78b0c6fb03df 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1792,7 +1792,7 @@ void tcp_destroy_sock(struct sock *sk)
#ifdef TCP_DEBUG
if (sk->zapped) {
- printk("TCP: double destroy sk=%p\n", sk);
+ printk(KERN_DEBUG "TCP: double destroy sk=%p\n", sk);
sock_hold(sk);
}
sk->zapped = 1;
@@ -2558,7 +2558,7 @@ void __init tcp_init(void)
sysctl_tcp_rmem[2] = 2*43689;
}
- printk("TCP: Hash tables configured (established %d bind %d)\n",
+ printk(KERN_INFO "TCP: Hash tables configured (established %d bind %d)\n",
tcp_ehash_size<<1, tcp_bhash_size);
tcpdiag_init();
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a397d84e0044..9755d32c27f3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -124,9 +124,6 @@ static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *s
len = skb->len;
if (len >= tp->ack.rcv_mss) {
tp->ack.rcv_mss = len;
- /* Dubious? Rather, it is final cut. 8) */
- if (tcp_flag_word(skb->h.th)&TCP_REMNANT)
- tp->ack.pending |= TCP_ACK_PUSHED;
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
@@ -463,7 +460,7 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
if (after(tp->snd_una, tp->rtt_seq)) {
if (tp->mdev_max < tp->rttvar)
tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
- tp->rtt_seq = tp->snd_una;
+ tp->rtt_seq = tp->snd_nxt;
tp->mdev_max = TCP_RTO_MIN;
}
} else {
@@ -1769,6 +1766,7 @@ static int tcp_clean_rtx_queue(struct sock *sk)
acked |= FLAG_DATA_ACKED;
} else {
acked |= FLAG_SYN_ACKED;
+ tp->retrans_stamp = 0;
}
if (sacked) {
@@ -3873,6 +3871,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
switch (sk->state) {
case TCP_CLOSE_WAIT:
case TCP_CLOSING:
+ case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
case TCP_FIN_WAIT1:
@@ -3890,7 +3889,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
}
/* Fall through */
- case TCP_LAST_ACK:
case TCP_ESTABLISHED:
tcp_data_queue(sk, skb);
queued = 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b7049e4294af..b839d36b93f1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -64,6 +64,8 @@
#include <linux/ipsec.h>
extern int sysctl_ip_dynaddr;
+extern int sysctl_ip_default_ttl;
+int sysctl_tcp_tw_reuse = 0;
/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8
@@ -163,18 +165,18 @@ __inline__ void tcp_inherit_port(struct sock *sk, struct sock *child)
local_bh_enable();
}
-static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum)
-{
- inet_sk(sk)->num = snum;
+static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum)
+{
+ inet_sk(sk)->num = snum;
if ((sk->bind_next = tb->owners) != NULL)
tb->owners->bind_pprev = &sk->bind_next;
tb->owners = sk;
sk->bind_pprev = &tb->owners;
sk->prev = (struct sock *) tb;
-}
+}
static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
-{
+{
struct inet_opt *inet = inet_sk(sk);
struct sock *sk2 = tb->owners;
int sk_reuse = sk->reuse;
@@ -193,8 +195,8 @@ static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
}
}
}
- return sk2 != NULL;
-}
+ return sk2 != NULL;
+}
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
@@ -247,12 +249,14 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
break;
}
if (tb != NULL && tb->owners != NULL) {
- if (tb->fastreuse != 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
+ if (sk->reuse > 1)
+ goto success;
+ if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
goto success;
} else {
- ret = 1;
+ ret = 1;
if (tcp_bind_conflict(sk, tb))
- goto fail_unlock;
+ goto fail_unlock;
}
}
ret = 1;
@@ -269,7 +273,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
tb->fastreuse = 0;
success:
if (sk->prev == NULL)
- tcp_bind_hash(sk, tb, snum);
+ tcp_bind_hash(sk, tb, snum);
BUG_TRAP(sk->prev == (struct sock *) tb);
ret = 0;
@@ -341,13 +345,13 @@ void tcp_listen_wlock(void)
}
}
-static __inline__ void __tcp_v4_hash(struct sock *sk)
+static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
{
struct sock **skp;
rwlock_t *lock;
BUG_TRAP(sk->pprev==NULL);
- if(sk->state == TCP_LISTEN) {
+ if(listen_possible && sk->state == TCP_LISTEN) {
skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
lock = &tcp_lhash_lock;
tcp_listen_wlock();
@@ -362,7 +366,7 @@ static __inline__ void __tcp_v4_hash(struct sock *sk)
sk->pprev = skp;
sock_prot_inc_use(sk->prot);
write_unlock(lock);
- if (sk->state == TCP_LISTEN)
+ if (listen_possible && sk->state == TCP_LISTEN)
wake_up(&tcp_lhash_wait);
}
@@ -370,7 +374,7 @@ static void tcp_v4_hash(struct sock *sk)
{
if (sk->state != TCP_CLOSE) {
local_bh_disable();
- __tcp_v4_hash(sk);
+ __tcp_v4_hash(sk, 1);
local_bh_enable();
}
}
@@ -379,6 +383,9 @@ void tcp_unhash(struct sock *sk)
{
rwlock_t *lock;
+ if (!sk->pprev)
+ goto ende;
+
if (sk->state == TCP_LISTEN) {
local_bh_disable();
tcp_listen_wlock();
@@ -397,6 +404,8 @@ void tcp_unhash(struct sock *sk)
sock_prot_dec_use(sk->prot);
}
write_unlock_bh(lock);
+
+ ende:
if (sk->state == TCP_LISTEN)
wake_up(&tcp_lhash_wait);
}
@@ -538,20 +547,22 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
skb->h.th->source);
}
-static int tcp_v4_check_established(struct sock *sk)
+/* called with local bh disabled */
+static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
+ struct tcp_tw_bucket **twp)
{
struct inet_opt *inet = inet_sk(sk);
u32 daddr = inet->rcv_saddr;
u32 saddr = inet->daddr;
int dif = sk->bound_dev_if;
TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
- __u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
- int hash = tcp_hashfn(daddr, inet->num, saddr, inet->dport);
+ __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
+ int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
struct tcp_ehash_bucket *head = &tcp_ehash[hash];
struct sock *sk2, **skp;
struct tcp_tw_bucket *tw;
- write_lock_bh(&head->lock);
+ write_lock(&head->lock);
/* Check TIME-WAIT sockets first. */
for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp) != NULL;
@@ -575,7 +586,9 @@ static int tcp_v4_check_established(struct sock *sk)
fall back to VJ's scheme and use initial
timestamp retrieved from peer table.
*/
- if (tw->ts_recent_stamp) {
+ if (tw->ts_recent_stamp &&
+ (!twp || (sysctl_tcp_tw_reuse &&
+ xtime.tv_sec - tw->ts_recent_stamp > 1))) {
if ((tp->write_seq =
tw->snd_nxt + 65535 + 2) == 0)
tp->write_seq = 1;
@@ -597,6 +610,10 @@ static int tcp_v4_check_established(struct sock *sk)
}
unique:
+ /* Must record num and sport now. Otherwise we will see
+ * in hash table socket with a funny identity. */
+ inet->num = lport;
+ inet->sport = htons(lport);
BUG_TRAP(sk->pprev==NULL);
if ((sk->next = *skp) != NULL)
(*skp)->pprev = &sk->next;
@@ -605,15 +622,16 @@ unique:
sk->pprev = skp;
sk->hashent = hash;
sock_prot_inc_use(sk->prot);
- write_unlock_bh(&head->lock);
+ write_unlock(&head->lock);
- if (tw) {
+ if (twp) {
+ *twp = tw;
+ NET_INC_STATS_BH(TimeWaitRecycled);
+ } else if (tw) {
/* Silly. Should hash-dance instead... */
- local_bh_disable();
tcp_tw_deschedule(tw);
tcp_timewait_kill(tw);
NET_INC_STATS_BH(TimeWaitRecycled);
- local_bh_enable();
tcp_tw_put(tw);
}
@@ -621,34 +639,120 @@ unique:
return 0;
not_unique:
- write_unlock_bh(&head->lock);
+ write_unlock(&head->lock);
return -EADDRNOTAVAIL;
}
-/* Hash SYN-SENT socket to established hash table after
- * checking that it is unique. Note, that without kernel lock
- * we MUST make these two operations atomically.
- *
- * Optimization: if it is bound and tcp_bind_bucket has the only
- * owner (us), we need not to scan established bucket.
+/*
+ * Bind a port for a connect operation and hash it.
*/
-
-int tcp_v4_hash_connecting(struct sock *sk)
+static int tcp_v4_hash_connect(struct sock *sk)
{
unsigned short snum = inet_sk(sk)->num;
- struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)];
- struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev;
-
+ struct tcp_bind_hashbucket *head;
+ struct tcp_bind_bucket *tb;
+
+ if (snum == 0) {
+ int rover;
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+ int remaining = (high - low) + 1;
+ struct tcp_tw_bucket *tw = NULL;
+
+ local_bh_disable();
+
+ /* TODO. Actually it is not so bad idea to remove
+ * tcp_portalloc_lock before next submission to Linus.
+ * As soon as we touch this place at all it is time to think.
+ *
+ * Now it protects single _advisory_ variable tcp_port_rover,
+ * hence it is mostly useless.
+ * Code will work nicely if we just delete it, but
+ * I am afraid in contended case it will work not better or
+ * even worse: another cpu just will hit the same bucket
+ * and spin there.
+ * So some cpu salt could remove both contention and
+ * memory pingpong. Any ideas how to do this in a nice way?
+ */
+ spin_lock(&tcp_portalloc_lock);
+ rover = tcp_port_rover;
+
+ do {
+ rover++;
+ if ((rover < low) || (rover > high))
+ rover = low;
+ head = &tcp_bhash[tcp_bhashfn(rover)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+ * because the established check is already
+ * unique enough.
+ */
+ for (tb = head->chain; tb; tb = tb->next) {
+ if (tb->port == rover) {
+ BUG_TRAP(tb->owners != NULL);
+ if (tb->fastreuse >= 0)
+ goto next_port;
+ if (!__tcp_v4_check_established(sk, rover, &tw))
+ goto ok;
+ goto next_port;
+ }
+ }
+
+ tb = tcp_bucket_create(head, rover);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+ }
+ tb->fastreuse = -1;
+ goto ok;
+
+ next_port:
+ spin_unlock(&head->lock);
+ } while (--remaining > 0);
+ tcp_port_rover = rover;
+ spin_unlock(&tcp_portalloc_lock);
+
+ local_bh_enable();
+
+ return -EADDRNOTAVAIL;
+
+ ok:
+ /* All locks still held and bhs disabled */
+ tcp_port_rover = rover;
+ spin_unlock(&tcp_portalloc_lock);
+
+ tcp_bind_hash(sk, tb, rover);
+ if (!sk->pprev) {
+ inet_sk(sk)->sport = htons(rover);
+ __tcp_v4_hash(sk, 0);
+ }
+ spin_unlock(&head->lock);
+
+ if (tw) {
+ tcp_tw_deschedule(tw);
+ tcp_timewait_kill(tw);
+ tcp_tw_put(tw);
+ }
+
+ local_bh_enable();
+ return 0;
+ }
+
+ head = &tcp_bhash[tcp_bhashfn(snum)];
+ tb = (struct tcp_bind_bucket *)sk->prev;
spin_lock_bh(&head->lock);
if (tb->owners == sk && sk->bind_next == NULL) {
- __tcp_v4_hash(sk);
+ __tcp_v4_hash(sk, 0);
spin_unlock_bh(&head->lock);
return 0;
} else {
- spin_unlock_bh(&head->lock);
-
+ int ret;
+ spin_unlock(&head->lock);
/* No definite answer... Walk to established hash table */
- return tcp_v4_check_established(sk);
+ ret = __tcp_v4_check_established(sk, snum, NULL);
+ local_bh_enable();
+ return ret;
}
}
@@ -658,7 +762,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_opt *inet = inet_sk(sk);
struct tcp_opt *tp = tcp_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
- struct sk_buff *buff;
struct rtable *rt;
u32 daddr, nexthop;
int tmp;
@@ -693,12 +796,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!inet->opt || !inet->opt->srr)
daddr = rt->rt_dst;
- err = -ENOBUFS;
- buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation);
-
- if (buff == NULL)
- goto failure;
-
if (!inet->saddr)
inet->saddr = rt->rt_src;
inet->rcv_saddr = inet->saddr;
@@ -729,24 +826,38 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->dport = usin->sin_port;
inet->daddr = daddr;
+ tp->ext_header_len = 0;
+ if (inet->opt)
+ tp->ext_header_len = inet->opt->optlen;
+
+ tp->mss_clamp = 536;
+
+ /* Socket identity is still unknown (sport may be zero).
+ * However we set state to SYN-SENT and not releasing socket
+ * lock select source port, enter ourselves into the hash tables and
+ * complete initialization after this.
+ */
+ tcp_set_state(sk, TCP_SYN_SENT);
+ err = tcp_v4_hash_connect(sk);
+ if (err)
+ goto failure;
+
if (!tp->write_seq)
tp->write_seq = secure_tcp_sequence_number(inet->saddr,
inet->daddr,
inet->sport,
usin->sin_port);
- tp->ext_header_len = 0;
- if (inet->opt)
- tp->ext_header_len = inet->opt->optlen;
inet->id = tp->write_seq ^ jiffies;
- tp->mss_clamp = 536;
+ err = tcp_connect(sk);
+ if (err)
+ goto failure;
- err = tcp_connect(sk, buff);
- if (err == 0)
- return 0;
+ return 0;
failure:
+ tcp_set_state(sk, TCP_CLOSE);
__sk_dst_reset(sk);
sk->route_caps = 0;
inet->dport = 0;
@@ -799,7 +910,6 @@ static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
req->expires = jiffies + TCP_TIMEOUT_INIT;
req->retrans = 0;
req->sk = NULL;
- req->index = h;
req->dl_next = lopt->syn_table[h];
write_lock(&tp->syn_wait_lock);
@@ -1092,6 +1202,7 @@ static void tcp_v4_send_reset(struct sk_buff *skb)
arg.n_iov = 1;
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+ inet_sk(tcp_socket->sk)->ttl = sysctl_ip_default_ttl;
ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
TCP_INC_STATS_BH(TcpOutSegs);
@@ -1478,7 +1589,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = dst->advmss;
tcp_initialize_rcv_mss(newsk);
- __tcp_v4_hash(newsk);
+ __tcp_v4_hash(newsk, 0);
__tcp_inherit_port(sk, newsk);
return newsk;
@@ -1902,7 +2013,6 @@ struct tcp_func ipv4_specific = {
tcp_v4_rebuild_header,
tcp_v4_conn_request,
tcp_v4_syn_recv_sock,
- tcp_v4_hash_connecting,
tcp_v4_remember_stamp,
sizeof(struct iphdr),
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 318bceafa44e..b429b96c3af6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -786,6 +786,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
newtp->mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
+
+ TCP_INC_STATS_BH(TcpPassiveOpens);
}
return newsk;
}
@@ -849,8 +851,38 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
/* Further reproduces section "SEGMENT ARRIVES"
for state SYN-RECEIVED of RFC793.
It is broken, however, it does not work only
- when SYNs are crossed, which is impossible in our
- case.
+ when SYNs are crossed.
+
+ You would think that SYN crossing is impossible here, since
+ we should have a SYN_SENT socket (from connect()) on our end,
+ but this is not true if the crossed SYNs were sent to both
+ ends by a malicious third party. We must defend against this,
+ and to do that we first verify the ACK (as per RFC793, page
+ 36) and reset if it is invalid. Is this a true full defense?
+ To convince ourselves, let us consider a way in which the ACK
+ test can still pass in this 'malicious crossed SYNs' case.
+ Malicious sender sends identical SYNs (and thus identical sequence
+ numbers) to both A and B:
+
+ A: gets SYN, seq=7
+ B: gets SYN, seq=7
+
+ By our good fortune, both A and B select the same initial
+ send sequence number of seven :-)
+
+ A: sends SYN|ACK, seq=7, ack_seq=8
+ B: sends SYN|ACK, seq=7, ack_seq=8
+
+ So we are now A eating this SYN|ACK, ACK test passes. So
+ does sequence test, SYN is truncated, and thus we consider
+ it a bare ACK.
+
+ If tp->defer_accept, we silently drop this bare ACK. Otherwise,
+ we create an established connection. Both ends (listening sockets)
+ accept the new incoming connection and try to talk to each other. 8-)
+
+ Note: This case is both harmless, and rare. Possibility is about the
+ same as us discovering intelligent life on another planet tomorrow.
But generally, we should (RFC lies!) to accept ACK
from SYNACK both here and in tcp_rcv_state_process().
@@ -862,6 +894,22 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
before attempt to create socket.
*/
+ /* RFC793 page 36: "If the connection is in any non-synchronized state ...
+ * and the incoming segment acknowledges something not yet
+ * sent (the segment carries an unacceptable ACK) ...
+ * a reset is sent."
+ */
+ if (!(flg & TCP_FLAG_ACK))
+ return NULL;
+
+ /* Invalid ACK: reset will be sent by listening socket */
+ if (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)
+ return sk;
+ /* Also, it would be not so bad idea to check rcv_tsecr, which
+ * is essentially ACK extension and too early or too late values
+ * should cause reset in unsynchronized states.
+ */
+
/* RFC793: "first check sequence number". */
if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
@@ -891,19 +939,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
goto embryonic_reset;
- /* RFC793: "fifth check the ACK field" */
-
- if (!(flg & TCP_FLAG_ACK))
- return NULL;
-
- /* Invalid ACK: reset will be sent by listening socket */
- if (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)
- return sk;
- /* Also, it would be not so bad idea to check rcv_tsecr, which
- * is essentially ACK extension and too early or too late values
- * should cause reset in unsynchronized states.
- */
-
/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
req->acked = 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 71b406306d24..2f95c9a13880 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -38,6 +38,7 @@
#include <net/tcp.h>
+#include <linux/compiler.h>
#include <linux/smp_lock.h>
/* People can turn this off for buggy TCP's found in printers etc. */
@@ -1156,14 +1157,14 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
return skb;
}
-int tcp_connect(struct sock *sk, struct sk_buff *buff)
+/*
+ * Do all connect socket setups that can be done AF independent.
+ */
+static inline void tcp_connect_init(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_opt *tp = tcp_sk(sk);
- /* Reserve space for headers. */
- skb_reserve(buff, MAX_TCP_HEADER);
-
/* We'll fix this up when we get a response from the other end.
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
@@ -1190,14 +1191,6 @@ int tcp_connect(struct sock *sk, struct sk_buff *buff)
tp->rcv_ssthresh = tp->rcv_wnd;
- /* Socket identity change complete, no longer
- * in TCP_CLOSE, so enter ourselves into the
- * hash tables.
- */
- tcp_set_state(sk,TCP_SYN_SENT);
- if (tp->af_specific->hash_connecting(sk))
- goto err_out;
-
sk->err = 0;
sk->done = 0;
tp->snd_wnd = 0;
@@ -1211,6 +1204,24 @@ int tcp_connect(struct sock *sk, struct sk_buff *buff)
tp->rto = TCP_TIMEOUT_INIT;
tp->retransmits = 0;
tcp_clear_retrans(tp);
+}
+
+/*
+ * Build a SYN and send it off.
+ */
+int tcp_connect(struct sock *sk)
+{
+ struct tcp_opt *tp = tcp_sk(sk);
+ struct sk_buff *buff;
+
+ tcp_connect_init(sk);
+
+ buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation);
+ if (unlikely(buff == NULL))
+ return -ENOBUFS;
+
+ /* Reserve space for headers. */
+ skb_reserve(buff, MAX_TCP_HEADER);
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
TCP_ECN_send_syn(tp, buff);
@@ -1233,11 +1244,6 @@ int tcp_connect(struct sock *sk, struct sk_buff *buff)
/* Timer for repeating the SYN until an answer. */
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
return 0;
-
-err_out:
- tcp_set_state(sk,TCP_CLOSE);
- kfree_skb(buff);
- return -EADDRNOTAVAIL;
}
/* Send out a delayed ack, the caller does the policy checking
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b0b616b52a12..c67ea0c1191b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -725,6 +725,8 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_opt *inet = inet_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
struct rtable *rt;
+ u32 saddr;
+ int oif;
int err;
@@ -736,8 +738,16 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
- err = ip_route_connect(&rt, usin->sin_addr.s_addr, inet->saddr,
- RT_CONN_FLAGS(sk), sk->bound_dev_if);
+ oif = sk->bound_dev_if;
+ saddr = inet->saddr;
+ if (MULTICAST(usin->sin_addr.s_addr)) {
+ if (!oif)
+ oif = inet->mc_index;
+ if (!saddr)
+ saddr = inet->mc_addr;
+ }
+ err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
+ RT_CONN_FLAGS(sk), oif);
if (err)
return err;
if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 397a86e3a608..cdc2e011fc68 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -62,6 +62,8 @@
#include <asm/uaccess.h>
+#define IPV6_MAX_ADDRESSES 16
+
/* Set to 3 to get tracing... */
#define ACONF_DEBUG 2
@@ -586,6 +588,18 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
return err;
}
+int ipv6_count_addresses(struct inet6_dev *idev)
+{
+ int cnt = 0;
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
+ cnt++;
+ read_unlock_bh(&idev->lock);
+ return cnt;
+}
+
int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev)
{
struct inet6_ifaddr * ifp;
@@ -895,8 +909,12 @@ ok:
ifp = ipv6_get_ifaddr(&addr, dev);
if (ifp == NULL && valid_lft) {
- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+ /* Do not allow the creation of too many autoconfigured
+ * addresses; this would be too easy a way to crash the kernel.
+ */
+ if (ipv6_count_addresses(in6_dev) < IPV6_MAX_ADDRESSES)
+ ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+ addr_type&IPV6_ADDR_SCOPE_MASK, 0);
if (ifp == NULL) {
in6_dev_put(in6_dev);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2231feb00c2f..994e627963f1 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -361,7 +361,7 @@ void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
ipv6_addr_copy(&msg->target, solicited_addr);
if (inc_opt)
- ndisc_fill_option((void*)&msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len);
+ ndisc_fill_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len);
/* checksum */
msg->icmph.icmp6_cksum = csum_ipv6_magic(solicited_addr, daddr, len,
@@ -422,7 +422,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
ipv6_addr_copy(&msg->target, solicit);
if (send_llinfo)
- ndisc_fill_option((void*)&msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
+ ndisc_fill_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
/* checksum */
msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
@@ -572,6 +572,11 @@ static void ndisc_router_discovery(struct sk_buff *skb)
printk(KERN_WARNING "ICMP RA: source address is not linklocal\n");
return;
}
+ if (optlen < 0) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "ICMP RA: packet too short\n");
+ return;
+ }
/*
* set the RA_RECV flag in the interface
@@ -928,7 +933,7 @@ ndisc_recv_ns(struct in6_addr *saddr, struct sk_buff *skb)
u8 *opt;
opt = skb->h.raw;
- opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ opt += sizeof(struct nd_msg);
opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_SOURCE_LL_ADDR);
return neigh_event_ns(&nd_tbl, opt, saddr, skb->dev);
@@ -936,12 +941,11 @@ ndisc_recv_ns(struct in6_addr *saddr, struct sk_buff *skb)
static __inline__ int ndisc_recv_na(struct neighbour *neigh, struct sk_buff *skb)
{
- struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
u8 *opt;
+ struct nd_msg *msg = (struct nd_msg*) skb->h.raw;
- opt = skb->h.raw;
- opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
- opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_TARGET_LL_ADDR);
+ opt = ndisc_find_option(msg->opt, skb->dev->addr_len+2,
+ skb->tail - msg->opt, ND_OPT_TARGET_LL_ADDR);
return neigh_update(neigh, opt,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
@@ -962,7 +966,6 @@ int ndisc_rcv(struct sk_buff *skb)
struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
struct neighbour *neigh;
struct inet6_ifaddr *ifp;
- unsigned int payload_len;
__skb_push(skb, skb->data-skb->h.raw);
@@ -985,11 +988,9 @@ int ndisc_rcv(struct sk_buff *skb)
* (Some checking in ndisc_find_option)
*/
- payload_len = ntohs(skb->nh.ipv6h->payload_len);
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
- /* XXX: import nd_neighbor_solicit from glibc netinet/icmp6.h */
- if (payload_len < 8+16) {
+ if (skb->len < sizeof(struct nd_msg)) {
if (net_ratelimit())
printk(KERN_WARNING "ICMP NS: packet too short\n");
return 0;
@@ -1069,12 +1070,13 @@ int ndisc_rcv(struct sk_buff *skb)
neigh = ndisc_recv_ns(saddr, skb);
- if (neigh) {
+ if (neigh || !dev->hard_header) {
ndisc_send_na(dev, neigh, saddr, &ifp->addr,
ifp->idev->cnf.forwarding, 1,
ipv6_addr_type(&ifp->addr)&IPV6_ADDR_ANYCAST ? 0 : 1,
1);
- neigh_release(neigh);
+ if (neigh)
+ neigh_release(neigh);
}
}
in6_ifa_put(ifp);
@@ -1118,8 +1120,7 @@ int ndisc_rcv(struct sk_buff *skb)
return 0;
case NDISC_NEIGHBOUR_ADVERTISEMENT:
- /* XXX: import nd_neighbor_advert from glibc netinet/icmp6.h */
- if (payload_len < 16+8 ) {
+ if (skb->len < sizeof(struct nd_msg)) {
if (net_ratelimit())
printk(KERN_WARNING "ICMP NA: packet too short\n");
return 0;
@@ -1180,35 +1181,12 @@ int ndisc_rcv(struct sk_buff *skb)
break;
case NDISC_ROUTER_ADVERTISEMENT:
- /* XXX: import nd_router_advert from glibc netinet/icmp6.h */
- if (payload_len < 8+4+4) {
- if (net_ratelimit())
- printk(KERN_WARNING "ICMP RA: packet too short\n");
- return 0;
- }
ndisc_router_discovery(skb);
break;
case NDISC_REDIRECT:
- /* XXX: import nd_redirect from glibc netinet/icmp6.h */
- if (payload_len < 8+16+16) {
- if (net_ratelimit())
- printk(KERN_WARNING "ICMP redirect: packet too short\n");
- return 0;
- }
ndisc_redirect_rcv(skb);
break;
-
- case NDISC_ROUTER_SOLICITATION:
- /* No RS support in the kernel, but we do some required checks */
-
- /* XXX: import nd_router_solicit from glibc netinet/icmp6.h */
- if (payload_len < 8) {
- if (net_ratelimit())
- printk(KERN_WARNING "ICMP RS: packet too short\n");
- return 0;
- }
- break;
};
return 0;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 235533afd4fe..07e58857d777 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -518,7 +518,7 @@ static int netlink_send_peer(ip6q_queue_element_t *e)
return netlink_unicast(nfnl, skb, nlq6->peer.pid, MSG_DONTWAIT);
}
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0);
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
static __inline__ void netlink_receive_user_skb(struct sk_buff *skb)
{
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b3cc361c9ca2..389674e2c9c4 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -517,7 +517,11 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_error;
}
- mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+ if (tiph->frag_off)
+ mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+ else
+ mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
+
if (mtu < 68) {
tunnel->stat.collisions++;
ip_rt_put(rt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7ff24c734dae..27a49a6260ce 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -136,7 +136,7 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
break;
}
if (tb != NULL && tb->owners != NULL) {
- if (tb->fastreuse != 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
+ if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
goto success;
} else {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -377,22 +377,22 @@ static __inline__ unsigned tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport)
}
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
- struct ipv6hdr *ip6h,
- struct tcphdr *th,
- int iif,
- struct open_request ***prevp)
+ struct open_request ***prevp,
+ __u16 rport,
+ struct in6_addr *raddr,
+ struct in6_addr *laddr,
+ int iif)
{
struct tcp_listen_opt *lopt = tp->listen_opt;
struct open_request *req, **prev;
- __u16 rport = th->source;
- for (prev = &lopt->syn_table[tcp_v6_synq_hash(&ip6h->saddr, rport)];
+ for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport)];
(req = *prev) != NULL;
prev = &req->dl_next) {
if (req->rmt_port == rport &&
req->class->family == AF_INET6 &&
- !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) &&
- !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) &&
+ !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
+ !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
(!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
BUG_TRAP(req->sk == NULL);
*prevp = prev;
@@ -499,11 +499,21 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static int tcp_v6_hash_connecting(struct sock *sk)
+static int tcp_v6_hash_connect(struct sock *sk)
{
- unsigned short snum = inet_sk(sk)->num;
- struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)];
- struct tcp_bind_bucket *tb = head->chain;
+ struct tcp_bind_hashbucket *head;
+ struct tcp_bind_bucket *tb;
+
+ /* XXX */
+ if (inet_sk(sk)->num == 0) {
+ int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
+ if (err)
+ return err;
+ inet_sk(sk)->sport = htons(inet_sk(sk)->num);
+ }
+
+ head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
+ tb = head->chain;
spin_lock_bh(&head->lock);
@@ -534,7 +544,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct in6_addr saddr_buf;
struct flowi fl;
struct dst_entry *dst;
- struct sk_buff *buff;
int addr_type;
int err;
@@ -675,17 +684,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
- err = -ENOBUFS;
- buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation);
-
- if (buff == NULL)
- goto failure;
-
inet->dport = usin->sin6_port;
- /*
- * Init variables
- */
+ tcp_set_state(sk, TCP_SYN_SENT);
+ err = tcp_v6_hash_connect(sk);
+ if (err)
+ goto late_failure;
if (!tp->write_seq)
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
@@ -693,10 +697,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
inet->sport,
inet->dport);
- err = tcp_connect(sk, buff);
- if (err == 0)
- return 0;
+ err = tcp_connect(sk);
+ if (err)
+ goto late_failure;
+
+ return 0;
+late_failure:
+ tcp_set_state(sk, TCP_CLOSE);
failure:
__sk_dst_reset(sk);
inet->dport = 0;
@@ -708,8 +716,6 @@ void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int type, int code, int offset, __u32 info)
{
struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
- struct in6_addr *saddr = &hdr->saddr;
- struct in6_addr *daddr = &hdr->daddr;
struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
struct ipv6_pinfo *np;
struct sock *sk;
@@ -717,7 +723,7 @@ void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_opt *tp;
__u32 seq;
- sk = tcp_v6_lookup(daddr, th->dest, saddr, th->source, skb->dev->ifindex);
+ sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
if (sk == NULL) {
ICMP6_INC_STATS_BH(Icmp6InErrors);
@@ -790,15 +796,12 @@ void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Might be for an open_request */
switch (sk->state) {
struct open_request *req, **prev;
- struct ipv6hdr hd;
case TCP_LISTEN:
if (sk->lock.users)
goto out;
- /* Grrrr - fix this later. */
- ipv6_addr_copy(&hd.saddr, saddr);
- ipv6_addr_copy(&hd.daddr, daddr);
- req = tcp_v6_search_req(tp, &hd, th, tcp_v6_iif(skb), &prev);
+ req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
+ &hdr->saddr, tcp_v6_iif(skb));
if (!req)
goto out;
@@ -1107,7 +1110,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
struct sock *nsk;
/* Find possible connection requests. */
- req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &prev);
+ req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
if (req)
return tcp_check_req(sk, skb, req, prev);
@@ -1142,7 +1146,6 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
req->sk = NULL;
req->expires = jiffies + TCP_TIMEOUT_INIT;
req->retrans = 0;
- req->index = h;
req->dl_next = lopt->syn_table[h];
write_lock(&tp->syn_wait_lock);
@@ -1790,7 +1793,6 @@ static struct tcp_func ipv6_specific = {
tcp_v6_rebuild_header,
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
- tcp_v6_hash_connecting,
tcp_v6_remember_stamp,
sizeof(struct ipv6hdr),
@@ -1810,7 +1812,6 @@ static struct tcp_func ipv6_mapped = {
tcp_v4_rebuild_header,
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
- tcp_v4_hash_connecting,
tcp_v4_remember_stamp,
sizeof(struct iphdr),
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a895089358aa..f0be1474dcdb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -293,6 +293,8 @@ ipv4_connected:
return -EINVAL;
}
sk->bound_dev_if = usin->sin6_scope_id;
+ if (!sk->bound_dev_if && (addr_type&IPV6_ADDR_MULTICAST))
+ fl.oif = np->mcast_oif;
}
/* Connect to link-local address requires an interface */
@@ -317,6 +319,9 @@ ipv4_connected:
fl.uli_u.ports.dport = inet->dport;
fl.uli_u.ports.sport = inet->sport;
+ if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
+ fl.oif = np->mcast_oif;
+
if (flowlabel) {
if (flowlabel->opt && flowlabel->opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
diff --git a/net/khttpd/sockets.c b/net/khttpd/sockets.c
index 74bfe614d463..0241f9bfc7f3 100644
--- a/net/khttpd/sockets.c
+++ b/net/khttpd/sockets.c
@@ -82,7 +82,7 @@ int StartListening(const int Port)
MainSocket = sock;
- EnterFunction("StartListening");
+ LeaveFunction("StartListening");
return 1;
}
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index aeafa310f1e4..5142e74ae9aa 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -407,5 +407,6 @@ static int __init lapb_init(void)
MODULE_AUTHOR("Jonathan Naylor <g4klx@g4klx.demon.co.uk>");
MODULE_DESCRIPTION("The X.25 Link Access Procedure B link layer protocol");
+MODULE_LICENSE("GPL");
module_init(lapb_init);
diff --git a/net/netlink/netlink_dev.c b/net/netlink/netlink_dev.c
index a5bb25ab5089..131209fea01d 100644
--- a/net/netlink/netlink_dev.c
+++ b/net/netlink/netlink_dev.c
@@ -206,6 +206,8 @@ int __init init_netlink(void)
#ifdef MODULE
+MODULE_LICENSE("GPL");
+
int init_module(void)
{
printk(KERN_INFO "Network Kernel/User communications module 0.04\n");
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 65b59754b62e..ebaae2dbd2da 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -29,7 +29,7 @@
* NET/ROM 006 Alan(GW4PTS) Brought in line with the ANK changes
* Jonathan(G4KLX) Removed hdrincl.
* NET/ROM 007 Jonathan(G4KLX) New timer architecture.
- * Impmented Idle timer.
+ * Implemented Idle timer.
* Arnaldo C. Melo s/suser/capable/, micro cleanups
*/
@@ -1338,6 +1338,7 @@ MODULE_PARM_DESC(nr_ndevs, "number of NET/ROM devices");
MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
MODULE_DESCRIPTION("The amateur radio NET/ROM network and transport layer protocol");
+MODULE_LICENSE("GPL");
static void __exit nr_exit(void)
{
diff --git a/net/netsyms.c b/net/netsyms.c
index abf875169c99..9aa257b30a6e 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -364,7 +364,6 @@ EXPORT_SYMBOL(tcp_inherit_port);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_connect);
-EXPORT_SYMBOL(tcp_v4_hash_connecting);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(tcp_prot);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 84e1262d6660..75000d694681 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -358,6 +358,8 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
if (dev->hard_header) {
skb->data -= dev->hard_header_len;
skb->tail -= dev->hard_header_len;
+ if (len < dev->hard_header_len)
+ skb->nh.raw = skb->data;
}
/* Returns -EFAULT on error */
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 6a2c2decc516..e183529c2320 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1519,6 +1519,7 @@ MODULE_PARM_DESC(rose_ndevs, "number of ROSE devices");
MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
MODULE_DESCRIPTION("The amateur radio ROSE network layer protocol");
+MODULE_LICENSE("GPL");
static void __exit rose_exit(void)
{
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 6d056b86c04d..62a37363c841 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -327,6 +327,7 @@ static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *
if (cl - 1 > q->bands)
return -ENOENT;
+ tcm->tcm_handle |= TC_H_MIN(cl);
if (q->queues[cl-1])
tcm->tcm_info = q->queues[cl-1]->handle;
return 0;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f2d0d92f7b8e..c96762fbdcb9 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -105,6 +105,7 @@ struct sfq_sched_data
/* Parameters */
int perturb_period;
unsigned quantum; /* Allotment per round: MUST BE >= MTU */
+ int limit;
/* Variables */
struct timer_list perturb_timer;
@@ -275,7 +276,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
q->tail = x;
}
}
- if (++sch->q.qlen < SFQ_DEPTH-1) {
+ if (++sch->q.qlen < q->limit-1) {
sch->stats.bytes += skb->len;
sch->stats.packets++;
return 0;
@@ -310,7 +311,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
q->tail = x;
}
}
- if (++sch->q.qlen < SFQ_DEPTH-1)
+ if (++sch->q.qlen < q->limit - 1)
return 0;
sch->stats.drops++;
@@ -390,6 +391,11 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
sch_tree_lock(sch);
q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
q->perturb_period = ctl->perturb_period*HZ;
+ if (ctl->limit)
+ q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+
+ while (sch->q.qlen >= q->limit-1)
+ sfq_drop(sch);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
@@ -416,6 +422,7 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
}
+ q->limit = SFQ_DEPTH;
q->max_depth = 0;
q->tail = SFQ_DEPTH;
if (opt == NULL) {
@@ -448,9 +455,9 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.quantum = q->quantum;
opt.perturb_period = q->perturb_period/HZ;
- opt.limit = SFQ_DEPTH;
+ opt.limit = q->limit;
opt.divisor = SFQ_HASH_DIVISOR;
- opt.flows = SFQ_DEPTH;
+ opt.flows = q->limit;
RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 7adc5541fa2f..5f8bc3fc0761 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -21,6 +21,7 @@
#include <linux/proc_fs.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/init.h>
#define RPCDBG_FACILITY RPCDBG_MISC
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 0751536f7828..8f3f3fe5fbb5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1097,7 +1097,7 @@ udp_write_space(struct sock *sk)
return;
/* Wait until we have enough socket memory. */
- if (sock_writeable(sk))
+ if (!sock_writeable(sk))
return;
if (!xprt_test_and_set_wspace(xprt)) {
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d169cffdc663..f70de78a143e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1371,6 +1371,7 @@ EXPORT_NO_SYMBOLS;
MODULE_AUTHOR("Jonathan Naylor <g4klx@g4klx.demon.co.uk>");
MODULE_DESCRIPTION("The X.25 Packet Layer network layer protocol");
+MODULE_LICENSE("GPL");
static void __exit x25_exit(void)
{