From 0a80568fadcfdea3058fd2eec10098387171a09f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 08:41:57 -0700 Subject: [NETFILTER]: ip_nat_snmp call skb_make_writable() The snmp helper needs an explicit call to skb_ip_make_writable. Please apply. Signed-off-by: James Morris Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_nat_snmp_basic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 23854e0ec26f..8773860e5012 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -1252,6 +1252,9 @@ static unsigned int nat_help(struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); struct iphdr *iph = (*pskb)->nh.iph; struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + + if (!skb_ip_make_writable(pskb, (*pskb)->len)) + return NF_DROP; spin_lock_bh(&snmp_lock); -- cgit v1.2.3 From a6e13e0093a5989857d3803bba2a578606d406ce Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 08:42:44 -0700 Subject: [NETFILTER]: ipt_ULOG fix for last packet delay The ULOG target used to delay the last packet until another one was received. This patch fixes the issue. Signed-off-by: Ruby Joker Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ipt_ULOG.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index c352df54aa1f..d15c8175594b 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -97,7 +97,6 @@ typedef struct { static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ static struct sock *nflognl; /* our socket */ -static size_t qlen; /* current length of multipart-nlmsg */ DECLARE_LOCK(ulog_lock); /* spinlock */ /* send one ulog_buff_t to userspace */ @@ -116,7 +115,7 @@ static void ulog_send(unsigned int nlgroupnum) NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum); DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n", - ub->qlen, nlgroup); + ub->qlen, nlgroupnum); netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC); ub->qlen = 0; @@ -126,7 +125,7 @@ static void ulog_send(unsigned int nlgroupnum) } -/* timer function to flush queue in ULOG_FLUSH_INTERVAL time */ +/* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n"); @@ -261,12 +260,6 @@ static void ipt_ulog_packet(unsigned int hooknum, ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; } - /* if threshold is reached, send message to userspace */ - if (qlen >= loginfo->qthreshold) { - if (loginfo->qthreshold > 1) - nlh->nlmsg_type = NLMSG_DONE; - } - ub->lastnlh = nlh; /* if timer isn't already running, start it */ @@ -275,6 +268,13 @@ static void ipt_ulog_packet(unsigned int hooknum, add_timer(&ub->timer); } + /* if threshold is reached, send message to userspace */ + if (ub->qlen >= loginfo->qthreshold) { + if (loginfo->qthreshold > 1) + nlh->nlmsg_type = NLMSG_DONE; + ulog_send(groupnum); + } + UNLOCK_BH(&ulog_lock); return; -- cgit v1.2.3 From a94cb519c37429afc5ce9a65f18b98f5c54a104b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 
2004 08:43:32 -0700 Subject: [NETFILTER]: Use new module_param() api This patch makes all of ipv4/ipv6 netfilter use the 'new' module_param API. Signed-off-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_amanda.c | 3 ++- net/ipv4/netfilter/ip_conntrack_core.c | 6 +++--- net/ipv4/netfilter/ip_conntrack_ftp.c | 12 ++++++------ net/ipv4/netfilter/ip_conntrack_irc.c | 14 +++++++------- net/ipv4/netfilter/ip_conntrack_tftp.c | 10 +++++----- net/ipv4/netfilter/ip_nat_ftp.c | 10 +++++----- net/ipv4/netfilter/ip_nat_irc.c | 11 +++++------ net/ipv4/netfilter/ip_nat_snmp_basic.c | 3 ++- net/ipv4/netfilter/ip_nat_tftp.c | 10 +++++----- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 17 +++++++++-------- net/ipv4/netfilter/ipt_recent.c | 11 ++++++----- net/ipv4/netfilter/iptable_filter.c | 3 ++- net/ipv6/netfilter/ip6t_LOG.c | 3 ++- net/ipv6/netfilter/ip6table_filter.c | 3 ++- 15 files changed, 62 insertions(+), 56 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 4e8f4d83baf2..5a328192067e 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -34,7 +35,7 @@ static unsigned int master_timeout = 300; MODULE_AUTHOR("Brian J. Murrell "); MODULE_DESCRIPTION("Amanda connection tracking module"); MODULE_LICENSE("GPL"); -MODULE_PARM(master_timeout, "i"); +module_param(master_timeout, int, 0600); MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); static char *conns[] = { "DATA ", "MESG ", "INDEX " }; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 00a89f4f8d8b..d4811b5fea22 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -34,8 +34,8 @@ #include #include #include -/* For ERR_PTR(). 
Yeah, I know... --RR */ -#include +#include +#include /* This rwlock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ @@ -1373,7 +1373,7 @@ void ip_conntrack_cleanup(void) } static int hashsize; -MODULE_PARM(hashsize, "i"); +module_param(hashsize, int, 0400); int __init ip_conntrack_init(void) { diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index b9c27d5e458b..e4579a4b8fc7 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -19,6 +19,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell "); @@ -33,10 +34,10 @@ struct module *ip_conntrack_ftp = THIS_MODULE; #define MAX_PORTS 8 static int ports[MAX_PORTS]; static int ports_c; -MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); +module_param_array(ports, int, ports_c, 0400); static int loose; -MODULE_PARM(loose, "i"); +module_param(loose, int, 0600); #if 0 #define DEBUGP printk @@ -420,10 +421,10 @@ static int __init init(void) int i, ret; char *tmpname; - if (ports[0] == 0) - ports[0] = FTP_PORT; + if (ports_c == 0) + ports[ports_c++] = FTP_PORT; - for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + for (i = 0; i < ports_c; i++) { ftp[i].tuple.src.u.tcp.port = htons(ports[i]); ftp[i].tuple.dst.protonum = IPPROTO_TCP; ftp[i].mask.src.u.tcp.port = 0xFFFF; @@ -449,7 +450,6 @@ static int __init init(void) fini(); return ret; } - ports_c++; } return 0; } diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 32b5daee81c0..e06cb4ede710 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -32,6 +32,7 @@ #include #include #include +#include #define MAX_PORTS 8 static int ports[MAX_PORTS]; @@ -44,11 +45,11 @@ static char irc_buffer[65536]; MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); 
-MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); +module_param_array(ports, int, ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); -MODULE_PARM(max_dcc_channels, "i"); +module_param(max_dcc_channels, int, 0400); MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session"); -MODULE_PARM(dcc_timeout, "i"); +module_param(dcc_timeout, int, 0400); MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels"); static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " }; @@ -252,10 +253,10 @@ static int __init init(void) } /* If no port given, default to standard irc port */ - if (ports[0] == 0) - ports[0] = IRC_PORT; + if (ports_c == 0) + ports[ports_c++] = IRC_PORT; - for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + for (i = 0; i < ports_c; i++) { hlpr = &irc_helpers[i]; hlpr->tuple.src.u.tcp.port = htons(ports[i]); hlpr->tuple.dst.protonum = IPPROTO_TCP; @@ -284,7 +285,6 @@ static int __init init(void) fini(); return -EBUSY; } - ports_c++; } return 0; } diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index ddee7378209d..79a2b10070a5 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -19,6 +19,7 @@ #include #include #include +#include MODULE_AUTHOR("Magnus Boden "); MODULE_DESCRIPTION("tftp connection tracking helper"); @@ -27,7 +28,7 @@ MODULE_LICENSE("GPL"); #define MAX_PORTS 8 static int ports[MAX_PORTS]; static int ports_c; -MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); +module_param_array(ports, int, ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of tftp servers"); #if 0 @@ -104,10 +105,10 @@ static int __init init(void) int i, ret; char *tmpname; - if (!ports[0]) - ports[0]=TFTP_PORT; + if (ports_c == 0) + ports[ports_c++] = TFTP_PORT; - for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) { + for (i = 0; i < ports_c; i++) { /* Create helper structure */ 
memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper)); @@ -137,7 +138,6 @@ static int __init init(void) fini(); return(ret); } - ports_c++; } return(0); } diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index 946ca05bb90f..ae15f410bc4b 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -33,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper"); static int ports[MAX_PORTS]; static int ports_c; -MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); +module_param_array(ports, int, ports_c, 0400); DECLARE_LOCK_EXTERN(ip_ftp_lock); @@ -313,10 +314,10 @@ static int __init init(void) int i, ret = 0; char *tmpname; - if (ports[0] == 0) - ports[0] = FTP_PORT; + if (ports_c == 0) + ports[ports_c++] = FTP_PORT; - for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + for (i = 0; i < ports_c; i++) { ftp[i].tuple.dst.protonum = IPPROTO_TCP; ftp[i].tuple.src.u.tcp.port = htons(ports[i]); ftp[i].mask.dst.protonum = 0xFFFF; @@ -343,7 +344,6 @@ static int __init init(void) fini(); return ret; } - ports_c++; } return ret; diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c index dc778dd4ab2c..06555b44e49b 100644 --- a/net/ipv4/netfilter/ip_nat_irc.c +++ b/net/ipv4/netfilter/ip_nat_irc.c @@ -27,6 +27,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -41,7 +42,7 @@ static int ports_c; MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); -MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); +module_param_array(ports, int, ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); /* protects irc part of conntracks */ @@ -235,11 +236,10 @@ static int __init init(void) struct ip_nat_helper *hlpr; char *tmpname; - if (ports[0] == 0) { - ports[0] = IRC_PORT; - } + if (ports_c == 0) + ports[ports_c++] = IRC_PORT; - for (i = 0; (i < MAX_PORTS) 
&& ports[i] != 0; i++) { + for (i = 0; i < ports_c; i++) { hlpr = &ip_nat_irc_helpers[i]; hlpr->tuple.dst.protonum = IPPROTO_TCP; hlpr->tuple.src.u.tcp.port = htons(ports[i]); @@ -269,7 +269,6 @@ static int __init init(void) fini(); return 1; } - ports_c++; } return ret; } diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 8773860e5012..4ad6e08a78b8 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1360,4 +1361,4 @@ static void __exit fini(void) module_init(init); module_exit(fini); -MODULE_PARM(debug, "i"); +module_param(debug, bool, 0600); diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c index a2097bfbefb3..c94b999fc3e8 100644 --- a/net/ipv4/netfilter/ip_nat_tftp.c +++ b/net/ipv4/netfilter/ip_nat_tftp.c @@ -32,6 +32,7 @@ #include #include #include +#include MODULE_AUTHOR("Magnus Boden "); MODULE_DESCRIPTION("tftp NAT helper"); @@ -41,7 +42,7 @@ MODULE_LICENSE("GPL"); static int ports[MAX_PORTS]; static int ports_c = 0; -MODULE_PARM(ports,"1-" __MODULE_STRING(MAX_PORTS) "i"); +module_param_array(ports, int, ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of tftp servers"); #if 0 @@ -162,10 +163,10 @@ static int __init init(void) int i, ret = 0; char *tmpname; - if (!ports[0]) - ports[0] = TFTP_PORT; + if (ports_c == 0) + ports[ports_c++] = TFTP_PORT; - for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) { + for (i = 0; i < ports_c; i++) { memset(&tftp[i], 0, sizeof(struct ip_nat_helper)); tftp[i].tuple.dst.protonum = IPPROTO_UDP; @@ -194,7 +195,6 @@ static int __init init(void) fini(); return ret; } - ports_c++; } return ret; } diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b79962e225f7..47e49ad8202b 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -28,7 +28,7 @@ MODULE_AUTHOR("Netfilter 
Core Team "); MODULE_DESCRIPTION("iptables syslog logging module"); static unsigned int nflog = 1; -MODULE_PARM(nflog, "i"); +module_param(nflog, int, 0400); MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); #if 0 diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d15c8175594b..51d16d33bcbd 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -34,8 +34,8 @@ * by that factor. * * flushtimeout: - * Specify, after how many clock ticks (intel: 100 per second) the queue - * should be flushed even if it is not full yet. + * Specify, after how many hundredths of a second the queue should be + * flushed even if it is not full yet. * * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp */ @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -74,15 +75,15 @@ MODULE_DESCRIPTION("iptables userspace logging module"); #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) static unsigned int nlbufsiz = 4096; -MODULE_PARM(nlbufsiz, "i"); +module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */ MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); -static unsigned int flushtimeout = 10 * HZ; -MODULE_PARM(flushtimeout, "i"); -MODULE_PARM_DESC(flushtimeout, "buffer flush timeout"); +static unsigned int flushtimeout = 10; +module_param(flushtimeout, uint, 0600); +MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); static unsigned int nflog = 1; -MODULE_PARM(nflog, "i"); +module_param(nflog, int, 0400); MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); /* global data structures */ @@ -264,7 +265,7 @@ static void ipt_ulog_packet(unsigned int hooknum, /* if timer isn't already running, start it */ if (!timer_pending(&ub->timer)) { - ub->timer.expires = jiffies + flushtimeout; + ub->timer.expires = jiffies + flushtimeout * HZ / 100; add_timer(&ub->timer); } diff --git 
a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 08b786ac34dd..15472b3e9e56 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -37,12 +38,12 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost . htt MODULE_AUTHOR("Stephen Frost "); MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER); MODULE_LICENSE("GPL"); -MODULE_PARM(ip_list_tot,"i"); -MODULE_PARM(ip_pkt_list_tot,"i"); -MODULE_PARM(ip_list_hash_size,"i"); -MODULE_PARM(ip_list_perms,"i"); +module_param(ip_list_tot, int, 0400); +module_param(ip_pkt_list_tot, int, 0400); +module_param(ip_list_hash_size, int, 0400); +module_param(ip_list_perms, int, 0400); #ifdef DEBUG -MODULE_PARM(debug,"i"); +module_param(debug, int, 0600); MODULE_PARM_DESC(debug,"debugging level, defaults to 1"); #endif MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list"); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 8fb2ed9d1f9a..6b291da92656 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -11,6 +11,7 @@ */ #include +#include #include MODULE_LICENSE("GPL"); @@ -155,7 +156,7 @@ static struct nf_hook_ops ipt_ops[] = { /* Default to forward because I got too much mail already. 
*/ static int forward = NF_ACCEPT; -MODULE_PARM(forward, "i"); +module_param(forward, bool, 0000); static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index bb8590bdd605..acc673ce9591 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -26,7 +27,7 @@ MODULE_DESCRIPTION("IP6 tables LOG target module"); MODULE_LICENSE("GPL"); static unsigned int nflog = 1; -MODULE_PARM(nflog, "i"); +module_param(nflog, int, 0400); MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); struct in_device; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 46daa79051d1..aca6d21cc588 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -10,6 +10,7 @@ */ #include +#include #include MODULE_LICENSE("GPL"); @@ -156,7 +157,7 @@ static struct nf_hook_ops ip6t_ops[] = { /* Default to forward because I got too much mail already. */ static int forward = NF_ACCEPT; -MODULE_PARM(forward, "i"); +module_param(forward, bool, 0000); static int __init init(void) { -- cgit v1.2.3 From 44d4281e6b76110f9a7076856f353e741b32facb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 08:44:17 -0700 Subject: [NETFILTER]: Fix mutex declaration On Sun, Jun 20, 2004 at 01:23:28PM +0200, Christoph Hellwig wrote: > okay, the gunk we had in arp_tables is in ip6_tables and ip6_tables, > too. Signed-off-by: Christoph Hellwig Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_tables.h | 1 - include/linux/netfilter_ipv6/ip6_tables.h | 4 ---- net/ipv4/netfilter/ip_tables.c | 2 ++ net/ipv6/netfilter/ip6_tables.c | 1 + 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index b4c2b2b381c1..02a006f17ac4 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -336,7 +336,6 @@ ipt_get_target(struct ipt_entry *e) * Main firewall chains definitions and global var's definitions. */ #ifdef __KERNEL__ -static DECLARE_MUTEX(ipt_mutex); #include extern void ipt_init(void) __init; diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f9983d16cc1c..6f70cf3df39a 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -107,10 +107,6 @@ struct ip6t_counters u_int64_t pcnt, bcnt; /* Packet and byte counters */ }; -#ifdef __KERNEL__ -static DECLARE_MUTEX(ip6t_mutex); -#endif - /* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). 
*/ #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f24f17b8e03e..878d3bb329ea 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -61,6 +61,8 @@ do { \ #endif #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) +static DECLARE_MUTEX(ipt_mutex); + /* Must have mutex */ #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d2ce00d81d4c..0cef15b866f5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -66,6 +66,7 @@ do { \ #endif #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) +static DECLARE_MUTEX(ip6t_mutex); /* Must have mutex */ #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0) -- cgit v1.2.3 From 4cc4f57c678e3a8e762480db063da7e61d4f89c2 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 08:45:11 -0700 Subject: [NETFILTER]: Use slab cache for ip_conntrack_expect This patch adds a new slab cache (ip_conntrack_expect) for expectations. Signed-off-by: Pablo Neira Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index d4811b5fea22..141293477c1d 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -68,6 +68,7 @@ int ip_conntrack_max; static atomic_t ip_conntrack_count = ATOMIC_INIT(0); struct list_head *ip_conntrack_hash; static kmem_cache_t *ip_conntrack_cachep; +static kmem_cache_t *ip_conntrack_expect_cachep; struct ip_conntrack ip_conntrack_untracked; extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; @@ -177,7 +178,7 @@ destroy_expect(struct ip_conntrack_expect *exp) IP_NF_ASSERT(atomic_read(&exp->use) == 0); IP_NF_ASSERT(!timer_pending(&exp->timeout)); - kfree(exp); + kmem_cache_free(ip_conntrack_expect_cachep, exp); } inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) @@ -336,7 +337,7 @@ destroy_conntrack(struct nf_conntrack *nfct) list_del(&ct->master->expected_list); master = ct->master->expectant; } - kfree(ct->master); + kmem_cache_free(ip_conntrack_expect_cachep, ct->master); } WRITE_UNLOCK(&ip_conntrack_lock); @@ -923,9 +924,8 @@ struct ip_conntrack_expect * ip_conntrack_expect_alloc(void) { struct ip_conntrack_expect *new; - - new = (struct ip_conntrack_expect *) - kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC); + + new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC); if (!new) { DEBUGP("expect_related: OOM allocating expect\n"); return NULL; @@ -933,6 +933,7 @@ ip_conntrack_expect_alloc(void) /* tuple_cmp compares whole union, we have to initialized cleanly */ memset(new, 0, sizeof(struct ip_conntrack_expect)); + atomic_set(&new->use, 1); return new; } @@ -944,7 +945,6 @@ ip_conntrack_expect_insert(struct ip_conntrack_expect *new, DEBUGP("new expectation %p of conntrack %p\n", new, related_to); new->expectant = related_to; new->sibling 
= NULL; - atomic_set(&new->use, 1); /* add to expected list for this connection */ list_add_tail(&new->expected_list, &related_to->sibling_list); @@ -997,7 +997,8 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, } WRITE_UNLOCK(&ip_conntrack_lock); - kfree(expect); + /* This expectation is not inserted so no need to lock */ + kmem_cache_free(ip_conntrack_expect_cachep, expect); return -EEXIST; } else if (related_to->helper->max_expected && @@ -1015,7 +1016,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, related_to->helper->name, NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); - kfree(expect); + kmem_cache_free(ip_conntrack_expect_cachep, expect); return -EPERM; } DEBUGP("ip_conntrack: max number of expected " @@ -1049,7 +1050,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, WRITE_UNLOCK(&ip_conntrack_lock); DEBUGP("expect_related: busy!\n"); - kfree(expect); + kmem_cache_free(ip_conntrack_expect_cachep, expect); return -EBUSY; } @@ -1368,6 +1369,7 @@ void ip_conntrack_cleanup(void) } kmem_cache_destroy(ip_conntrack_cachep); + kmem_cache_destroy(ip_conntrack_expect_cachep); vfree(ip_conntrack_hash); nf_unregister_sockopt(&so_getorigdst); } @@ -1420,6 +1422,15 @@ int __init ip_conntrack_init(void) printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); goto err_free_hash; } + + ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect", + sizeof(struct ip_conntrack_expect), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ip_conntrack_expect_cachep) { + printk(KERN_ERR "Unable to create ip_expect slab cache\n"); + goto err_free_conntrack_slab; + } + /* Don't NEED lock here, but good form anyway. */ WRITE_LOCK(&ip_conntrack_lock); /* Sew in builtin protocols. 
*/ @@ -1447,6 +1458,8 @@ int __init ip_conntrack_init(void) return ret; +err_free_conntrack_slab: + kmem_cache_destroy(ip_conntrack_cachep); err_free_hash: vfree(ip_conntrack_hash); err_unreg_sockopt: -- cgit v1.2.3 From fd8bcd0027282c466dde59902c4d101b57e3f7dd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 08:45:50 -0700 Subject: [NETFILTER]: Connection based accounting This patch adds a config option to enable per-flow packet and byte accounting to ip_conntrack. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 17 ++++++++++++++-- net/ipv4/netfilter/Kconfig | 4 ++++ net/ipv4/netfilter/ip_conntrack_amanda.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 26 +++++++++++++++++++++---- net/ipv4/netfilter/ip_conntrack_proto_generic.c | 4 ++-- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 2 +- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 7 ++++--- net/ipv4/netfilter/ip_conntrack_standalone.c | 17 +++++++++++++++- 9 files changed, 66 insertions(+), 15 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 1974f162f5a0..824f10875720 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -156,6 +156,12 @@ struct ip_conntrack_expect union ip_conntrack_expect_help help; }; +struct ip_conntrack_counter +{ + u_int64_t packets; + u_int64_t bytes; +}; + struct ip_conntrack_helper; struct ip_conntrack @@ -173,6 +179,11 @@ struct ip_conntrack /* Timer function; drops refcnt when it goes off. 
*/ struct timer_list timeout; +#ifdef CONFIG_IP_NF_CT_ACCT + /* Accounting Information (same cache line as other written members) */ + struct ip_conntrack_counter counters[IP_CT_DIR_MAX]; +#endif + /* If we're expecting another related connection, this will be in expected linked list */ struct list_head sibling_list; @@ -245,8 +256,10 @@ extern int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig); /* Refresh conntrack for this many jiffies */ -extern void ip_ct_refresh(struct ip_conntrack *ct, - unsigned long extra_jiffies); +extern void ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies); /* These are for NAT. Icky. */ /* Call me when a conntrack is destroyed. */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index b58141ead442..39ef9751bfe6 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -628,5 +628,9 @@ config IP_NF_MATCH_REALM If you want to compile it as a module, say M here and read Documentation/modules.txt. If unsure, say `N'. +config IP_NF_CT_ACCT + bool "Connection tracking flow accounting" + depends on IP_NF_CONNTRACK + endmenu diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 5a328192067e..fc741925911a 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -59,7 +59,7 @@ static int help(struct sk_buff *skb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh(ct, master_timeout * HZ); + ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); /* No data? 
*/ dataoff = skb->nh.iph->ihl*4 + sizeof(struct udphdr); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 141293477c1d..19bd1a9560a0 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1165,21 +1165,39 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -/* Refresh conntrack for this many jiffies. */ -void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies) +static inline void ct_add_counters(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb) +{ +#ifdef CONFIG_IP_NF_CT_ACCT + if (skb) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + } +#endif +} + +/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ +void ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) { IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); /* If not in hash table, timer will not be active yet */ - if (!is_confirmed(ct)) + if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - else { + ct_add_counters(ct, ctinfo, skb); + } else { WRITE_LOCK(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); } + ct_add_counters(ct, ctinfo, skb); WRITE_UNLOCK(&ip_conntrack_lock); } } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c index 0df558a58020..6a7db7754512 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -50,9 +50,9 @@ static unsigned int generic_print_conntrack(char *buffer, /* Returns verdict for packet, or -1 for invalid. 
*/ static int packet(struct ip_conntrack *conntrack, const struct sk_buff *skb, - enum ip_conntrack_info conntrackinfo) + enum ip_conntrack_info ctinfo) { - ip_ct_refresh(conntrack, ip_ct_generic_timeout); + ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 47114840fa84..e854193eb768 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -94,7 +94,7 @@ static int icmp_packet(struct ip_conntrack *ct, ct->timeout.function((unsigned long)ct); } else { atomic_inc(&ct->proto.icmp.count); - ip_ct_refresh(ct, ip_ct_icmp_timeout); + ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 463cafa6692a..73fe0401d5ce 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -225,7 +225,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, set_bit(IPS_ASSURED_BIT, &conntrack->status); out: WRITE_UNLOCK(&tcp_lock); - ip_ct_refresh(conntrack, *tcp_timeouts[newconntrack]); + ip_ct_refresh_acct(conntrack, ctinfo, skb, *tcp_timeouts[newconntrack]); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index a63c32d1840e..a69e14b5c9a2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -60,16 +60,17 @@ static unsigned int udp_print_conntrack(char *buffer, /* Returns verdict for packet, and may modify conntracktype */ static int udp_packet(struct ip_conntrack *conntrack, const struct sk_buff *skb, - enum ip_conntrack_info conntrackinfo) + enum ip_conntrack_info ctinfo) { /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. 
*/ if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { - ip_ct_refresh(conntrack, ip_ct_udp_timeout_stream); + ip_ct_refresh_acct(conntrack, ctinfo, skb, + ip_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ set_bit(IPS_ASSURED_BIT, &conntrack->status); } else - ip_ct_refresh(conntrack, ip_ct_udp_timeout); + ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 80edac904188..0d78eb0f0f93 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -83,6 +83,17 @@ print_expect(char *buffer, const struct ip_conntrack_expect *expect) return len; } +#ifdef CONFIG_IP_NF_CT_ACCT +static unsigned int +print_counters(char *buffer, struct ip_conntrack_counter *counter) +{ + return sprintf(buffer, "packets=%llu bytes=%llu ", + counter->packets, counter->bytes); +} +#else +#define print_counters(x, y) 0 +#endif + static unsigned int print_conntrack(char *buffer, struct ip_conntrack *conntrack) { @@ -102,11 +113,15 @@ print_conntrack(char *buffer, struct ip_conntrack *conntrack) len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, proto); + len += print_counters(buffer + len, + &conntrack->counters[IP_CT_DIR_ORIGINAL]); if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) len += sprintf(buffer + len, "[UNREPLIED] "); len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, proto); + len += print_counters(buffer + len, + &conntrack->counters[IP_CT_DIR_REPLY]); if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) len += sprintf(buffer + len, "[ASSURED] "); len += sprintf(buffer + len, "use=%u ", @@ -638,7 +653,7 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_selective_cleanup); 
-EXPORT_SYMBOL(ip_ct_refresh); +EXPORT_SYMBOL(ip_ct_refresh_acct); EXPORT_SYMBOL(ip_ct_find_proto); EXPORT_SYMBOL(__ip_ct_find_proto); EXPORT_SYMBOL(ip_ct_find_helper); -- cgit v1.2.3 From 331c9e9ad112e08f8b224817083ae2ef6126409a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 09:00:49 -0700 Subject: [NETFILTER]: Move /proc/net/ip_conntrack to seq_file This patch makes ip_conntrack use the seq_file API Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 5 + net/ipv4/netfilter/ip_conntrack_standalone.c | 297 +++++++++++++-------- 2 files changed, 198 insertions(+), 104 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index 56e37ef255b7..55531ad34192 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -3,6 +3,11 @@ #define _IP_CONNTRACK_PROTOCOL_H #include +/* length of buffer to which print_tuple/print_conntrack members are + * writing */ + +#define IP_CT_PRINT_BUFLEN 100 + struct ip_conntrack_protocol { /* Next pointer. */ diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 0d78eb0f0f93..a5dc49b128e7 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -63,142 +64,224 @@ print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple, return len; } -/* FIXME: Don't print source proto part. --RR */ -static unsigned int -print_expect(char *buffer, const struct ip_conntrack_expect *expect) -{ - unsigned int len; - - if (expect->expectant->helper->timeout) - len = sprintf(buffer, "EXPECTING: %lu ", - timer_pending(&expect->timeout) - ? 
(expect->timeout.expires - jiffies)/HZ : 0); - else - len = sprintf(buffer, "EXPECTING: - "); - len += sprintf(buffer + len, "use=%u proto=%u ", - atomic_read(&expect->use), expect->tuple.dst.protonum); - len += print_tuple(buffer + len, &expect->tuple, - __ip_ct_find_proto(expect->tuple.dst.protonum)); - len += sprintf(buffer + len, "\n"); - return len; -} - #ifdef CONFIG_IP_NF_CT_ACCT static unsigned int -print_counters(char *buffer, struct ip_conntrack_counter *counter) +seq_print_counters(struct seq_file *s, struct ip_conntrack_counter *counter) { - return sprintf(buffer, "packets=%llu bytes=%llu ", - counter->packets, counter->bytes); + return seq_printf(s, "packets=%llu bytes=%llu ", + counter->packets, counter->bytes); } #else -#define print_counters(x, y) 0 +#define seq_print_counters(x, y) 0 #endif -static unsigned int -print_conntrack(char *buffer, struct ip_conntrack *conntrack) +static void *ct_seq_start(struct seq_file *s, loff_t *pos) { - unsigned int len; - struct ip_conntrack_protocol *proto - = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); + unsigned int *bucket; - len = sprintf(buffer, "%-8s %u %lu ", - proto->name, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum, - timer_pending(&conntrack->timeout) - ? 
(conntrack->timeout.expires - jiffies)/HZ : 0); - - len += proto->print_conntrack(buffer + len, conntrack); - len += print_tuple(buffer + len, - &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - proto); - len += print_counters(buffer + len, - &conntrack->counters[IP_CT_DIR_ORIGINAL]); - if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) - len += sprintf(buffer + len, "[UNREPLIED] "); - len += print_tuple(buffer + len, - &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, - proto); - len += print_counters(buffer + len, - &conntrack->counters[IP_CT_DIR_REPLY]); - if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) - len += sprintf(buffer + len, "[ASSURED] "); - len += sprintf(buffer + len, "use=%u ", - atomic_read(&conntrack->ct_general.use)); - len += sprintf(buffer + len, "\n"); + /* strange seq_file api calls stop even if we fail, + * thus we need to grab lock since stop unlocks */ + READ_LOCK(&ip_conntrack_lock); + + if (*pos >= ip_conntrack_htable_size) + return NULL; - return len; + bucket = kmalloc(sizeof(unsigned int), GFP_KERNEL); + if (!bucket) { + return ERR_PTR(-ENOMEM); + } + + *bucket = *pos; + return bucket; } + +static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + unsigned int *bucket = (unsigned int *) v; -/* Returns true when finished. 
*/ -static inline int -conntrack_iterate(const struct ip_conntrack_tuple_hash *hash, - char *buffer, off_t offset, off_t *upto, - unsigned int *len, unsigned int maxlen) + *pos = ++(*bucket); + if (*pos >= ip_conntrack_htable_size) { + kfree(v); + return NULL; + } + return bucket; +} + +static void ct_seq_stop(struct seq_file *s, void *v) { - unsigned int newlen; - IP_NF_ASSERT(hash->ctrack); + READ_UNLOCK(&ip_conntrack_lock); +} + +/* return 0 on success, 1 in case of error */ +static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash, + struct seq_file *s) +{ + struct ip_conntrack *conntrack = hash->ctrack; + struct ip_conntrack_protocol *proto; + char buffer[IP_CT_PRINT_BUFLEN]; MUST_BE_READ_LOCKED(&ip_conntrack_lock); - /* Only count originals */ + IP_NF_ASSERT(conntrack); + + /* we only want to print DIR_ORIGINAL */ if (DIRECTION(hash)) return 0; - if ((*upto)++ < offset) - return 0; + proto = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + IP_NF_ASSERT(proto); + + if (seq_printf(s, "%-8s %u %lu ", + proto->name, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, + timer_pending(&conntrack->timeout) + ? 
(conntrack->timeout.expires - jiffies)/HZ : 0) != 0) + return 1; + + proto->print_conntrack(buffer, conntrack); + if (seq_puts(s, buffer)) + return 1; + + print_tuple(buffer, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + proto); + + if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) + return 1; + + if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) + if (seq_printf(s, "[UNREPLIED] ")) + return 1; + + print_tuple(buffer, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, + proto); + if (seq_puts(s, buffer)) + return 1; + + if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) + return 1; - newlen = print_conntrack(buffer + *len, hash->ctrack); - if (*len + newlen > maxlen) + if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) + if (seq_printf(s, "[ASSURED] ")) + return 1; + + if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) return 1; - else *len += newlen; return 0; } -static int -list_conntracks(char *buffer, char **start, off_t offset, int length) + +static int ct_seq_show(struct seq_file *s, void *v) { - unsigned int i; - unsigned int len = 0; - off_t upto = 0; - struct list_head *e; + unsigned int *bucket = (unsigned int *) v; - READ_LOCK(&ip_conntrack_lock); - /* Traverse hash; print originals then reply. */ - for (i = 0; i < ip_conntrack_htable_size; i++) { - if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate, - struct ip_conntrack_tuple_hash *, - buffer, offset, &upto, &len, length)) - goto finished; + if (LIST_FIND(&ip_conntrack_hash[*bucket], ct_seq_real_show, + struct ip_conntrack_tuple_hash *, s)) { + /* buffer was filled and unable to print that tuple */ + return 1; } + return 0; +} + +static struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show +}; + +static int ct_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ct_seq_ops); +} - /* Now iterate through expecteds. 
*/ +static struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* expects */ +static void *exp_seq_start(struct seq_file *s, loff_t *pos) +{ + struct list_head *e = &ip_conntrack_expect_list; + loff_t i; + + /* strange seq_file api calls stop even if we fail, + * thus we need to grab lock since stop unlocks */ + READ_LOCK(&ip_conntrack_lock); READ_LOCK(&ip_conntrack_expect_tuple_lock); - list_for_each(e, &ip_conntrack_expect_list) { - unsigned int last_len; - struct ip_conntrack_expect *expect - = (struct ip_conntrack_expect *)e; - if (upto++ < offset) continue; - - last_len = len; - len += print_expect(buffer + len, expect); - if (len > length) { - len = last_len; - goto finished_expects; - } + + if (list_empty(e)) + return NULL; + + for (i = 0; i <= *pos; i++) { + e = e->next; + if (e == &ip_conntrack_expect_list) + return NULL; } + return e; +} + +static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct list_head *e = v; - finished_expects: + e = e->next; + + if (e == &ip_conntrack_expect_list) + return NULL; + + return e; +} + +static void exp_seq_stop(struct seq_file *s, void *v) +{ READ_UNLOCK(&ip_conntrack_expect_tuple_lock); - finished: READ_UNLOCK(&ip_conntrack_lock); +} - /* `start' hack - see fs/proc/generic.c line ~165 */ - *start = (char *)((unsigned int)upto - offset); - return len; +static int exp_seq_show(struct seq_file *s, void *v) +{ + struct ip_conntrack_expect *expect = v; + char buffer[IP_CT_PRINT_BUFLEN]; + + if (expect->expectant->helper->timeout) + seq_printf(s, "%lu ", timer_pending(&expect->timeout) + ? 
(expect->timeout.expires - jiffies)/HZ : 0); + else + seq_printf(s, "- "); + + seq_printf(s, "use=%u proto=%u ", atomic_read(&expect->use), + expect->tuple.dst.protonum); + + print_tuple(buffer, &expect->tuple, + __ip_ct_find_proto(expect->tuple.dst.protonum)); + return seq_printf(s, "%s\n", buffer); +} + +static struct seq_operations exp_seq_ops = { + .start = exp_seq_start, + .next = exp_seq_next, + .stop = exp_seq_stop, + .show = exp_seq_show +}; + +static int exp_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &exp_seq_ops); } + +static struct file_operations exp_file_ops = { + .owner = THIS_MODULE, + .open = exp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; static unsigned int ip_confirm(unsigned int hooknum, struct sk_buff **pskb, @@ -509,7 +592,7 @@ static ctl_table ip_ct_net_table[] = { #endif static int init_or_cleanup(int init) { - struct proc_dir_entry *proc; + struct proc_dir_entry *proc, *proc_exp; int ret = 0; if (!init) goto cleanup; @@ -518,14 +601,18 @@ static int init_or_cleanup(int init) if (ret < 0) goto cleanup_nothing; - proc = proc_net_create("ip_conntrack", 0440, list_conntracks); + proc = proc_net_create("ip_conntrack", 0440, NULL); if (!proc) goto cleanup_init; - proc->owner = THIS_MODULE; + proc->proc_fops = &ct_file_ops; + + proc_exp = proc_net_create("ip_conntrack_expect", 0440, NULL); + if (!proc_exp) goto cleanup_proc; + proc_exp->proc_fops = &exp_file_ops; ret = nf_register_hook(&ip_conntrack_defrag_ops); if (ret < 0) { printk("ip_conntrack: can't register pre-routing defrag hook.\n"); - goto cleanup_proc; + goto cleanup_proc_exp; } ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops); if (ret < 0) { @@ -577,6 +664,8 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); cleanup_defragops: nf_unregister_hook(&ip_conntrack_defrag_ops); +cleanup_proc_exp: + proc_net_remove("ip_conntrack_expect"); /* name must match the proc_net_create("ip_conntrack_expect") call in this function */ cleanup_proc:
proc_net_remove("ip_conntrack"); cleanup_init: -- cgit v1.2.3 From 37306cb0d818cdc3846d8ffbfe2d717125ee6a9b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 09:03:50 -0700 Subject: [NETFILTER]: New ip_sctp match This patch adds ipt_sctp, enabling iptables to match on sctp ports and chunktypes. Signed-off-by: Kiran Kumar Immidi Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_sctp.h | 107 +++++++++++++++++ net/ipv4/netfilter/Kconfig | 4 + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_sctp.c | 201 ++++++++++++++++++++++++++++++++ 4 files changed, 313 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_sctp.h create mode 100644 net/ipv4/netfilter/ipt_sctp.c diff --git a/include/linux/netfilter_ipv4/ipt_sctp.h b/include/linux/netfilter_ipv4/ipt_sctp.h new file mode 100644 index 000000000000..e93a9ec99fc2 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_sctp.h @@ -0,0 +1,107 @@ +#ifndef _IPT_SCTP_H_ +#define _IPT_SCTP_H_ + +#define IPT_SCTP_SRC_PORTS 0x01 +#define IPT_SCTP_DEST_PORTS 0x02 +#define IPT_SCTP_CHUNK_TYPES 0x04 + +#define IPT_SCTP_VALID_FLAGS 0x07 + +#define ELEMCOUNT(x) (sizeof(x)/sizeof(x[0])) + + +struct ipt_sctp_flag_info { + u_int8_t chunktype; + u_int8_t flag; + u_int8_t flag_mask; +}; + +#define IPT_NUM_SCTP_FLAGS 4 + +struct ipt_sctp_info { + u_int16_t dpts[2]; /* Min, Max */ + u_int16_t spts[2]; /* Min, Max */ + + u_int32_t chunkmap[256 / sizeof (u_int32_t)]; /* Bit mask of chunks to be matched according to RFC 2960 */ + +#define SCTP_CHUNK_MATCH_ANY 0x01 /* Match if any of the chunk types are present */ +#define SCTP_CHUNK_MATCH_ALL 0x02 /* Match if all of the chunk types are present */ +#define SCTP_CHUNK_MATCH_ONLY 0x04 /* Match if these are the only chunk types present */ + + u_int32_t chunk_match_type; + struct ipt_sctp_flag_info flag_info[IPT_NUM_SCTP_FLAGS]; + int flag_count; + + u_int32_t flags; + u_int32_t invflags; +}; + +#define bytes(type) 
(sizeof(type) * 8) + +#define SCTP_CHUNKMAP_SET(chunkmap, type) \ + do { \ + chunkmap[type / bytes(u_int32_t)] |= \ + 1 << (type % bytes(u_int32_t)); \ + } while (0) + +#define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ + do { \ + chunkmap[type / bytes(u_int32_t)] &= \ + ~(1 << (type % bytes(u_int32_t))); \ + } while (0) + +#define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ +({ \ + (chunkmap[type / bytes (u_int32_t)] & \ + (1 << (type % bytes (u_int32_t)))) ? 1: 0; \ +}) + +#define SCTP_CHUNKMAP_RESET(chunkmap) \ + do { \ + int i; \ + for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + chunkmap[i] = 0; \ + } while (0) + +#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \ + do { \ + int i; \ + for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + chunkmap[i] = ~0; \ + } while (0) + +#define SCTP_CHUNKMAP_COPY(destmap, srcmap) /* destmap must be a real array (not a pointer): its element count bounds the copy */ \ + do { \ + int i; \ + for (i = 0; i < ELEMCOUNT(destmap); i++) \ + destmap[i] = srcmap[i]; \ + } while (0) + +#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \ +({ \ + int i; \ + int flag = 1; \ + for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \ + if (chunkmap[i]) { \ + flag = 0; \ + break; \ + } \ + } \ + flag; \ +}) + +#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \ +({ \ + int i; \ + int flag = 1; \ + for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \ + if (chunkmap[i] != ~0) { \ + flag = 0; \ + break; \ + } \ + } \ + flag; \ +}) + +#endif /* _IPT_SCTP_H_ */ + diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 39ef9751bfe6..9f29305e3fdf 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -632,5 +632,9 @@ config IP_NF_CT_ACCT bool "Connection tracking flow accounting" depends on IP_NF_CONNTRACK +config IP_NF_MATCH_SCTP + tristate 'SCTP protocol match support' + depends on IP_NF_IPTABLES + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index bdb23fde133f..05b6be683257 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches
obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o +obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o diff --git a/net/ipv4/netfilter/ipt_sctp.c b/net/ipv4/netfilter/ipt_sctp.c new file mode 100644 index 000000000000..8f875940b8dc --- /dev/null +++ b/net/ipv4/netfilter/ipt_sctp.c @@ -0,0 +1,201 @@ +#include +#include +#include +#include + +#include +#include + +#ifdef DEBUG_SCTP +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ + || (!!((invflag) & (option)) ^ (cond))) + +static int +match_flags(const struct ipt_sctp_flag_info *flag_info, + const int flag_count, + u_int8_t chunktype, + u_int8_t chunkflags) +{ + int i; + + for (i = 0; i < flag_count; i++) { + if (flag_info[i].chunktype == chunktype) { + return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag; + } + } + + return 1; +} + +static int +match_packet(const struct sk_buff *skb, + const u_int32_t *chunkmap, + int chunk_match_type, + const struct ipt_sctp_flag_info *flag_info, + const int flag_count, + int *hotdrop) +{ + int offset; + u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; + sctp_chunkhdr_t sch; + +#ifdef DEBUG_SCTP + int i = 0; +#endif + + if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) { + SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap); + } + + offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t); + do { + if (skb_copy_bits(skb, offset, &sch, sizeof(sch)) < 0 || sch.length == 0) { /* a zero chunk length would never advance offset: endless loop */ + duprintf("Dropping invalid SCTP packet.\n"); + *hotdrop = 1; + return 0; + } + + duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", + ++i, offset, sch.type, htons(sch.length), sch.flags); + + offset += (htons(sch.length) + 3) & ~3; + + duprintf("skb->len: %d\toffset: %d\n", skb->len, offset); + + if
(SCTP_CHUNKMAP_IS_SET(chunkmap, sch.type)) { + switch (chunk_match_type) { + case SCTP_CHUNK_MATCH_ANY: + if (match_flags(flag_info, flag_count, + sch.type, sch.flags)) { + return 1; + } + break; + + case SCTP_CHUNK_MATCH_ALL: + if (match_flags(flag_info, flag_count, + sch.type, sch.flags)) { + SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch.type); + } + break; + + case SCTP_CHUNK_MATCH_ONLY: + if (!match_flags(flag_info, flag_count, + sch.type, sch.flags)) { + return 0; + } + break; + } + } else { + switch (chunk_match_type) { + case SCTP_CHUNK_MATCH_ONLY: + return 0; + } + } + } while (offset < skb->len); + + switch (chunk_match_type) { + case SCTP_CHUNK_MATCH_ALL: + return SCTP_CHUNKMAP_IS_CLEAR(chunkmapcopy); /* every requested chunk type was seen and cleared from the working copy */ + case SCTP_CHUNK_MATCH_ANY: + return 0; + case SCTP_CHUNK_MATCH_ONLY: + return 1; + } + + /* This will never be reached, but required to stop compiler whine */ + return 0; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_sctp_info *info; + sctp_sctphdr_t sh; + + info = (const struct ipt_sctp_info *)matchinfo; + + if (offset) { + duprintf("Dropping non-first fragment..
FIXME\n"); + return 0; + } + + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &sh, sizeof(sh)) < 0) { + duprintf("Dropping evil TCP offset=0 tinygram.\n"); + *hotdrop = 1; + return 0; + } + duprintf("spt: %d\tdpt: %d\n", ntohs(sh.source), ntohs(sh.dest)); + + return SCCHECK(((ntohs(sh.source) >= info->spts[0]) + && (ntohs(sh.source) <= info->spts[1])), + IPT_SCTP_SRC_PORTS, info->flags, info->invflags) + && SCCHECK(((ntohs(sh.dest) >= info->dpts[0]) + && (ntohs(sh.dest) <= info->dpts[1])), + IPT_SCTP_DEST_PORTS, info->flags, info->invflags) + && SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type, + info->flag_info, info->flag_count, + hotdrop), + IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags); +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_sctp_info *info; + + info = (const struct ipt_sctp_info *)matchinfo; + + return ip->proto == IPPROTO_SCTP + && !(ip->invflags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_sctp_info)) + && !(info->flags & ~IPT_SCTP_VALID_FLAGS) + && !(info->invflags & ~IPT_SCTP_VALID_FLAGS) + && !(info->invflags & ~info->flags) + && ((!(info->flags & IPT_SCTP_CHUNK_TYPES)) || + (info->chunk_match_type & + (SCTP_CHUNK_MATCH_ALL + | SCTP_CHUNK_MATCH_ANY + | SCTP_CHUNK_MATCH_ONLY))); +} + +static struct ipt_match sctp_match = +{ + .list = { NULL, NULL}, + .name = "sctp", + .match = &match, + .checkentry = &checkentry, + .destroy = NULL, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&sctp_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&sctp_match); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kiran Kumar Immidi"); +MODULE_DESCRIPTION("Match for SCTP protocol packets"); + -- cgit v1.2.3 From 32d6a4f944676c0b384d5167fbe4d5672e28da91 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 
2004 09:04:41 -0700 Subject: [NETFILTER]: Make 'helper' list of ip_nat_core static This patch makes the 'helper' symbol static to not pollute the namespace Signed-off-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_helper.h | 5 +++-- net/ipv4/netfilter/ip_nat_core.c | 11 +---------- net/ipv4/netfilter/ip_nat_helper.c | 13 +++++++++++++ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_nat_helper.h b/include/linux/netfilter_ipv4/ip_nat_helper.h index 185a24a6a047..be6bb082d0ba 100644 --- a/include/linux/netfilter_ipv4/ip_nat_helper.h +++ b/include/linux/netfilter_ipv4/ip_nat_helper.h @@ -38,11 +38,12 @@ struct ip_nat_helper struct ip_nat_info *info); }; -extern struct list_head helpers; - extern int ip_nat_helper_register(struct ip_nat_helper *me); extern void ip_nat_helper_unregister(struct ip_nat_helper *me); +extern struct ip_nat_helper * +ip_nat_find_helper(const struct ip_conntrack_tuple *tuple); + /* These return true or false. */ extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb, struct ip_conntrack *ct, diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1c6b7810655a..d350134dacb1 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -49,7 +49,6 @@ static unsigned int ip_nat_htable_size; static struct list_head *bysource; static struct list_head *byipsproto; LIST_HEAD(protos); -LIST_HEAD(helpers); extern struct ip_nat_protocol unknown_nat_protocol; @@ -498,13 +497,6 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, return ret; } -static inline int -helper_cmp(const struct ip_nat_helper *helper, - const struct ip_conntrack_tuple *tuple) -{ - return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask); -} - /* Where to manip the reply packets (will be reverse manip). 
*/ static unsigned int opposite_hook[NF_IP_NUMHOOKS] = { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, @@ -643,8 +635,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, /* If there's a helper, assign it; based on new tuple. */ if (!conntrack->master) - info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, - &reply); + info->helper = ip_nat_find_helper(&reply); /* It's done. */ info->initialized |= (1 << HOOK2MANIP(hooknum)); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index a49c722adbc1..2e8d021aff44 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -47,6 +47,7 @@ #define DUMP_OFFSET(x) #endif +static LIST_HEAD(helpers); DECLARE_LOCK(ip_nat_seqofs_lock); /* Setup TCP sequence correction given this change at this sequence */ @@ -419,6 +420,18 @@ int ip_nat_helper_register(struct ip_nat_helper *me) return ret; } +struct ip_nat_helper * +ip_nat_find_helper(const struct ip_conntrack_tuple *tuple) +{ + struct ip_nat_helper *h; + + READ_LOCK(&ip_nat_lock); + h = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, tuple); + READ_UNLOCK(&ip_nat_lock); + + return h; +} + static int kill_helper(const struct ip_conntrack *i, void *helper) { -- cgit v1.2.3 From 9e8605db81c3ccccda0f68960120f698f47e385a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 09:05:28 -0700 Subject: [NETFILTER]: init_conntrack() optimization This patch optimizes the code path during init_conntrack() Signed-off-by: Pablo Neira Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 51 +++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 19bd1a9560a0..2966684ac9ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -694,41 +694,48 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, struct ip_conntrack_expect *, tuple); READ_UNLOCK(&ip_conntrack_expect_tuple_lock); - /* If master is not in hash table yet (ie. packet hasn't left - this machine yet), how can other end know about expected? - Hence these are not the droids you are looking for (if - master ct never got confirmed, we'd hold a reference to it - and weird things would happen to future packets). */ - if (expected && !is_confirmed(expected->expectant)) - expected = NULL; - - /* Look up the conntrack helper for master connections only */ - if (!expected) - conntrack->helper = ip_ct_find_helper(&repl_tuple); + if (expected) { + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). */ + if (!is_confirmed(expected->expectant)) { + conntrack->helper = ip_ct_find_helper(&repl_tuple); + goto end; + } - /* If the expectation is dying, then this is a loser. */ - if (expected - && expected->expectant->helper->timeout - && ! del_timer(&expected->timeout)) - expected = NULL; + /* Expectation is dying... */ + if (expected->expectant->helper->timeout + && !del_timer(&expected->timeout)) + goto end; - if (expected) { DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", conntrack, expected); /* Welcome, Mr. Bond. We've been expecting you... 
*/ + IP_NF_ASSERT(master_ct(conntrack)); __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = expected; expected->sibling = conntrack; LIST_DELETE(&ip_conntrack_expect_list, expected); expected->expectant->expecting--; nf_conntrack_get(&master_ct(conntrack)->infos[0]); - } - atomic_inc(&ip_conntrack_count); + + /* this is a braindead... --pablo */ + atomic_inc(&ip_conntrack_count); + WRITE_UNLOCK(&ip_conntrack_lock); + + if (expected->expectfn) + expected->expectfn(conntrack); + + goto ret; + } else + conntrack->helper = ip_ct_find_helper(&repl_tuple); + +end: atomic_inc(&ip_conntrack_count); WRITE_UNLOCK(&ip_conntrack_lock); - if (expected && expected->expectfn) - expected->expectfn(conntrack); - return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; +ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; } /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ -- cgit v1.2.3 From 08d121b773543e79f455e81aaf277595b077b7df Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 09:06:12 -0700 Subject: [NETFILTER]: Move error tracking into conntrack protocol helper This patch moves icmp_error_track out of the generic conntrack core and into the icmp helper, where it really belongs. It also adds some generic infrastructure for logging packets that are 'out of spec'. Signed-off-by: Pablo Neira Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack_core.h | 20 +-- .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 16 +++ net/ipv4/netfilter/ip_conntrack_core.c | 113 +++------------ net/ipv4/netfilter/ip_conntrack_proto_generic.c | 16 ++- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 153 ++++++++++++++++++++- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 16 ++- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 63 ++++++++- net/ipv4/netfilter/ip_conntrack_standalone.c | 16 +++ net/ipv4/netfilter/ip_fw_compat_masq.c | 7 +- 9 files changed, 299 insertions(+), 121 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 4c8b5d189089..9a31e96b7ab7 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -21,15 +21,17 @@ extern struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol); extern struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol); extern struct list_head protocol_list; -/* Returns conntrack if it dealt with ICMP, and filled in skb->nfct */ -extern struct ip_conntrack *icmp_error_track(struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, - unsigned int hooknum); -extern int get_tuple(const struct iphdr *iph, - const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_protocol *protocol); +extern int +ip_ct_get_tuple(const struct iphdr *iph, + const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_protocol *protocol); + +extern int +ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig, + const struct ip_conntrack_protocol *protocol); /* Find a connection corresponding to a tuple. 
*/ struct ip_conntrack_tuple_hash * diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index 55531ad34192..55d57404acb8 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -55,6 +55,9 @@ struct ip_conntrack_protocol int (*exp_matches_pkt)(struct ip_conntrack_expect *exp, const struct sk_buff *skb); + int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, + unsigned int hooknum); + /* Module (if any) which this is connected to. */ struct module *me; }; @@ -68,4 +71,17 @@ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp; extern int ip_conntrack_protocol_tcp_init(void); + +/* Log invalid packets */ +extern unsigned int ip_ct_log_invalid; + +#ifdef DEBUG_INVALID_PACKETS +#define LOG_INVALID(proto) \ + (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) +#else +#define LOG_INVALID(proto) \ + ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \ + && net_ratelimit()) +#endif + #endif /*_IP_CONNTRACK_PROTOCOL_H*/ diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 2966684ac9ec..8d23d6829cef 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -128,11 +128,11 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple) } int -get_tuple(const struct iphdr *iph, - const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_protocol *protocol) +ip_ct_get_tuple(const struct iphdr *iph, + const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_protocol *protocol) { /* Never happen */ if (iph->frag_off & htons(IP_OFFSET)) { @@ -148,10 +148,10 @@ 
get_tuple(const struct iphdr *iph, return protocol->pkt_to_tuple(skb, dataoff, tuple); } -static int -invert_tuple(struct ip_conntrack_tuple *inverse, - const struct ip_conntrack_tuple *orig, - const struct ip_conntrack_protocol *protocol) +int +ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig, + const struct ip_conntrack_protocol *protocol) { inverse->src.ip = orig->dst.ip; inverse->dst.ip = orig->src.ip; @@ -497,83 +497,6 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, return h != NULL; } -/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ -struct ip_conntrack * -icmp_error_track(struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, - unsigned int hooknum) -{ - struct ip_conntrack_tuple innertuple, origtuple; - struct { - struct icmphdr icmp; - struct iphdr ip; - } inside; - struct ip_conntrack_protocol *innerproto; - struct ip_conntrack_tuple_hash *h; - int dataoff; - - IP_NF_ASSERT(skb->nfct == NULL); - - /* Not enough header? */ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0) - return NULL; - - if (inside.icmp.type != ICMP_DEST_UNREACH - && inside.icmp.type != ICMP_SOURCE_QUENCH - && inside.icmp.type != ICMP_TIME_EXCEEDED - && inside.icmp.type != ICMP_PARAMETERPROB - && inside.icmp.type != ICMP_REDIRECT) - return NULL; - - /* Ignore ICMP's containing fragments (shouldn't happen) */ - if (inside.ip.frag_off & htons(IP_OFFSET)) { - DEBUGP("icmp_error_track: fragment of proto %u\n", - inside.ip.protocol); - return NULL; - } - - innerproto = ip_ct_find_proto(inside.ip.protocol); - dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4; - /* Are they talking about one of our connections? */ - if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) { - DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol); - return NULL; - } - - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. 
*/ - if (!invert_tuple(&innertuple, &origtuple, innerproto)) { - DEBUGP("icmp_error_track: Can't invert tuple\n"); - return NULL; - } - - *ctinfo = IP_CT_RELATED; - - h = ip_conntrack_find_get(&innertuple, NULL); - if (!h) { - /* Locally generated ICMPs will match inverted if they - haven't been SNAT'ed yet */ - /* FIXME: NAT code has to handle half-done double NAT --RR */ - if (hooknum == NF_IP_LOCAL_OUT) - h = ip_conntrack_find_get(&origtuple, NULL); - - if (!h) { - DEBUGP("icmp_error_track: no match\n"); - return NULL; - } - /* Reverse direction from that found */ - if (DIRECTION(h) != IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } else { - if (DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } - - /* Update skb to refer to this connection */ - skb->nfct = &h->ctrack->infos[*ctinfo]; - return h->ctrack; -} - /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ static inline int unreplied(const struct ip_conntrack_tuple_hash *i) @@ -655,7 +578,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, } } - if (!invert_tuple(&repl_tuple, tuple, protocol)) { + if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { DEBUGP("Can't invert tuple.\n"); return NULL; } @@ -751,7 +674,8 @@ resolve_normal_ct(struct sk_buff *skb, IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); - if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto)) + if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, + &tuple,proto)) return NULL; /* look for tuple match */ @@ -836,10 +760,12 @@ unsigned int ip_conntrack_in(unsigned int hooknum, proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); - /* It may be an icmp error... */ - if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP - && icmp_error_track(*pskb, &ctinfo, hooknum)) - return NF_ACCEPT; + /* It may be a special packet, error, unclean... + * inverse of the return code tells to the netfilter + * core what to do with the packet.
*/ + if (proto->error != NULL + && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) + return -ret; if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) /* Not valid part of a connection */ @@ -877,7 +803,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum, int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig) { - return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum)); + return ip_ct_invert_tuple(inverse, orig, + ip_ct_find_proto(orig->dst.protonum)); } static inline int resent_expect(const struct ip_conntrack_expect *i, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c index 6a7db7754512..c7a913149b8e 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -62,8 +62,14 @@ static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb) return 1; } -struct ip_conntrack_protocol ip_conntrack_generic_protocol -= { { NULL, NULL }, 0, "unknown", - generic_pkt_to_tuple, generic_invert_tuple, generic_print_tuple, - generic_print_conntrack, packet, new, NULL, NULL, NULL }; - +struct ip_conntrack_protocol ip_conntrack_generic_protocol = +{ + .proto = 0, + .name = "unknown", + .pkt_to_tuple = generic_pkt_to_tuple, + .invert_tuple = generic_invert_tuple, + .print_tuple = generic_print_tuple, + .print_conntrack = generic_print_conntrack, + .packet = packet, + .new = new, +}; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index e854193eb768..b833a7089433 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -12,6 +12,11 @@ #include #include #include +#include +#include +#include +#include +#include #include unsigned long ip_ct_icmp_timeout = 30*HZ; @@ -122,7 +127,147 @@ static int icmp_new(struct ip_conntrack *conntrack, return 1; } -struct 
ip_conntrack_protocol ip_conntrack_protocol_icmp -= { { NULL, NULL }, IPPROTO_ICMP, "icmp", - icmp_pkt_to_tuple, icmp_invert_tuple, icmp_print_tuple, - icmp_print_conntrack, icmp_packet, icmp_new, NULL, NULL, NULL }; +static int +icmp_error_message(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct ip_conntrack_tuple innertuple, origtuple; + struct { + struct icmphdr icmp; + struct iphdr ip; + } inside; + struct ip_conntrack_protocol *innerproto; + struct ip_conntrack_tuple_hash *h; + int dataoff; + + IP_NF_ASSERT(skb->nfct == NULL); + + /* Not enough header? */ + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0) + return NF_ACCEPT; + + /* Ignore ICMP's containing fragments (shouldn't happen) */ + if (inside.ip.frag_off & htons(IP_OFFSET)) { + DEBUGP("icmp_error_track: fragment of proto %u\n", + inside.ip.protocol); + return NF_ACCEPT; + } + + innerproto = ip_ct_find_proto(inside.ip.protocol); + dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4; + /* Are they talking about one of our connections? */ + if (!ip_ct_get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) { + DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol); + return NF_ACCEPT; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. 
*/ + if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { + DEBUGP("icmp_error_track: Can't invert tuple\n"); + return NF_ACCEPT; + } + + *ctinfo = IP_CT_RELATED; + + h = ip_conntrack_find_get(&innertuple, NULL); + if (!h) { + /* Locally generated ICMPs will match inverted if they + haven't been SNAT'ed yet */ + /* FIXME: NAT code has to handle half-done double NAT --RR */ + if (hooknum == NF_IP_LOCAL_OUT) + h = ip_conntrack_find_get(&origtuple, NULL); + + if (!h) { + DEBUGP("icmp_error_track: no match\n"); + return NF_ACCEPT; + } + /* Reverse direction from that found */ + if (DIRECTION(h) != IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } else { + if (DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } + + /* Update skb to refer to this connection */ + skb->nfct = &h->ctrack->infos[*ctinfo]; + return -NF_ACCEPT; +} + +/* Small and modified version of icmp_rcv */ +static int +icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct icmphdr icmph; + + /* Not enough header? */ + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph))!=0) { + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_icmp: short packet "); + return -NF_ACCEPT; + } + + /* See ip_conntrack_proto_tcp.c */ + if (hooknum != NF_IP_PRE_ROUTING) + goto checksum_skipped; + + switch (skb->ip_summed) { + case CHECKSUM_HW: + if (!(u16)csum_fold(skb->csum)) + break; + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_icmp: bad HW ICMP checksum "); + return -NF_ACCEPT; + case CHECKSUM_NONE: + if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_icmp: bad ICMP checksum "); + return -NF_ACCEPT; + } + default: + break; + } + +checksum_skipped: + /* + * 18 is the highest 'known' ICMP type. 
Anything else is a mystery + * + * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently + * discarded. + */ + if (icmph.type > NR_ICMP_TYPES) { + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_icmp: invalid ICMP type "); + return -NF_ACCEPT; + } + + /* Need to track icmp error message? */ + if (icmph.type != ICMP_DEST_UNREACH + && icmph.type != ICMP_SOURCE_QUENCH + && icmph.type != ICMP_TIME_EXCEEDED + && icmph.type != ICMP_PARAMETERPROB + && icmph.type != ICMP_REDIRECT) + return NF_ACCEPT; + + return icmp_error_message(skb, ctinfo, hooknum); +} + +struct ip_conntrack_protocol ip_conntrack_protocol_icmp = +{ + .proto = IPPROTO_ICMP, + .name = "icmp", + .pkt_to_tuple = icmp_pkt_to_tuple, + .invert_tuple = icmp_invert_tuple, + .print_tuple = icmp_print_tuple, + .print_conntrack = icmp_print_conntrack, + .packet = icmp_packet, + .new = icmp_new, + .error = icmp_error, +}; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 73fe0401d5ce..6344fa0c53b2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -268,7 +268,15 @@ static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp, return between(exp->seq, ntohl(tcph.seq), ntohl(tcph.seq) + datalen); } -struct ip_conntrack_protocol ip_conntrack_protocol_tcp -= { { NULL, NULL }, IPPROTO_TCP, "tcp", - tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack, - tcp_packet, tcp_new, NULL, tcp_exp_matches_pkt, NULL }; +struct ip_conntrack_protocol ip_conntrack_protocol_tcp = +{ + .proto = IPPROTO_TCP, + .name = "tcp", + .pkt_to_tuple = tcp_pkt_to_tuple, + .invert_tuple = tcp_invert_tuple, + .print_tuple = tcp_print_tuple, + .print_conntrack = tcp_print_conntrack, + .packet = tcp_packet, + .new = tcp_new, + .exp_matches_pkt = tcp_exp_matches_pkt, +}; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c 
index a69e14b5c9a2..85ea29203f7a 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include unsigned long ip_ct_udp_timeout = 30*HZ; @@ -81,7 +83,60 @@ static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) return 1; } -struct ip_conntrack_protocol ip_conntrack_protocol_udp -= { { NULL, NULL }, IPPROTO_UDP, "udp", - udp_pkt_to_tuple, udp_invert_tuple, udp_print_tuple, udp_print_conntrack, - udp_packet, udp_new, NULL, NULL, NULL }; +static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct iphdr *iph = skb->nh.iph; + unsigned int udplen = skb->len - iph->ihl * 4; + struct udphdr hdr; + + /* Header is too small? */ + if (skb_copy_bits(skb, iph->ihl*4, &hdr, sizeof(hdr)) != 0) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_udp: short packet "); + return -NF_ACCEPT; + } + + /* Truncated/malformed packets */ + if (ntohs(hdr.len) > udplen || ntohs(hdr.len) < sizeof(hdr)) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_udp: truncated/malformed packet "); + return -NF_ACCEPT; + } + + /* Packet with no checksum */ + if (!hdr.check) + return NF_ACCEPT; + + /* Checksum invalid? Ignore. + * We skip checking packets on the outgoing path + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. + * FIXME: Source route IP option packets --RR */ + if (hooknum == NF_IP_PRE_ROUTING + && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP, + skb->ip_summed == CHECKSUM_HW ? 
skb->csum + : skb_checksum(skb, iph->ihl*4, udplen, 0))) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_udp: bad UDP checksum "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} + +struct ip_conntrack_protocol ip_conntrack_protocol_udp = +{ + .proto = IPPROTO_UDP, + .name = "udp", + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .print_conntrack = udp_print_conntrack, + .packet = udp_packet, + .new = udp_new, + .error = udp_error, +}; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index a5dc49b128e7..f3e173db4daa 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -432,6 +432,11 @@ extern unsigned long ip_ct_icmp_timeout; /* From ip_conntrack_proto_icmp.c */ extern unsigned long ip_ct_generic_timeout; +/* Log invalid packets of a given protocol */ +unsigned int ip_ct_log_invalid = 0; +static int log_invalid_proto_min = 0; +static int log_invalid_proto_max = 255; + static struct ctl_table_header *ip_ct_sysctl_header; static ctl_table ip_ct_sysctl_table[] = { @@ -547,6 +552,17 @@ static ctl_table ip_ct_sysctl_table[] = { .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, + .procname = "ip_conntrack_log_invalid", + .data = &ip_ct_log_invalid, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &log_invalid_proto_min, + .extra2 = &log_invalid_proto_max, + }, { .ctl_name = 0 } }; diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c index fbd8b9bfeb28..48227fe19dc2 100644 --- a/net/ipv4/netfilter/ip_fw_compat_masq.c +++ b/net/ipv4/netfilter/ip_fw_compat_masq.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -144,7 +145,8 @@ check_for_demasq(struct 
sk_buff **pskb) switch ((*pskb)->nh.iph->protocol) { case IPPROTO_ICMP: /* ICMP errors. */ - ct = icmp_error_track(*pskb, &ctinfo, NF_IP_PRE_ROUTING); + protocol->error(*pskb, &ctinfo, NF_IP_PRE_ROUTING); + ct = (struct ip_conntrack *)(*pskb)->nfct->master; if (ct) { /* We only do SNAT in the compatibility layer. So we can manipulate ICMP errors from @@ -165,7 +167,8 @@ check_for_demasq(struct sk_buff **pskb) case IPPROTO_UDP: IP_NF_ASSERT(((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) == 0); - if (!get_tuple((*pskb)->nh.iph, *pskb, (*pskb)->nh.iph->ihl*4, &tuple, protocol)) { + if (!ip_ct_get_tuple((*pskb)->nh.iph, *pskb, + (*pskb)->nh.iph->ihl*4, &tuple, protocol)) { if (net_ratelimit()) printk("ip_fw_compat_masq: Can't get tuple\n"); return NF_ACCEPT; -- cgit v1.2.3 From 51b03285cde06db01f5135517d925e7eb32cc060 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 09:06:51 -0700 Subject: [NETFILTER]: Add conntrack runtime statistics This patch adds some runtime-statistics to the connection tracking core, pretty similar to what 'rtstat' does for the routing cache. This was the last patch in this incremental set. The only thing I still have pending at this time is the tcp window tracking code. Signed-off-by: Martin Josefsson Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 20 ++++++ net/ipv4/netfilter/ip_conntrack_core.c | 55 ++++++++++++---- net/ipv4/netfilter/ip_conntrack_standalone.c | 96 +++++++++++++++++++++++++++- 3 files changed, 158 insertions(+), 13 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 824f10875720..73cd72fdd3fe 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -284,6 +284,26 @@ static inline int is_confirmed(struct ip_conntrack *ct) } extern unsigned int ip_conntrack_htable_size; + +struct ip_conntrack_stat +{ + unsigned int searched; + unsigned int found; + unsigned int new; + unsigned int invalid; + unsigned int ignore; + unsigned int delete; + unsigned int delete_list; + unsigned int insert; + unsigned int insert_failed; + unsigned int drop; + unsigned int early_drop; + unsigned int icmp_error; + unsigned int expect_new; + unsigned int expect_create; + unsigned int expect_delete; +}; + /* eg. 
PROVIDES_CONNTRACK(ftp); */ #define PROVIDES_CONNTRACK(name) \ diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 8d23d6829cef..353223e65d12 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -35,6 +35,7 @@ #include #include #include +#include #include /* This rwlock protects the main hash table, protocol/helper/expected @@ -58,6 +59,7 @@ DECLARE_RWLOCK(ip_conntrack_lock); DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock); +static atomic_t ip_conntrack_count = ATOMIC_INIT(0); void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); @@ -65,12 +67,13 @@ LIST_HEAD(protocol_list); static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size = 0; int ip_conntrack_max; -static atomic_t ip_conntrack_count = ATOMIC_INIT(0); struct list_head *ip_conntrack_hash; static kmem_cache_t *ip_conntrack_cachep; static kmem_cache_t *ip_conntrack_expect_cachep; struct ip_conntrack ip_conntrack_untracked; +DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); + extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr, @@ -179,6 +182,7 @@ destroy_expect(struct ip_conntrack_expect *exp) IP_NF_ASSERT(!timer_pending(&exp->timeout)); kmem_cache_free(ip_conntrack_expect_cachep, exp); + __get_cpu_var(ip_conntrack_stat).expect_delete++; } inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) @@ -347,12 +351,15 @@ destroy_conntrack(struct nf_conntrack *nfct) DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); kmem_cache_free(ip_conntrack_cachep, ct); atomic_dec(&ip_conntrack_count); + __get_cpu_var(ip_conntrack_stat).delete++; } static void death_by_timeout(unsigned long ul_conntrack) { struct ip_conntrack *ct = (void *)ul_conntrack; + __get_cpu_var(ip_conntrack_stat).delete_list++; + WRITE_LOCK(&ip_conntrack_lock); clean_from_lists(ct); 
WRITE_UNLOCK(&ip_conntrack_lock); @@ -375,13 +382,19 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, { struct ip_conntrack_tuple_hash *h; unsigned int hash = hash_conntrack(tuple); + /* use per_cpu() to avoid multiple calls to smp_processor_id() */ + unsigned int cpu = smp_processor_id(); MUST_BE_READ_LOCKED(&ip_conntrack_lock); - h = LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - tuple, ignored_conntrack); - return h; + list_for_each_entry(h, &ip_conntrack_hash[hash], list) { + if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + per_cpu(ip_conntrack_stat, cpu).found++; + return h; + } + per_cpu(ip_conntrack_stat, cpu).searched++; + } + + return NULL; } /* Find a connection corresponding to a tuple. */ @@ -475,10 +488,12 @@ __ip_conntrack_confirm(struct nf_ct_info *nfct) atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); WRITE_UNLOCK(&ip_conntrack_lock); + __get_cpu_var(ip_conntrack_stat).insert++; return NF_ACCEPT; } WRITE_UNLOCK(&ip_conntrack_lock); + __get_cpu_var(ip_conntrack_stat).insert_failed++; return NF_DROP; } @@ -522,6 +537,7 @@ static int early_drop(struct list_head *chain) if (del_timer(&h->ctrack->timeout)) { death_by_timeout((unsigned long)h->ctrack); dropped = 1; + __get_cpu_var(ip_conntrack_stat).early_drop++; } ip_conntrack_put(h->ctrack); return dropped; @@ -650,11 +666,16 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, if (expected->expectfn) expected->expectfn(conntrack); + + __get_cpu_var(ip_conntrack_stat).expect_new++; goto ret; - } else + } else { conntrack->helper = ip_ct_find_helper(&repl_tuple); + __get_cpu_var(ip_conntrack_stat).new++; + } + end: atomic_inc(&ip_conntrack_count); WRITE_UNLOCK(&ip_conntrack_lock); @@ -755,8 +776,10 @@ unsigned int ip_conntrack_in(unsigned int hooknum, #endif /* Previously seen (loopback or untracked)? Ignore. 
*/ - if ((*pskb)->nfct) + if ((*pskb)->nfct) { + __get_cpu_var(ip_conntrack_stat).ignore++; return NF_ACCEPT; + } proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); @@ -764,16 +787,22 @@ unsigned int ip_conntrack_in(unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (proto->error != NULL - && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) + && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) { + __get_cpu_var(ip_conntrack_stat).icmp_error++; return -ret; + } - if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) + if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) { /* Not valid part of a connection */ + __get_cpu_var(ip_conntrack_stat).invalid++; return NF_ACCEPT; + } - if (IS_ERR(ct)) + if (IS_ERR(ct)) { /* Too stressed to deal. */ + __get_cpu_var(ip_conntrack_stat).drop++; return NF_DROP; + } IP_NF_ASSERT((*pskb)->nfct); @@ -782,6 +811,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, /* Invalid */ nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; + __get_cpu_var(ip_conntrack_stat).invalid++; return NF_ACCEPT; } @@ -789,6 +819,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, ret = ct->helper->help(*pskb, ct, ctinfo); if (ret == -1) { /* Invalid */ + __get_cpu_var(ip_conntrack_stat).invalid++; nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; return NF_ACCEPT; @@ -992,6 +1023,8 @@ out: ip_conntrack_expect_insert(expect, related_to); WRITE_UNLOCK(&ip_conntrack_lock); + __get_cpu_var(ip_conntrack_stat).expect_create++; + return ret; } diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index f3e173db4daa..74ae27430c17 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -44,6 +45,9 @@ MODULE_LICENSE("GPL"); +extern atomic_t 
ip_conntrack_count; +DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); + static int kill_proto(const struct ip_conntrack *i, void *data) { return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == @@ -283,6 +287,86 @@ static struct file_operations exp_file_ops = { .release = seq_release }; +static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + int cpu; + + for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu; + return &per_cpu(ip_conntrack_stat, cpu); + } + + return NULL; +} + +static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int cpu; + + for (cpu = *pos + 1; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu; + return &per_cpu(ip_conntrack_stat, cpu); + } + + return NULL; +} + +static void ct_cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int ct_cpu_seq_show(struct seq_file *seq, void *v) +{ + unsigned int nr_conntracks = atomic_read(&ip_conntrack_count); + struct ip_conntrack_stat *st = v; + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x %08x %08x \n", + nr_conntracks, + st->searched, + st->found, + st->new, + st->invalid, + st->ignore, + st->delete, + st->delete_list, + st->insert, + st->insert_failed, + st->drop, + st->early_drop, + st->icmp_error, + + st->expect_new, + st->expect_create, + st->expect_delete + ); + return 0; +} + +static struct seq_operations ct_cpu_seq_ops = { + .start = ct_cpu_seq_start, + .next = ct_cpu_seq_next, + .stop = ct_cpu_seq_stop, + .show = ct_cpu_seq_show, +}; + +static int ct_cpu_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ct_cpu_seq_ops); +} + +static struct file_operations ct_cpu_seq_fops = { + .owner = THIS_MODULE, + .open = ct_cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + static unsigned int ip_confirm(unsigned int hooknum, struct sk_buff **pskb, const struct 
net_device *in, @@ -608,7 +692,7 @@ static ctl_table ip_ct_net_table[] = { #endif static int init_or_cleanup(int init) { - struct proc_dir_entry *proc, *proc_exp; + struct proc_dir_entry *proc, *proc_exp, *proc_stat; int ret = 0; if (!init) goto cleanup; @@ -625,10 +709,16 @@ static int init_or_cleanup(int init) if (!proc_exp) goto cleanup_proc; proc_exp->proc_fops = &exp_file_ops; + proc_stat = proc_net_fops_create("ip_conntrack_stat", S_IRUGO, + &ct_cpu_seq_fops); + if (!proc_stat) + goto cleanup_proc_exp; + proc_stat->owner = THIS_MODULE; + ret = nf_register_hook(&ip_conntrack_defrag_ops); if (ret < 0) { printk("ip_conntrack: can't register pre-routing defrag hook.\n"); - goto cleanup_proc_exp; + goto cleanup_proc_stat; } ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops); if (ret < 0) { @@ -680,6 +770,8 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); cleanup_defragops: nf_unregister_hook(&ip_conntrack_defrag_ops); + cleanup_proc_stat: + proc_net_remove("ip_conntrack_stat"); cleanup_proc_exp: proc_net_remove("ip_conntrack_exp"); cleanup_proc: -- cgit v1.2.3 From 753ae03fd51b7731e60d6a8dda4c8a919cc8cec2 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Jul 2004 09:08:32 -0700 Subject: [NETFILTER]: Add tcp window tracking This is the tcp window tracking patch, incremental to all previous changes. It is now by default enabled (i.e. in 'conservative' mode). If you think it's better to leave it disabled ('liberal' mode), you can change ip_conntrack_tcp_be_liberal to a different default value. Cheers, Signed-off-by: Jozsef Kadlecsik Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 5 + include/linux/netfilter_ipv4/ip_conntrack_tcp.h | 37 +- net/ipv4/netfilter/ip_conntrack_core.c | 7 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 1001 ++++++++++++++++++++--- net/ipv4/netfilter/ip_conntrack_standalone.c | 36 + 5 files changed, 973 insertions(+), 113 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 73cd72fdd3fe..29bfa38006fe 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -262,6 +262,11 @@ extern void ip_ct_refresh_acct(struct ip_conntrack *ct, unsigned long extra_jiffies); /* These are for NAT. Icky. */ +/* Update TCP window tracking data when NAT mangles the packet */ +extern int ip_conntrack_tcp_update(struct sk_buff *skb, + struct ip_conntrack *conntrack, + int dir); + /* Call me when a conntrack is destroyed. */ extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack); diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h b/include/linux/netfilter_ipv4/ip_conntrack_tcp.h index d6698c911e11..0ab4590a0b16 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tcp.h @@ -4,25 +4,44 @@ enum tcp_conntrack { TCP_CONNTRACK_NONE, - TCP_CONNTRACK_ESTABLISHED, TCP_CONNTRACK_SYN_SENT, TCP_CONNTRACK_SYN_RECV, + TCP_CONNTRACK_ESTABLISHED, TCP_CONNTRACK_FIN_WAIT, - TCP_CONNTRACK_TIME_WAIT, - TCP_CONNTRACK_CLOSE, TCP_CONNTRACK_CLOSE_WAIT, TCP_CONNTRACK_LAST_ACK, + TCP_CONNTRACK_TIME_WAIT, + TCP_CONNTRACK_CLOSE, TCP_CONNTRACK_LISTEN, - TCP_CONNTRACK_MAX + TCP_CONNTRACK_MAX, + TCP_CONNTRACK_IGNORE +}; + +/* Window scaling is advertised by the sender */ +#define IP_CT_TCP_STATE_FLAG_WINDOW_SCALE 0x01 + +/* SACK is permitted by the sender */ +#define IP_CT_TCP_FLAG_SACK_PERM 0x02 + +struct ip_ct_tcp_state { + u_int32_t td_end; /* max of seq + len */ + u_int32_t td_maxend; /* max of ack + max(win, 
1) */ + u_int32_t td_maxwin; /* max(win) */ + u_int8_t td_scale; /* window scale factor */ + u_int8_t loose; /* used when connection picked up from the middle */ + u_int8_t flags; /* per direction state flags */ }; struct ip_ct_tcp { - enum tcp_conntrack state; - - /* Poor man's window tracking: sequence number of valid ACK - handshake completion packet */ - u_int32_t handshake_ack; + struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */ + u_int8_t state; /* state of the connection (enum tcp_conntrack) */ + /* For detecting stale connections */ + u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */ + u_int8_t retrans; /* Number of retransmitted packets */ + u_int8_t last_index; /* Index of the last packet */ + u_int32_t last_seq; /* Last sequence number seen in dir */ + u_int32_t last_end; /* Last seq + len */ }; #endif /* _IP_CONNTRACK_TCP_H */ diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 353223e65d12..34a7bbcc67d8 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -807,12 +807,13 @@ unsigned int ip_conntrack_in(unsigned int hooknum, IP_NF_ASSERT((*pskb)->nfct); ret = proto->packet(ct, *pskb, ctinfo); - if (ret == -1) { - /* Invalid */ + if (ret < 0) { + /* Invalid: inverse of the return code tells + * the netfilter core what to do*/ nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; __get_cpu_var(ip_conntrack_stat).invalid++; - return NF_ACCEPT; + return -ret; } if (ret != NF_DROP && ct->helper) { diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 6344fa0c53b2..a47351ba3e52 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -4,8 +4,22 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free 
Software Foundation. + * + * Jozsef Kadlecsik : + * - Real stateful connection tracking + * - Modified state transitions table + * - Window scaling support added + * - SACK support added + * + * Willy Tarreau: + * - State table bugfixes + * - More robust state changes + * - Tuning timer parameters + * + * version 2.2 */ +#include #include #include #include @@ -14,16 +28,18 @@ #include #include #include -#include +#include #include +#include #include #include #include #if 0 #define DEBUGP printk +#define DEBUGP_VARS #else #define DEBUGP(format, args...) #endif @@ -31,28 +47,40 @@ /* Protects conntrack->proto.tcp */ static DECLARE_RWLOCK(tcp_lock); -/* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. --RR */ +/* "Be conservative in what you do, + be liberal in what you accept from others." + If it's non-zero, we mark only out of window RST segments as INVALID. */ +int ip_ct_tcp_be_liberal = 0; -/* Actually, I believe that neither ipmasq (where this code is stolen - from) nor ipfilter do it exactly right. A new conntrack machine taking - into account packet loss (which creates uncertainty as to exactly - the conntrack of the connection) is required. RSN. --RR */ +/* When connection is picked up from the middle, how many packets are required + to pass in each direction when we assume we are in sync - if any side uses + window scaling, we lost the game. + If it is set to zero, we disable picking up already established + connections. */ +int ip_ct_tcp_loose = 3; + +/* Max number of the retransmitted packets without receiving an (acceptable) + ACK from the destination. If this number is reached, a shorter timer + will be started. */ +int ip_ct_tcp_max_retrans = 3; + + /* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. 
--RR */ static const char *tcp_conntrack_names[] = { "NONE", - "ESTABLISHED", "SYN_SENT", "SYN_RECV", + "ESTABLISHED", "FIN_WAIT", - "TIME_WAIT", - "CLOSE", "CLOSE_WAIT", "LAST_ACK", + "TIME_WAIT", + "CLOSE", "LISTEN" }; - -#define SECS *HZ + +#define SECS * HZ #define MINS * 60 SECS #define HOURS * 60 MINS #define DAYS * 24 HOURS @@ -66,55 +94,204 @@ unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS; unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS; unsigned long ip_ct_tcp_timeout_close = 10 SECS; +/* RFC1122 says the R2 limit should be at least 100 seconds. + Linux uses 15 packets as limit, which corresponds + to ~13-30min depending on RTO. */ +unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS; + static unsigned long * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ - &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ + &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ - &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ - &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ + &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ + &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ NULL, /* TCP_CONNTRACK_LISTEN */ }; #define sNO TCP_CONNTRACK_NONE -#define sES TCP_CONNTRACK_ESTABLISHED #define sSS TCP_CONNTRACK_SYN_SENT #define sSR TCP_CONNTRACK_SYN_RECV +#define sES TCP_CONNTRACK_ESTABLISHED #define sFW TCP_CONNTRACK_FIN_WAIT -#define sTW TCP_CONNTRACK_TIME_WAIT -#define sCL TCP_CONNTRACK_CLOSE #define sCW TCP_CONNTRACK_CLOSE_WAIT #define sLA TCP_CONNTRACK_LAST_ACK +#define sTW TCP_CONNTRACK_TIME_WAIT +#define sCL TCP_CONNTRACK_CLOSE #define sLI TCP_CONNTRACK_LISTEN #define sIV TCP_CONNTRACK_MAX +#define sIG 
TCP_CONNTRACK_IGNORE -static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = { +/* What TCP flags are set from RST/SYN/FIN/ACK. */ +enum tcp_bit_set { + TCP_SYN_SET, + TCP_SYNACK_SET, + TCP_FIN_SET, + TCP_ACK_SET, + TCP_RST_SET, + TCP_NONE_SET, +}; + +/* + * The TCP state transition table needs a few words... + * + * We are the man in the middle. All the packets go through us + * but might get lost in transit to the destination. + * It is assumed that the destinations can't receive segments + * we haven't seen. + * + * The checked segment is in window, but our windows are *not* + * equivalent with the ones of the sender/receiver. We always + * try to guess the state of the current sender. + * + * The meaning of the states are: + * + * NONE: initial state + * SYN_SENT: SYN-only packet seen + * SYN_RECV: SYN-ACK packet seen + * ESTABLISHED: ACK packet seen + * FIN_WAIT: FIN packet seen + * CLOSE_WAIT: ACK seen (after FIN) + * LAST_ACK: FIN seen (after FIN) + * TIME_WAIT: last ACK seen + * CLOSE: closed connection + * + * LISTEN state is not used. + * + * Packets marked as IGNORED (sIG): + * if they may be either invalid or valid + * and the receiver may send back a connection + * closing RST or a SYN/ACK. 
+ * + * Packets marked as INVALID (sIV): + * if they are invalid + * or we do not support the request (simultaneous open) + */ +static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { -/* ORIGINAL */ -/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ -/*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI }, -/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI }, -/*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES }, -/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL }, -/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } +/* ORIGINAL */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, +/* + * sNO -> sSS Initialize a new connection + * sSS -> sSS Retransmitted SYN + * sSR -> sIG Late retransmitted SYN? + * sES -> sIG Error: SYNs in window outside the SYN_SENT state + * are errors. Receiver will reply with RST + * and close the connection. + * Or we are not in sync and hold a dead connection. + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sSS Reopened connection (RFC 1122). + * sCL -> sSS + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* + * A SYN/ACK from the client is always invalid: + * - either it tries to set up a simultaneous open, which is + * not supported; + * - or the firewall has just been inserted between the two hosts + * during the session set-up. The SYN will be retransmitted + * by the true client (or it'll time out). + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, +/* + * sNO -> sIV Too late and no reason to do anything... + * sSS -> sIV Client might not send FIN in this state: + * we enforce waiting for a SYN/ACK reply first. + * sSR -> sFW Close started. + * sES -> sFW + * sFW -> sLA FIN seen in both directions, waiting for + * the last ACK.
+ * Might be a retransmitted FIN as well... + * sCW -> sLA + * sLA -> sLA Retransmitted FIN. Remain in the same state. + * sTW -> sTW + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* + * sNO -> sES Assumed. + * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sSR -> sES Established state is reached. + * sES -> sES :-) + * sFW -> sCW Normal close request answered by ACK. + * sCW -> sCW + * sLA -> sTW Last ACK detected. + * sTW -> sTW Retransmitted last ACK. Remain in the same state. + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { -/* REPLY */ -/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ -/*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }, -/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI }, -/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI }, -/*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI }, -/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } - } +/* REPLY */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* + * sNO -> sIV Never reached. + * sSS -> sIV Simultaneous open, not supported + * sSR -> sIV Simultaneous open, not supported. + * sES -> sIV Server may not initiate a connection. + * sFW -> sIV + * sCW -> sIV + * sLA -> sIV + * sTW -> sIV Reopened connection, but server may not do it. + * sCL -> sIV + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, +/* + * sSS -> sSR Standard open. + * sSR -> sSR Retransmitted SYN/ACK. + * sES -> sIG Late retransmitted SYN/ACK?
+ * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sIG + * sCL -> sIG + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, +/* + * sSS -> sIV Server might not send FIN in this state. + * sSR -> sFW Close started. + * sES -> sFW + * sFW -> sLA FIN seen in both directions. + * sCW -> sLA + * sLA -> sLA Retransmitted FIN. + * sTW -> sTW + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*ack*/ { sIV, sIV, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* + * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sSR -> sIV Simultaneous open. + * sES -> sES :-) + * sFW -> sCW Normal close request answered by ACK. + * sCW -> sCW + * sLA -> sTW Last ACK detected. + * sTW -> sTW Retransmitted last ACK. + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + } }; static int tcp_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) + unsigned int dataoff, + struct ip_conntrack_tuple *tuple) { struct tcphdr hdr; @@ -160,11 +337,488 @@ static unsigned int tcp_print_conntrack(char *buffer, static unsigned int get_conntrack_index(const struct tcphdr *tcph) { - if (tcph->rst) return 3; - else if (tcph->syn) return 0; - else if (tcph->fin) return 1; - else if (tcph->ack) return 2; - else return 4; + if (tcph->rst) return TCP_RST_SET; + else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); + else if (tcph->fin) return TCP_FIN_SET; + else if (tcph->ack) return TCP_ACK_SET; + else return TCP_NONE_SET; +} + +/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering + in IP Filter' by Guido van Rooij. 
+ + http://www.nluug.nl/events/sane2000/papers.html + http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz + + The boundaries and the conditions are slightly changed: + + td_maxend = max(sack + max(win,1)) seen in reply packets + td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets + td_end = max(seq + len) seen in sent packets + + I. Upper bound for valid data: seq + len <= sender.td_maxend + II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin + III. Upper bound for valid ack: sack <= receiver.td_end + IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW + + where sack is the highest right edge of sack block found in the packet. + + The upper bound limit for a valid ack is not ignored - + we doesn't have to deal with fragments. +*/ + +static inline __u32 segment_seq_plus_len(__u32 seq, + size_t len, + struct iphdr *iph, + struct tcphdr *tcph) + { + return (seq + len - (iph->ihl + tcph->doff)*4 + + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); +} + +/* Fixme: what about big packets? */ +#define MAXACKWINCONST 66000 +#define MAXACKWINDOW(sender) \ + ((sender)->td_maxwin > MAXACKWINCONST ? 
(sender)->td_maxwin \ + : MAXACKWINCONST) + +/* + * Simplified tcp_parse_options routine from tcp_input.c + */ +static void tcp_options(struct tcphdr *tcph, + struct ip_ct_tcp_state *state) +{ + unsigned char *ptr = (unsigned char *)(tcph + 1); + int length = (tcph->doff*4) - sizeof(struct tcphdr); + + state->td_scale = + state->flags = 0; + + while (length > 0) { + int opcode=*ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize=*ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + break; /* don't parse partial options */ + + if (opcode == TCPOPT_SACK_PERM + && opsize == TCPOLEN_SACK_PERM) + state->flags |= IP_CT_TCP_FLAG_SACK_PERM; + else if (opcode == TCPOPT_WINDOW + && opsize == TCPOLEN_WINDOW) { + state->td_scale = *(u_int8_t *)ptr; + + if (state->td_scale > 14) { + /* See RFC1323 */ + state->td_scale = 14; + } + state->flags |= + IP_CT_TCP_STATE_FLAG_WINDOW_SCALE; + } + ptr += opsize - 2; + length -= opsize; + } + } +} + +static void tcp_sack(struct tcphdr *tcph, __u32 *sack) +{ + __u32 tmp; + unsigned char *ptr; + int length = (tcph->doff*4) - sizeof(struct tcphdr); + + /* Fast path for timestamp-only option */ + if (length == TCPOLEN_TSTAMP_ALIGNED*4 + && *(__u32 *)(tcph + 1) == + __constant_ntohl((TCPOPT_NOP << 24) + | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) + | TCPOLEN_TIMESTAMP)) + return; + + ptr = (unsigned char *)(tcph + 1); + while (length > 0) { + int opcode=*ptr++; + int opsize, i; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize=*ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + break; /* don't parse partial options */ + + if (opcode == TCPOPT_SACK + && opsize >= (TCPOLEN_SACK_BASE + + TCPOLEN_SACK_PERBLOCK) + && !((opsize - TCPOLEN_SACK_BASE) + % TCPOLEN_SACK_PERBLOCK)) { + 
for (i = 0; + i < (opsize - TCPOLEN_SACK_BASE); + i += TCPOLEN_SACK_PERBLOCK) { + tmp = ntohl(*((u_int32_t *)(ptr+i)+1)); + + if (after(tmp, *sack)) + *sack = tmp; + } + return; + } + ptr += opsize - 2; + length -= opsize; + } + } +} + +static int tcp_in_window(struct ip_ct_tcp *state, + enum ip_conntrack_dir dir, + unsigned int *index, + const struct sk_buff *skb, + struct iphdr *iph, + struct tcphdr *tcph) +{ + struct ip_ct_tcp_state *sender = &state->seen[dir]; + struct ip_ct_tcp_state *receiver = &state->seen[!dir]; + __u32 seq, ack, sack, end, win, swin; + int res; + + /* + * Get the required data from the packet. + */ + seq = ntohl(tcph->seq); + ack = sack = ntohl(tcph->ack_seq); + win = ntohs(tcph->window); + end = segment_seq_plus_len(seq, skb->len, iph, tcph); + + if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) + tcp_sack(tcph, &sack); + + DEBUGP("tcp_in_window: START\n"); + DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "seq=%u ack=%u sack=%u win=%u end=%u\n", + NIPQUAD(iph->saddr), ntohs(tcph->source), + NIPQUAD(iph->daddr), ntohs(tcph->dest), + seq, ack, sack, win, end); + DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + if (sender->td_end == 0) { + /* + * Initialize sender data. + */ + if (tcph->syn && tcph->ack) { + /* + * Outgoing SYN-ACK in reply to a SYN. + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(tcph, sender); + /* + * RFC 1323: + * Both sides must send the Window Scale option + * to enable window scaling in either direction. 
+ */ + if (!(sender->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE + && receiver->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE)) + sender->td_scale = + receiver->td_scale = 0; + } else { + /* + * We are in the middle of a connection, + * its history is lost for us. + * Let's try to use the data from the packet. + */ + sender->td_end = end; + sender->td_maxwin = (win == 0 ? 1 : win); + sender->td_maxend = end + sender->td_maxwin; + } + } else if (state->state == TCP_CONNTRACK_SYN_SENT + && dir == IP_CT_DIR_ORIGINAL + && after(end, sender->td_end)) { + /* + * RFC 793: "if a TCP is reinitialized ... then it need + * not wait at all; it must only be sure to use sequence + * numbers larger than those recently used." + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(tcph, sender); + } + + if (!(tcph->ack)) { + /* + * If there is no ACK, just pretend it was set and OK. + */ + ack = sack = receiver->td_end; + } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == + (TCP_FLAG_ACK|TCP_FLAG_RST)) + && (ack == 0)) { + /* + * Broken TCP stacks, that set ACK in RST packets as well + * with zero ack value. + */ + ack = sack = receiver->td_end; + } + + if (seq == end) + /* + * Packets contains no data: we assume it is valid + * and check the ack value only. + */ + seq = end = sender->td_end; + + DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n", + NIPQUAD(iph->saddr), ntohs(tcph->source), + NIPQUAD(iph->daddr), ntohs(tcph->dest), + seq, ack, sack, win, end, + after(end, sender->td_maxend) && before(seq, sender->td_maxend) + ? 
sender->td_maxend : end); + DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + /* Ignore data over the right edge of the receiver's window. */ + if (after(end, sender->td_maxend) && + before(seq, sender->td_maxend)) { + end = sender->td_maxend; + if (*index == TCP_FIN_SET) + *index = TCP_ACK_SET; + } + DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", + before(end, sender->td_maxend + 1) + || before(seq, sender->td_maxend + 1), + after(seq, sender->td_end - receiver->td_maxwin - 1) + || after(end, sender->td_end - receiver->td_maxwin - 1), + before(sack, receiver->td_end + 1), + after(ack, receiver->td_end - MAXACKWINDOW(sender))); + + if (sender->loose || receiver->loose || + (before(end, sender->td_maxend + 1) && + after(seq, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + /* + * Take into account window scaling (RFC 1323). + */ + if (!tcph->syn) + win <<= sender->td_scale; + + /* + * Update sender data. + */ + swin = win + (sack - ack); + if (sender->td_maxwin < swin) + sender->td_maxwin = swin; + if (after(end, sender->td_end)) + sender->td_end = end; + if (after(sack + win, receiver->td_maxend - 1)) { + receiver->td_maxend = sack + win; + if (win == 0) + receiver->td_maxend++; + } + + /* + * Check retransmissions. 
+ */ + if (*index == TCP_ACK_SET) { + if (state->last_dir == dir + && state->last_seq == seq + && state->last_end == end) + state->retrans++; + else { + state->last_dir = dir; + state->last_seq = seq; + state->last_end = end; + state->retrans = 0; + } + } + /* + * Close the window of disabled window tracking :-) + */ + if (sender->loose) + sender->loose--; + + res = 1; + } else { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: %s ", + before(end, sender->td_maxend + 1) ? + after(seq, sender->td_end - receiver->td_maxwin - 1) ? + before(ack, receiver->td_end + 1) ? + after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" + : "ACK is under the lower bound (possibly overly delayed ACK)" + : "ACK is over the upper bound (ACKed data has never seen yet)" + : "SEQ is under the lower bound (retransmitted already ACKed data)" + : "SEQ is over the upper bound (over the window of the receiver)"); + + res = ip_ct_tcp_be_liberal && !tcph->rst; + } + + DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " + "receiver end=%u maxend=%u maxwin=%u\n", + res, sender->td_end, sender->td_maxend, sender->td_maxwin, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin); + + return res; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +/* Update sender->td_end after NAT successfully mangled the packet */ +int ip_conntrack_tcp_update(struct sk_buff *skb, + struct ip_conntrack *conntrack, + int dir) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4; + __u32 end; +#ifdef DEBUGP_VARS + struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; + struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; +#endif + + end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph); + + WRITE_LOCK(&tcp_lock); + /* + * We have to worry for the ack in the reply packet only... 
+ */ + if (after(end, conntrack->proto.tcp.seen[dir].td_end)) + conntrack->proto.tcp.seen[dir].td_end = end; + conntrack->proto.tcp.last_end = end; + WRITE_UNLOCK(&tcp_lock); + DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + return 1; +} + +EXPORT_SYMBOL(ip_conntrack_tcp_update); +#endif + +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 +#define TH_ECE 0x40 +#define TH_CWR 0x80 + +/* table of valid flag combinations - ECE and CWR are always valid */ +static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = +{ + [TH_SYN] = 1, + [TH_SYN|TH_ACK] = 1, + [TH_RST] = 1, + [TH_RST|TH_ACK] = 1, + [TH_RST|TH_ACK|TH_PUSH] = 1, + [TH_FIN|TH_ACK] = 1, + [TH_ACK] = 1, + [TH_ACK|TH_PUSH] = 1, + [TH_ACK|TH_URG] = 1, + [TH_ACK|TH_URG|TH_PUSH] = 1, + [TH_FIN|TH_ACK|TH_PUSH] = 1, + [TH_FIN|TH_ACK|TH_URG] = 1, + [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1, +}; + +/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ +static int tcp_error(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr tcph; + unsigned int tcplen = skb->len - iph->ihl * 4; + u_int8_t tcpflags; + + /* Smaller that minimal TCP header? 
*/ + if (skb_copy_bits(skb, iph->ihl * 4, &tcph, sizeof(tcph)) != 0) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: short packet "); + return -NF_ACCEPT; + } + + /* Not whole TCP header or malformed packet */ + if (tcph.doff*4 < sizeof(struct tcphdr) || tcplen < tcph.doff*4) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: truncated/malformed packet "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. + * We skip checking packets on the outgoing path + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. + */ + /* FIXME: Source route IP option packets --RR */ + if (hooknum == NF_IP_PRE_ROUTING + && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: bad TCP checksum "); + return -NF_ACCEPT; + } + + /* Check TCP flags. */ + tcpflags = (((u_int8_t *)&tcph)[13] & ~(TH_ECE|TH_CWR)); + if (!tcp_valid_flags[tcpflags]) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: invalid TCP flag combination "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} + +static inline void copy_whole_tcp_header(const struct sk_buff *skb, + unsigned char *buff) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *tcph = (struct tcphdr *)buff; + + /* tcp_error guarantees for us that the packet is not malformed */ + skb_copy_bits(skb, iph->ihl * 4, buff, sizeof(*tcph)); + skb_copy_bits(skb, iph->ihl * 4 + sizeof(*tcph), + buff + sizeof(*tcph), + tcph->doff * 4 - sizeof(*tcph)); } /* Returns verdict for packet, or -1 for invalid. 
*/ @@ -172,88 +826,232 @@ static int tcp_packet(struct ip_conntrack *conntrack, const struct sk_buff *skb, enum ip_conntrack_info ctinfo) { - enum tcp_conntrack newconntrack, oldtcpstate; - struct tcphdr tcph; + enum tcp_conntrack new_state, old_state; + enum ip_conntrack_dir dir; + struct iphdr *iph = skb->nh.iph; + unsigned char buff[15 * 4]; + struct tcphdr *tcph = (struct tcphdr *)buff; + unsigned long timeout; + unsigned int index; + + copy_whole_tcp_header(skb, buff); + + WRITE_LOCK(&tcp_lock); + old_state = conntrack->proto.tcp.state; + dir = CTINFO2DIR(ctinfo); + index = get_conntrack_index(tcph); + new_state = tcp_conntracks[dir][index][old_state]; - if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0) - return -1; - if (skb->len < skb->nh.iph->ihl * 4 + tcph.doff * 4) - return -1; - - /* If only reply is a RST, we can consider ourselves not to - have an established connection: this is a fairly common - problem case, so we can delete the conntrack - immediately. --RR */ - if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) { - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long)conntrack); + switch (new_state) { + case TCP_CONNTRACK_IGNORE: + /* Either SYN in ORIGINAL, or SYN/ACK in REPLY direction. */ + if (index == TCP_SYNACK_SET + && conntrack->proto.tcp.last_index == TCP_SYN_SET + && conntrack->proto.tcp.last_dir != dir + && after(ntohl(tcph->ack_seq), + conntrack->proto.tcp.last_seq)) { + /* This SYN/ACK acknowledges a SYN that we earlier + * ignored as invalid. This means that the client and + * the server are both in sync, while the firewall is + * not. We kill this session and block the SYN/ACK so + * that the client cannot but retransmit its SYN and + * thus initiate a clean new session. 
+ */ + WRITE_UNLOCK(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: killing out of sync session "); + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return -NF_DROP; + } + conntrack->proto.tcp.last_index = index; + conntrack->proto.tcp.last_dir = dir; + conntrack->proto.tcp.last_seq = ntohl(tcph->seq); + + WRITE_UNLOCK(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: invalid SYN (ignored) "); return NF_ACCEPT; + case TCP_CONNTRACK_MAX: + /* Invalid packet */ + DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", + dir, get_conntrack_index(tcph), + old_state); + WRITE_UNLOCK(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: invalid state "); + return -NF_ACCEPT; + case TCP_CONNTRACK_SYN_SENT: + if (old_state >= TCP_CONNTRACK_TIME_WAIT) { + /* Attempt to reopen a closed connection. + * Delete this connection and look up again. */ + WRITE_UNLOCK(&tcp_lock); + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return -NF_REPEAT; + } + break; + case TCP_CONNTRACK_CLOSE: + if (index == TCP_RST_SET + && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET + && after(ntohl(tcph->ack_seq), + conntrack->proto.tcp.last_seq)) { + /* Ignore RST closing down invalid SYN + we had let trough. */ + WRITE_UNLOCK(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "ip_ct_tcp: invalid RST (ignored) "); + return NF_ACCEPT; + } + /* Just fall trough */ + default: + /* Keep compilers happy. 
*/ + break; } - WRITE_LOCK(&tcp_lock); - oldtcpstate = conntrack->proto.tcp.state; - newconntrack - = tcp_conntracks - [CTINFO2DIR(ctinfo)] - [get_conntrack_index(&tcph)][oldtcpstate]; - - /* Invalid */ - if (newconntrack == TCP_CONNTRACK_MAX) { - DEBUGP("ip_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n", - CTINFO2DIR(ctinfo), get_conntrack_index(&tcph), - conntrack->proto.tcp.state); + if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, + skb, iph, tcph)) { WRITE_UNLOCK(&tcp_lock); - return -1; + return -NF_ACCEPT; } + /* From now on we have got in-window packets */ + + /* If FIN was trimmed off, we don't change state. */ + conntrack->proto.tcp.last_index = index; + new_state = tcp_conntracks[dir][index][old_state]; - conntrack->proto.tcp.state = newconntrack; + DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", + NIPQUAD(iph->saddr), ntohs(tcph->source), + NIPQUAD(iph->daddr), ntohs(tcph->dest), + (tcph->syn ? 1 : 0), (tcph->ack ? 1 : 0), + (tcph->fin ? 1 : 0), (tcph->rst ? 1 : 0), + old_state, new_state); - /* Poor man's window tracking: record SYN/ACK for handshake check */ - if (oldtcpstate == TCP_CONNTRACK_SYN_SENT - && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY - && tcph.syn && tcph.ack) { - conntrack->proto.tcp.handshake_ack - = htonl(ntohl(tcph.seq) + 1); - goto out; - } - - /* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV */ - if (oldtcpstate == TCP_CONNTRACK_SYN_RECV - && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL - && tcph.ack && !tcph.syn - && tcph.ack_seq == conntrack->proto.tcp.handshake_ack) - set_bit(IPS_ASSURED_BIT, &conntrack->status); + conntrack->proto.tcp.state = new_state; + timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans + && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans + ? 
ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; + WRITE_UNLOCK(&tcp_lock); -out: WRITE_UNLOCK(&tcp_lock); - ip_ct_refresh_acct(conntrack, ctinfo, skb, *tcp_timeouts[newconntrack]); + if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + /* If only reply is a RST, we can consider ourselves not to + have an established connection: this is a fairly common + problem case, so we can delete the conntrack + immediately. --RR */ + if (tcph->rst) { + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return NF_ACCEPT; + } + } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) + && (old_state == TCP_CONNTRACK_SYN_RECV + || old_state == TCP_CONNTRACK_ESTABLISHED) + && new_state == TCP_CONNTRACK_ESTABLISHED) { + /* Set ASSURED if we see a valid ack in ESTABLISHED + after SYN_RECV or a valid answer for a picked up + connection. */ + set_bit(IPS_ASSURED_BIT, &conntrack->status); + } + ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); return NF_ACCEPT; } - -/* Called when a new connection for this protocol found. */ -static int tcp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) + + /* Called when a new connection for this protocol found.
*/ +static int tcp_new(struct ip_conntrack *conntrack, + const struct sk_buff *skb) { - enum tcp_conntrack newconntrack; - struct tcphdr tcph; - - if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0) - return -1; + enum tcp_conntrack new_state; + struct iphdr *iph = skb->nh.iph; + unsigned char buff[15 * 4]; + struct tcphdr *tcph = (struct tcphdr *)buff; +#ifdef DEBUGP_VARS + struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; + struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; +#endif + copy_whole_tcp_header(skb, buff); + /* Don't need lock here: this conntrack not in circulation yet */ - newconntrack - = tcp_conntracks[0][get_conntrack_index(&tcph)] + new_state + = tcp_conntracks[0][get_conntrack_index(tcph)] [TCP_CONNTRACK_NONE]; /* Invalid: delete conntrack */ - if (newconntrack == TCP_CONNTRACK_MAX) { - DEBUGP("ip_conntrack_tcp: invalid new deleting.\n"); + if (new_state >= TCP_CONNTRACK_MAX) { + DEBUGP("ip_ct_tcp: invalid new deleting.\n"); return 0; } - conntrack->proto.tcp.state = newconntrack; + if (new_state == TCP_CONNTRACK_SYN_SENT) { + /* SYN packet */ + conntrack->proto.tcp.seen[0].td_end = + segment_seq_plus_len(ntohl(tcph->seq), skb->len, + iph, tcph); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(tcph->window); + if (conntrack->proto.tcp.seen[0].td_maxwin == 0) + conntrack->proto.tcp.seen[0].td_maxwin = 1; + conntrack->proto.tcp.seen[0].td_maxend = + conntrack->proto.tcp.seen[0].td_end; + + tcp_options(tcph, &conntrack->proto.tcp.seen[0]); + conntrack->proto.tcp.seen[1].flags = 0; + conntrack->proto.tcp.seen[0].loose = + conntrack->proto.tcp.seen[1].loose = 0; + } else if (ip_ct_tcp_loose == 0) { + /* Don't try to pick up connections. */ + return 0; + } else { + /* + * We are in the middle of a connection, + * its history is lost for us. + * Let's try to use the data from the packet. 
+ */ + conntrack->proto.tcp.seen[0].td_end = + segment_seq_plus_len(ntohl(tcph->seq), skb->len, + iph, tcph); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(tcph->window); + if (conntrack->proto.tcp.seen[0].td_maxwin == 0) + conntrack->proto.tcp.seen[0].td_maxwin = 1; + conntrack->proto.tcp.seen[0].td_maxend = + conntrack->proto.tcp.seen[0].td_end + + conntrack->proto.tcp.seen[0].td_maxwin; + conntrack->proto.tcp.seen[0].td_scale = 0; + + /* We assume SACK. Should we assume window scaling too? */ + conntrack->proto.tcp.seen[0].flags = + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; + conntrack->proto.tcp.seen[0].loose = + conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose; + } + + conntrack->proto.tcp.seen[1].td_end = 0; + conntrack->proto.tcp.seen[1].td_maxend = 0; + conntrack->proto.tcp.seen[1].td_maxwin = 1; + conntrack->proto.tcp.seen[1].td_scale = 0; + + /* tcp_packet will set them */ + conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; + conntrack->proto.tcp.last_index = TCP_NONE_SET; + + DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); return 1; } - + static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp, const struct sk_buff *skb) { @@ -261,7 +1059,7 @@ static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp, struct tcphdr tcph; unsigned int datalen; - if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0) + if (skb_copy_bits(skb, iph->ihl * 4, &tcph, sizeof(tcph)) != 0) return 0; datalen = skb->len - iph->ihl*4 - tcph.doff*4; @@ -279,4 +1077,5 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = .packet = tcp_packet, .new = tcp_new, .exp_matches_pkt = tcp_exp_matches_pkt, + .error = tcp_error, }; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c 
b/net/ipv4/netfilter/ip_conntrack_standalone.c index 74ae27430c17..4068958482f4 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -505,6 +505,10 @@ extern unsigned long ip_ct_tcp_timeout_close_wait; extern unsigned long ip_ct_tcp_timeout_last_ack; extern unsigned long ip_ct_tcp_timeout_time_wait; extern unsigned long ip_ct_tcp_timeout_close; +extern unsigned long ip_ct_tcp_timeout_max_retrans; +extern int ip_ct_tcp_loose; +extern int ip_ct_tcp_be_liberal; +extern int ip_ct_tcp_max_retrans; /* From ip_conntrack_proto_udp.c */ extern unsigned long ip_ct_udp_timeout; @@ -647,6 +651,38 @@ static ctl_table ip_ct_sysctl_table[] = { .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, + .procname = "ip_conntrack_tcp_timeout_max_retrans", + .data = &ip_ct_tcp_timeout_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, + .procname = "ip_conntrack_tcp_loose", + .data = &ip_ct_tcp_loose, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, + .procname = "ip_conntrack_tcp_be_liberal", + .data = &ip_ct_tcp_be_liberal, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, + .procname = "ip_conntrack_tcp_max_retrans", + .data = &ip_ct_tcp_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; -- cgit v1.2.3 From 36b91e835685eab7a2ce5ec52068f9a498e1e754 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sun, 25 Jul 2004 02:36:20 -0700 Subject: [NETFILTER]: Missing sysctl.h bits from tcp window tracking changes. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/sysctl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 38acd5d4b691..ec4cedb886d9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -410,6 +410,11 @@ enum NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12, NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13, NET_IPV4_NF_CONNTRACK_BUCKETS=14, + NET_IPV4_NF_CONNTRACK_LOG_INVALID=15, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, + NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17, + NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18, + NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19, }; /* /proc/sys/net/ipv6 */ -- cgit v1.2.3 From 21b7bdf3c5b4a99573fcdc4609c469cf779d99c4 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sun, 1 Aug 2004 05:18:41 -0700 Subject: [NETFILTER]: New ip_conntrack_sctp Incremental to all other patches so far, there is also the new SCTP conntrack helper by Kiran Kumar. Please apply for 2.6.9 ++, thanks. Signed-off-by: Kiran Kumar Immidi Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 2 + include/linux/netfilter_ipv4/ip_conntrack_sctp.h | 25 + include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 6 + include/linux/sysctl.h | 7 + net/ipv4/netfilter/Kconfig | 4 + net/ipv4/netfilter/Makefile | 3 + net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 650 ++++++++++++++++++++++ 7 files changed, 697 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_sctp.h create mode 100644 net/ipv4/netfilter/ip_conntrack_proto_sctp.c diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 29bfa38006fe..28b61a71bce9 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -51,10 +51,12 @@ enum ip_conntrack_status { #include #include +#include /* per conntrack: protocol private data */ union ip_conntrack_proto { /* insert conntrack proto private data here */ + struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct ip_ct_icmp icmp; }; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sctp.h b/include/linux/netfilter_ipv4/ip_conntrack_sctp.h new file mode 100644 index 000000000000..7a8d869321f7 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_sctp.h @@ -0,0 +1,25 @@ +#ifndef _IP_CONNTRACK_SCTP_H +#define _IP_CONNTRACK_SCTP_H +/* SCTP tracking. 
*/ + +enum sctp_conntrack { + SCTP_CONNTRACK_NONE, + SCTP_CONNTRACK_CLOSED, + SCTP_CONNTRACK_COOKIE_WAIT, + SCTP_CONNTRACK_COOKIE_ECHOED, + SCTP_CONNTRACK_ESTABLISHED, + SCTP_CONNTRACK_SHUTDOWN_SENT, + SCTP_CONNTRACK_SHUTDOWN_RECD, + SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, + SCTP_CONNTRACK_MAX +}; + +struct ip_ct_sctp +{ + enum sctp_conntrack state; + + u_int32_t vtag[IP_CT_DIR_MAX]; + u_int32_t ttag[IP_CT_DIR_MAX]; +}; + +#endif /* _IP_CONNTRACK_SCTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 1e7691189c67..3a71176e2060 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -25,6 +25,9 @@ union ip_conntrack_manip_proto struct { u_int16_t id; } icmp; + struct { + u_int16_t port; + } sctp; }; /* The manipulable part of the tuple. */ @@ -55,6 +58,9 @@ struct ip_conntrack_tuple struct { u_int8_t type, code; } icmp; + struct { + u_int16_t port; + } sctp; } u; /* The protocol. 
*/ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index ec4cedb886d9..4de9d88425b5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -415,6 +415,13 @@ enum NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17, NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18, NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, }; /* /proc/sys/net/ipv6 */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 9f29305e3fdf..07c0fb9044b8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -636,5 +636,9 @@ config IP_NF_MATCH_SCTP tristate 'SCTP protocol match support' depends on IP_NF_IPTABLES +config IP_NF_CT_PROTO_SCTP + tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' + depends on IP_NF_CONNTRACK && EXPERIMENTAL + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 05b6be683257..164f4332a72d 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -19,6 +19,9 @@ ipchains-objs := $(ip_nf_compat-objs) ipchains_core.o # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o +# SCTP protocol connection tracking +obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o + # connection tracking helpers obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c new file mode 100644 index 000000000000..70ca8be84e78 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -0,0 +1,650 @@ +/* + * Connection tracking protocol helper module 
for SCTP. + * + * SCTP is defined in RFC 2960. References to various sections in this code + * are to this RFC. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Added support for proc manipulation of timeouts. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#if 0 +#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) +#else +#define DEBUGP(format, args...) +#endif + +/* Protects conntrack->proto.sctp */ +static DECLARE_RWLOCK(sctp_lock); + +/* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. --RR + + And so for me for SCTP :D -Kiran */ + +static const char *sctp_conntrack_names[] = { + "NONE", + "CLOSED", + "COOKIE_WAIT", + "COOKIE_ECHOED", + "ESTABLISHED", + "SHUTDOWN_SENT", + "SHUTDOWN_RECD", + "SHUTDOWN_ACK_SENT", +}; + +#define SECS * HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + +unsigned long ip_ct_sctp_timeout_closed = 10 SECS; +unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS; +unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS; +unsigned long ip_ct_sctp_timeout_established = 5 DAYS; +unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; +unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; +unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; + +static unsigned long * sctp_timeouts[] += { 0, /* SCTP_CONNTRACK_NONE */ + &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ + &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ + &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */ + &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */ + &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */ + &ip_ct_sctp_timeout_shutdown_recd, /* 
SCTP_CONNTRACK_SHUTDOWN_RECD */ + &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */ + }; + +#define sNO SCTP_CONNTRACK_NONE +#define sCL SCTP_CONNTRACK_CLOSED +#define sCW SCTP_CONNTRACK_COOKIE_WAIT +#define sCE SCTP_CONNTRACK_COOKIE_ECHOED +#define sES SCTP_CONNTRACK_ESTABLISHED +#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT +#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD +#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT +#define sIV SCTP_CONNTRACK_MAX + +/* + These are the descriptions of the states: + +NOTE: These state names are tantalizingly similar to the states of an +SCTP endpoint. But the interpretation of the states is a little different, +considering that these are the states of the connection and not of an end +point. Please note the subtleties. -Kiran + +NONE - Nothing so far. +COOKIE WAIT - We have seen an INIT chunk in the original direction, or also + an INIT_ACK chunk in the reply direction. +COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. +ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. +SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. +SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction. +SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite + to that of the SHUTDOWN chunk. +CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of + the SHUTDOWN chunk. Connection is closed. +*/ + +/* TODO + - I have assumed that the first INIT is in the original direction. + This messes things when an INIT comes in the reply direction in CLOSED + state. + - Check the error type in the reply dir before transitioning from +cookie echoed to closed. + - Sec 5.2.4 of RFC 2960 + - Multi Homing support.
+*/ + +/* SCTP conntrack state transitions */ +static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ +/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} + }, + { +/* REPLY */ +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} + } +}; + +static int sctp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple) +{ + sctp_sctphdr_t hdr; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + /* Actually only need first 8 bytes. 
*/ + if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0) + return 0; + + tuple->src.u.sctp.port = hdr.source; + tuple->dst.u.sctp.port = hdr.dest; + + return 1; +} + +static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + tuple->src.u.sctp.port = orig->dst.u.sctp.port; + tuple->dst.u.sctp.port = orig->src.u.sctp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int sctp_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + return sprintf(buffer, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.sctp.port), + ntohs(tuple->dst.u.sctp.port)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int sctp_print_conntrack(char *buffer, + const struct ip_conntrack *conntrack) +{ + enum sctp_conntrack state; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + READ_LOCK(&sctp_lock); + state = conntrack->proto.sctp.state; + READ_UNLOCK(&sctp_lock); + + return sprintf(buffer, "%s ", sctp_conntrack_names[state]); +} + +#define for_each_sctp_chunk(skb, sch, offset, count) \ +for (offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t), count = 0; \ + offset < skb->len && !skb_copy_bits(skb, offset, &sch, sizeof(sch)); \ + offset += (htons(sch.length) + 3) & ~3, count++) + +/* Some validity checks to make sure the chunks are fine */ +static int do_basic_checks(struct ip_conntrack *conntrack, + const struct sk_buff *skb, + char *map) +{ + u_int32_t offset, count; + sctp_chunkhdr_t sch; + int flag; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + flag = 0; + + for_each_sctp_chunk (skb, sch, offset, count) { + DEBUGP("Chunk Num: %d Type: %d\n", count, sch.type); + + if (sch.type == SCTP_CID_INIT + || sch.type == SCTP_CID_INIT_ACK + || sch.type == SCTP_CID_SHUTDOWN_COMPLETE) { + flag = 1; + } + + /* Cookie Ack/Echo chunks not the first OR + Init / Init Ack / Shutdown compl chunks not 
the only chunks */ + if ((sch.type == SCTP_CID_COOKIE_ACK + || sch.type == SCTP_CID_COOKIE_ECHO + || flag) + && count !=0 ) { + DEBUGP("Basic checks failed\n"); + return 1; + } + + if (map) { + set_bit (sch.type, (void *)map); + } + } + + DEBUGP("Basic checks passed\n"); + return 0; +} + +static int new_state(enum ip_conntrack_dir dir, + enum sctp_conntrack cur_state, + int chunk_type) +{ + int i; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + DEBUGP("Chunk type: %d\n", chunk_type); + + switch (chunk_type) { + case SCTP_CID_INIT: + DEBUGP("SCTP_CID_INIT\n"); + i = 0; break; + case SCTP_CID_INIT_ACK: + DEBUGP("SCTP_CID_INIT_ACK\n"); + i = 1; break; + case SCTP_CID_ABORT: + DEBUGP("SCTP_CID_ABORT\n"); + i = 2; break; + case SCTP_CID_SHUTDOWN: + DEBUGP("SCTP_CID_SHUTDOWN\n"); + i = 3; break; + case SCTP_CID_SHUTDOWN_ACK: + DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); + i = 4; break; + case SCTP_CID_ERROR: + DEBUGP("SCTP_CID_ERROR\n"); + i = 5; break; + case SCTP_CID_COOKIE_ECHO: + DEBUGP("SCTP_CID_COOKIE_ECHO\n"); + i = 6; break; + case SCTP_CID_COOKIE_ACK: + DEBUGP("SCTP_CID_COOKIE_ACK\n"); + i = 7; break; + case SCTP_CID_SHUTDOWN_COMPLETE: + DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); + i = 8; break; + default: + /* Other chunks like DATA, SACK, HEARTBEAT and + its ACK do not cause a change in state */ + DEBUGP("Unknown chunk type, Will stay in %s\n", + sctp_conntrack_names[cur_state]); + return cur_state; + } + + DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", + dir, sctp_conntrack_names[cur_state], chunk_type, + sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); + + return sctp_conntracks[dir][i][cur_state]; +} + +/* Returns verdict for packet, or -1 for invalid. 
*/ +static int sctp_packet(struct ip_conntrack *conntrack, + const struct sk_buff *skb, + enum ip_conntrack_info ctinfo) +{ + enum sctp_conntrack newconntrack, oldsctpstate; + sctp_sctphdr_t sctph; + sctp_chunkhdr_t sch; + u_int32_t offset, count; + char map[256 / sizeof (char)] = {0}; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &sctph, sizeof(sctph)) != 0) + return -1; + + if (do_basic_checks(conntrack, skb, map) != 0) + return -1; + + /* Check the verification tag (Sec 8.5) */ + if (!test_bit(SCTP_CID_INIT, (void *)map) + && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) + && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) + && !test_bit(SCTP_CID_ABORT, (void *)map) + && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) + && (sctph.vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { + DEBUGP("Verification tag check failed\n"); + return -1; + } + + oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; + for_each_sctp_chunk (skb, sch, offset, count) { + WRITE_LOCK(&sctp_lock); + + /* Special cases of Verification tag check (Sec 8.5.1) */ + if (sch.type == SCTP_CID_INIT) { + /* Sec 8.5.1 (A) */ + if (sctph.vtag != 0) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } else if (sch.type == SCTP_CID_ABORT) { + /* Sec 8.5.1 (B) */ + if (!(sctph.vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) + && !(sctph.vtag == conntrack->proto.sctp.vtag + [1 - CTINFO2DIR(ctinfo)])) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } else if (sch.type == SCTP_CID_SHUTDOWN_COMPLETE) { + /* Sec 8.5.1 (C) */ + if (!(sctph.vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) + && !(sctph.vtag == conntrack->proto.sctp.vtag + [1 - CTINFO2DIR(ctinfo)] + && (sch.flags & 1))) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } else if (sch.type == SCTP_CID_COOKIE_ECHO) { + /* Sec 8.5.1 (D) */ + if (!(sctph.vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } + + oldsctpstate = 
conntrack->proto.sctp.state; + newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch.type); + + /* Invalid */ + if (newconntrack == SCTP_CONNTRACK_MAX) { + DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", + CTINFO2DIR(ctinfo), sch.type, oldsctpstate); + WRITE_UNLOCK(&sctp_lock); + return -1; + } + + /* If it is an INIT or an INIT ACK note down the vtag */ + if (sch.type == SCTP_CID_INIT + || sch.type == SCTP_CID_INIT_ACK) { + sctp_inithdr_t inithdr; + + if (skb_copy_bits(skb, offset + sizeof (sctp_chunkhdr_t), + &inithdr, sizeof(inithdr)) != 0) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + DEBUGP("Setting vtag %x for dir %d\n", + inithdr.init_tag, CTINFO2DIR(ctinfo)); + conntrack->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = inithdr.init_tag; + } + + conntrack->proto.sctp.state = newconntrack; + WRITE_UNLOCK(&sctp_lock); + } + + ip_ct_refresh(conntrack, *sctp_timeouts[newconntrack]); + + if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED + && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY + && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { + DEBUGP("Setting assured bit\n"); + set_bit(IPS_ASSURED_BIT, &conntrack->status); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. 
*/ +static int sctp_new(struct ip_conntrack *conntrack, + const struct sk_buff *skb) +{ + enum sctp_conntrack newconntrack; + sctp_sctphdr_t sctph; + sctp_chunkhdr_t sch; + u_int32_t offset, count; + char map[256 / sizeof (char)] = {0}; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &sctph, sizeof(sctph)) != 0) + return -1; + + if (do_basic_checks(conntrack, skb, map) != 0) + return -1; + + /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ + if ((test_bit (SCTP_CID_ABORT, (void *)map)) + || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)) + || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) { + return -1; + } + + newconntrack = SCTP_CONNTRACK_MAX; + for_each_sctp_chunk (skb, sch, offset, count) { + /* Don't need lock here: this conntrack not in circulation yet */ + newconntrack = new_state (IP_CT_DIR_ORIGINAL, + SCTP_CONNTRACK_NONE, sch.type); + + /* Invalid: delete conntrack */ + if (newconntrack == SCTP_CONNTRACK_MAX) { + DEBUGP("ip_conntrack_sctp: invalid new deleting.\n"); + return 0; + } + + /* Copy the vtag into the state info */ + if (sch.type == SCTP_CID_INIT) { + if (sctph.vtag == 0) { + sctp_inithdr_t inithdr; + + if (skb_copy_bits(skb, offset + sizeof (sctp_chunkhdr_t), + &inithdr, sizeof(inithdr)) != 0) { + return -1; + } + + DEBUGP("Setting vtag %x for new conn\n", + inithdr.init_tag); + + conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = + inithdr.init_tag; + } else { + /* Sec 8.5.1 (A) */ + return -1; + } + } + /* If it is a shutdown ack OOTB packet, we expect a return + shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ + else { + DEBUGP("Setting vtag %x for new conn OOTB\n", + sctph.vtag); + conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sctph.vtag; + } + + conntrack->proto.sctp.state = newconntrack; + } + + return 1; +} + +static int sctp_exp_matches_pkt(struct ip_conntrack_expect *exp, + const struct sk_buff *skb) +{ + /* To be implemented */ + return 0; +} + +struct 
ip_conntrack_protocol ip_conntrack_protocol_sctp = { + .list = { NULL, NULL }, + .proto = IPPROTO_SCTP, + .name = "sctp", + .pkt_to_tuple = sctp_pkt_to_tuple, + .invert_tuple = sctp_invert_tuple, + .print_tuple = sctp_print_tuple, + .print_conntrack = sctp_print_conntrack, + .packet = sctp_packet, + .new = sctp_new, + .destroy = NULL, + .exp_matches_pkt = sctp_exp_matches_pkt, + .me = THIS_MODULE +}; + +#ifdef CONFIG_SYSCTL +static ctl_table ip_ct_sysctl_table[] = { + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, + .procname = "ip_conntrack_sctp_timeout_closed", + .data = &ip_ct_sctp_timeout_closed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, + .procname = "ip_conntrack_sctp_timeout_cookie_wait", + .data = &ip_ct_sctp_timeout_cookie_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, + .procname = "ip_conntrack_sctp_timeout_cookie_echoed", + .data = &ip_ct_sctp_timeout_cookie_echoed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, + .procname = "ip_conntrack_sctp_timeout_established", + .data = &ip_ct_sctp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, + .procname = "ip_conntrack_sctp_timeout_shutdown_sent", + .data = &ip_ct_sctp_timeout_shutdown_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, + .procname = "ip_conntrack_sctp_timeout_shutdown_recd", + .data = &ip_ct_sctp_timeout_shutdown_recd, + .maxlen = sizeof(unsigned int), + .mode = 0644, + 
.proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, + .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", + .data = &ip_ct_sctp_timeout_shutdown_ack_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table ip_ct_netfilter_table[] = { + { + .ctl_name = NET_IPV4_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = ip_ct_sysctl_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table ip_ct_ipv4_table[] = { + { + .ctl_name = NET_IPV4, + .procname = "ipv4", + .mode = 0555, + .child = ip_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table ip_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = ip_ct_ipv4_table, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *ip_ct_sysctl_header; +#endif + +int __init init(void) +{ + int ret; + + ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp); + if (ret) { + printk("ip_conntrack_proto_sctp: protocol register failed\n"); + goto out; + } + +#ifdef CONFIG_SYSCTL + ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0); + if (ip_ct_sysctl_header == NULL) { + printk("ip_conntrack_proto_sctp: can't register to sysctl.\n"); + goto cleanup; + } +#endif + + return ret; + + cleanup: +#ifdef CONFIG_SYSCTL + ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp); +#endif + out: + DEBUGP("SCTP conntrack module loading %s\n", + ret ? 
"failed": "succeeded"); + return ret; +} + +void __exit fini(void) +{ + ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp); +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(ip_ct_sysctl_header); +#endif + DEBUGP("SCTP conntrack module unloaded\n"); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kiran Kumar Immidi"); +MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); -- cgit v1.2.3 From b677b8298b6f52d12504849954d107fba0dc5dc2 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sun, 1 Aug 2004 05:27:26 -0700 Subject: [NETFILTER]: Fix broken debug assertion This patch fixes some more broken netfilter assertions in 2.6.x. Signed-off-by: Patrick McHardy Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_nat_rule.c | 3 ++- net/ipv4/netfilter/ipt_MASQUERADE.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 145b2c57368a..8ee96d10449a 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -132,7 +132,8 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, ct = ip_conntrack_get(*pskb, &ctinfo); /* Connection must be valid and new. 
*/ - IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED + || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); IP_NF_ASSERT(out); return ip_nat_setup_info(ct, targinfo, hooknum); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 54bc4684cc9d..ea02a12d7625 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -92,8 +92,8 @@ masquerade_target(struct sk_buff **pskb, return NF_ACCEPT; ct = ip_conntrack_get(*pskb, &ctinfo); - IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW - || ctinfo == IP_CT_RELATED)); + IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED + || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); mr = targinfo; -- cgit v1.2.3 From a283de08f61afdc2c24706aabcc69fd33747898b Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 10 Aug 2004 04:23:07 -0700 Subject: [CRYPTO]: Typo in crypto/Kconfig From Nicolas Kaiser Signed-off-by: James Morris Signed-off-by: David S. Miller --- crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 12429ef28b39..f5bee997bee9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -182,7 +182,7 @@ config CRYPTO_TEA many rounds for security. It is very fast and uses little memory. - Xtendend Tiny Encryption Algorithm is a modifcation to + Xtendend Tiny Encryption Algorithm is a modification to the TEA algorithm to address a potential key weakness in the TEA algorithm. -- cgit v1.2.3 From 8d19f94243e70563d4076602363744a12cfa1b89 Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 10 Aug 2004 04:23:40 -0700 Subject: [CRYPTO]: Typo in crypto/twofish.c From Nicolas Kaiser Signed-off-by: James Morris Signed-off-by: David S. 
Miller --- crypto/twofish.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/twofish.c b/crypto/twofish.c index 5d6d02c1a95d..4efff8cf9958 100644 --- a/crypto/twofish.c +++ b/crypto/twofish.c @@ -1,7 +1,7 @@ /* * Twofish for CryptoAPI * - * Originaly Twofish for GPG + * Originally Twofish for GPG * By Matthew Skala , July 26, 1998 * 256-bit key length added March 20, 1999 * Some modifications to reduce the text size by Werner Koch, April, 1998 @@ -514,7 +514,7 @@ static const u8 calc_sb_tbl[512] = { * preprocessed through q0 and q1 respectively; for longer keys they are the * output of previous stages. j is the index of the first key byte to use. * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2 - * twice, doing the Psuedo-Hadamard Transform, and doing the necessary + * twice, doing the Pseudo-Hadamard Transform, and doing the necessary * rotations. Its parameters are: a, the array to write the results into, * j, the index of the first output entry, k and l, the preprocessed indices * for index 2i, and m and n, the preprocessed indices for index 2i+1. -- cgit v1.2.3 From 3b5de441288610f5f3493eb483bad2246c0bab8a Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 10 Aug 2004 04:24:16 -0700 Subject: [CRYPTO]: Typo in crypto/aes.c From Nicolas Kaiser Signed-off-by: James Morris Signed-off-by: David S. 
Miller --- crypto/aes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/aes.c b/crypto/aes.c index 13b6daa37089..3a26f9c99aee 100644 --- a/crypto/aes.c +++ b/crypto/aes.c @@ -160,7 +160,7 @@ gen_tabs (void) u8 p, q; /* log and power tables for GF(2**8) finite field with - 0x011b as modular polynomial - the simplest prmitive + 0x011b as modular polynomial - the simplest primitive root is 0x03, used here to generate the tables */ for (i = 0, p = 1; i < 256; ++i) { -- cgit v1.2.3 From ff40eaa155654529edc577ff4a43d42287b2f367 Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 10 Aug 2004 04:24:48 -0700 Subject: [CRYPTO]: Typo in crypto/scatterwalk.c From Nicolas Kaiser Signed-off-by: James Morris Signed-off-by: David S. Miller --- crypto/scatterwalk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 540dd9ac1504..f6a5c9e5b2e0 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -70,7 +70,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, { /* walk->data may be pointing the first byte of the next page; however, we know we transfered at least one byte. So, - walk->data - 1 will be a virutual address in the mapped page. */ + walk->data - 1 will be a virtual address in the mapped page. */ if (out) flush_dcache_page(walk->page); -- cgit v1.2.3 From 1dfa6ae264b457a84ccf4936985cc2f2ff46223c Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 10 Aug 2004 04:25:19 -0700 Subject: [CRYPTO]: Typo in crypto/blowfish.c From Nicolas Kaiser Signed-off-by: James Morris Signed-off-by: David S. Miller --- crypto/blowfish.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crypto/blowfish.c b/crypto/blowfish.c index 8fd18dcdb679..b6bea821c05f 100644 --- a/crypto/blowfish.c +++ b/crypto/blowfish.c @@ -3,9 +3,9 @@ * * Blowfish Cipher Algorithm, by Bruce Schneier. 
* http://www.counterpane.com/blowfish.html - * - * Adapated from Kerneli implementation. - * + * + * Adapted from Kerneli implementation. + * * Copyright (c) Herbert Valerio Riedel * Copyright (c) Kyle McMartin * Copyright (c) 2002 James Morris -- cgit v1.2.3 From c079dba38bd6dd5fc322b7a1b58924cde6b29761 Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 10 Aug 2004 04:25:56 -0700 Subject: [CRYPTO]: Typo in crypto/tcrypt.h From Nicolas Kaiser Signed-off-by: James Morris Signed-off-by: David S. Miller --- crypto/tcrypt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index 56a825d6539b..81791c1c0142 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -1186,7 +1186,7 @@ struct cipher_testvec tf_cbc_dec_tv_template[] = { /* * Serpent test vectors. These are backwards because Serpent writes - * octect sequences in right-to-left mode. + * octet sequences in right-to-left mode. */ #define SERPENT_ENC_TEST_VECTORS 4 #define SERPENT_DEC_TEST_VECTORS 4 -- cgit v1.2.3 From b4977b447e571b32a0ff1ac8a38bb6fcffa83ad9 Mon Sep 17 00:00:00 2001 From: Jon Oberheide Date: Tue, 10 Aug 2004 04:29:42 -0700 Subject: [CRYPTO]: Email update in crypto/arc4.c --- crypto/arc4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/arc4.c b/crypto/arc4.c index b66eb1f0d001..9efbcaae88a1 100644 --- a/crypto/arc4.c +++ b/crypto/arc4.c @@ -3,7 +3,7 @@ * * ARC4 Cipher Algorithm * - * Jon Oberheide + * Jon Oberheide * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -100,4 +100,4 @@ module_exit(arc4_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ARC4 Cipher Algorithm"); -MODULE_AUTHOR("Jon Oberheide "); +MODULE_AUTHOR("Jon Oberheide "); -- cgit v1.2.3 From 970ee0ae5892212df3a1a283e578ed0485b75868 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Tue, 10 Aug 2004 04:36:25 -0700 Subject: [EBTABLES]: Remove deprecated 
use of MODULE_PARM. From Stephen Hemminger Minor patch to change from MODULE_PARM to module_param. Using a char for a flag is not necessary, everyone else just uses an int. Signed-off-by: Stephen Hemminger Signed-off-by: Bart De Schuymer Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_vlan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index ec111772bbe9..d0853cfc157e 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -21,13 +21,14 @@ #include #include #include +#include #include #include -static unsigned char debug; +static int debug; #define MODULE_VERS "0.6" -MODULE_PARM(debug, "0-1b"); +module_param(debug, int, 0); MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages"); MODULE_AUTHOR("Nick Fedchik "); MODULE_DESCRIPTION("802.1Q match module (ebtables extension), v" -- cgit v1.2.3 From d98e8833683a51ca67ac9efc4edbd837d7282b54 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Aug 2004 06:11:53 -0700 Subject: [IPV4]: Remove all references to IP_ROUTE_NAT support. It is totally broken, nobody is fixing it, and whoever wants to can resurrect it. Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 1 - include/net/route.h | 5 -- net/ipv4/Kconfig | 10 --- net/ipv4/fib_rules.c | 23 +------ net/ipv4/fib_semantics.c | 22 ------- net/ipv4/icmp.c | 10 --- net/ipv4/ip_nat_dumb.c | 166 ----------------------------------------------- net/ipv4/route.c | 54 +-------------- 8 files changed, 2 insertions(+), 289 deletions(-) delete mode 100644 net/ipv4/ip_nat_dumb.c diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f14edafeabbd..59825c399e15 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -242,7 +242,6 @@ extern u32 fib_rules_map_destination(u32 daddr, struct fib_result *res); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); #endif -extern u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags); extern void fib_rules_init(void); #endif diff --git a/include/net/route.h b/include/net/route.h index a5e9c575ea3e..5e0100185d95 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -73,11 +73,6 @@ struct rtable /* Miscellaneous cached information */ __u32 rt_spec_dst; /* RFC1122 specific destination */ struct inet_peer *peer; /* long-living peer info */ - -#ifdef CONFIG_IP_ROUTE_NAT - __u32 rt_src_map; - __u32 rt_dst_map; -#endif }; struct ip_rt_acct diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index de00c668c98c..e9e9f2c982a9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -82,16 +82,6 @@ config IP_ROUTE_FWMARK If you say Y here, you will be able to specify different routes for packets with different mark values (see iptables(8), MARK target). -config IP_ROUTE_NAT - bool "IP: fast network address translation" - depends on IP_MULTIPLE_TABLES - help - If you say Y here, your router will be able to modify source and - destination addresses of packets that pass through it, in a manner - you specify. General information about Network Address Translation - can be gotten from the document - . 
- config IP_ROUTE_MULTIPATH bool "IP: equal cost multipath" depends on IP_ADVANCED_ROUTER diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 66e78bb4e2d4..ad2481f8fa68 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -176,7 +176,7 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) table_id = rtm->rtm_table; if (table_id == RT_TABLE_UNSPEC) { struct fib_table *table; - if (rtm->rtm_type == RTN_UNICAST || rtm->rtm_type == RTN_NAT) { + if (rtm->rtm_type == RTN_UNICAST) { if ((table = fib_empty_table()) == NULL) return -ENOBUFS; table_id = table->tb_id; @@ -251,26 +251,6 @@ u32 fib_rules_map_destination(u32 daddr, struct fib_result *res) return (daddr&~mask)|res->fi->fib_nh->nh_gw; } -u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags) -{ - struct fib_rule *r = res->r; - - if (r->r_action == RTN_NAT) { - int addrtype = inet_addr_type(r->r_srcmap); - - if (addrtype == RTN_NAT) { - /* Packet is from translated source; remember it */ - saddr = (saddr&~r->r_srcmask)|r->r_srcmap; - *flags |= RTCF_SNAT; - } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { - /* Packet is from masqueraded source; remember it */ - saddr = r->r_srcmap; - *flags |= RTCF_MASQ; - } - } - return saddr; -} - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -334,7 +314,6 @@ FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", FRprintk("tb %d r %d ", r->r_table, r->r_action); switch (r->r_action) { case RTN_UNICAST: - case RTN_NAT: policy = r; break; case RTN_UNREACHABLE: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c1b6060df3f1..51191971eb12 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -124,17 +124,10 @@ static struct .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE, }, /* RTN_THROW */ -#ifdef CONFIG_IP_ROUTE_NAT - { - .error = 0, - .scope = RT_SCOPE_HOST, - }, /* RTN_NAT */ -#else { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, }, /* RTN_NAT */ 
-#endif { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, @@ -543,15 +536,6 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, #endif } -#ifdef CONFIG_IP_ROUTE_NAT - if (r->rtm_type == RTN_NAT) { - if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) - goto err_inval; - memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4); - goto link_it; - } -#endif - if (fib_props[r->rtm_type].error) { if (rta->rta_gw || rta->rta_oif || rta->rta_mp) goto err_inval; @@ -629,12 +613,6 @@ fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struc res->fi = fi; switch (type) { -#ifdef CONFIG_IP_ROUTE_NAT - case RTN_NAT: - FIB_RES_RESET(*res); - atomic_inc(&fi->fib_clntref); - return 0; -#endif case RTN_UNICAST: case RTN_LOCAL: case RTN_BROADCAST: diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 69261324d4b4..c19758c36310 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -503,16 +503,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) * Construct source address and options. */ -#ifdef CONFIG_IP_ROUTE_NAT - /* - * Restore original addresses if packet has been translated. - */ - if (rt->rt_flags & RTCF_NAT && IPCB(skb_in)->flags & IPSKB_TRANSLATED) { - iph->daddr = rt->fl.fl4_dst; - iph->saddr = rt->fl.fl4_src; - } -#endif - saddr = iph->daddr; if (!(rt->rt_flags & RTCF_LOCAL)) saddr = 0; diff --git a/net/ipv4/ip_nat_dumb.c b/net/ipv4/ip_nat_dumb.c deleted file mode 100644 index b58b5e22d019..000000000000 --- a/net/ipv4/ip_nat_dumb.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Dumb Network Address Translation. 
- * - * Version: $Id: ip_nat_dumb.c,v 1.11 2000/12/13 18:31:48 davem Exp $ - * - * Authors: Alexey Kuznetsov, - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Fixes: - * Rani Assaf : A zero checksum is a special case - * only in UDP - * Rani Assaf : Added ICMP messages rewriting - * Rani Assaf : Repaired wrong changes, made by ANK. - * - * - * NOTE: It is just working model of real NAT. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -int -ip_do_nat(struct sk_buff *skb) -{ - struct rtable *rt = (struct rtable*)skb->dst; - struct iphdr *iph = skb->nh.iph; - u32 odaddr = iph->daddr; - u32 osaddr = iph->saddr; - u16 check; - - IPCB(skb)->flags |= IPSKB_TRANSLATED; - - /* Rewrite IP header */ - iph->daddr = rt->rt_dst_map; - iph->saddr = rt->rt_src_map; - iph->check = 0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - /* If it is the first fragment, rewrite protocol headers */ - - if (!(iph->frag_off & htons(IP_OFFSET))) { - u16 *cksum; - - switch(iph->protocol) { - case IPPROTO_TCP: - cksum = (u16*)&((struct tcphdr*)(((char*)iph) + (iph->ihl<<2)))->check; - if ((u8*)(cksum+1) > skb->tail) - goto truncated; - check = *cksum; - if (skb->ip_summed != CHECKSUM_HW) - check = ~check; - check = csum_tcpudp_magic(iph->saddr, iph->daddr, 0, 0, check); - check = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check); - if (skb->ip_summed == CHECKSUM_HW) - check = ~check; - *cksum = check; - break; - case IPPROTO_UDP: - cksum = (u16*)&((struct udphdr*)(((char*)iph) + (iph->ihl<<2)))->check; - if ((u8*)(cksum+1) > skb->tail) - goto truncated; - if ((check = *cksum) != 0) { - check = csum_tcpudp_magic(iph->saddr, 
iph->daddr, 0, 0, ~check); - check = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check); - *cksum = check ? : 0xFFFF; - } - break; - case IPPROTO_ICMP: - { - struct icmphdr *icmph = (struct icmphdr*)((char*)iph + (iph->ihl<<2)); - struct iphdr *ciph; - u32 idaddr, isaddr; - int updated; - - if ((icmph->type != ICMP_DEST_UNREACH) && - (icmph->type != ICMP_TIME_EXCEEDED) && - (icmph->type != ICMP_PARAMETERPROB)) - break; - - ciph = (struct iphdr *) (icmph + 1); - - if ((u8*)(ciph+1) > skb->tail) - goto truncated; - - isaddr = ciph->saddr; - idaddr = ciph->daddr; - updated = 0; - - if (rt->rt_flags&RTCF_DNAT && ciph->saddr == odaddr) { - ciph->saddr = iph->daddr; - updated = 1; - } - if (rt->rt_flags&RTCF_SNAT) { - if (ciph->daddr != osaddr) { - struct fib_result res; - unsigned flags = 0; - struct flowi fl = { - .iif = skb->dev->ifindex, - .nl_u = - { .ip4_u = - { .daddr = ciph->saddr, - .saddr = ciph->daddr, -#ifdef CONFIG_IP_ROUTE_TOS - .tos = RT_TOS(ciph->tos) -#endif - } }, - .proto = ciph->protocol }; - - /* Use fib_lookup() until we get our own - * hash table of NATed hosts -- Rani - */ - if (fib_lookup(&fl, &res) == 0) { - if (res.r) { - ciph->daddr = fib_rules_policy(ciph->daddr, &res, &flags); - if (ciph->daddr != idaddr) - updated = 1; - } - fib_res_put(&res); - } - } else { - ciph->daddr = iph->saddr; - updated = 1; - } - } - if (updated) { - cksum = &icmph->checksum; - /* Using tcpudp primitive. Why not? 
*/ - check = csum_tcpudp_magic(ciph->saddr, ciph->daddr, 0, 0, ~(*cksum)); - *cksum = csum_tcpudp_magic(~isaddr, ~idaddr, 0, 0, ~check); - } - break; - } - default: - break; - } - } - return NET_RX_SUCCESS; - -truncated: - /* should be return NET_RX_BAD; */ - return -EINVAL; -} diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 15d9eca5384e..44d8056034fc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1387,13 +1387,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; else if (fib_lookup(&rt->fl, &res) == 0) { -#ifdef CONFIG_IP_ROUTE_NAT - if (res.type == RTN_NAT) - src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, - RT_SCOPE_UNIVERSE); - else -#endif - src = FIB_RES_PREFSRC(res); + src = FIB_RES_PREFSRC(res); fib_res_put(&res); } else src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, @@ -1497,10 +1491,6 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, #endif rth->fl.fl4_src = saddr; rth->rt_src = saddr; -#ifdef CONFIG_IP_ROUTE_NAT - rth->rt_dst_map = daddr; - rth->rt_src_map = saddr; -#endif #ifdef CONFIG_NET_CLS_ROUTE rth->u.dst.tclassid = itag; #endif @@ -1610,31 +1600,6 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, RT_CACHE_STAT_INC(in_slow_tot); -#ifdef CONFIG_IP_ROUTE_NAT - /* Policy is applied before mapping destination, - but rerouting after map should be made with old source. 
- */ - - if (1) { - u32 src_map = saddr; - if (res.r) - src_map = fib_rules_policy(saddr, &res, &flags); - - if (res.type == RTN_NAT) { - fl.fl4_dst = fib_rules_map_destination(daddr, &res); - fib_res_put(&res); - free_res = 0; - if (fib_lookup(&fl, &res)) - goto e_inval; - free_res = 1; - if (res.type != RTN_UNICAST) - goto e_inval; - flags |= RTCF_DNAT; - } - fl.fl4_src = src_map; - } -#endif - if (res.type == RTN_BROADCAST) goto brd_input; @@ -1708,12 +1673,6 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, rth->fl.fl4_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; -#ifdef CONFIG_IP_ROUTE_NAT - rth->rt_src_map = fl.fl4_src; - rth->rt_dst_map = fl.fl4_dst; - if (flags&RTCF_DNAT) - rth->rt_gateway = fl.fl4_dst; -#endif rth->rt_iif = rth->fl.iif = dev->ifindex; rth->u.dst.dev = out_dev->dev; @@ -1776,10 +1735,6 @@ local_input: #endif rth->fl.fl4_src = saddr; rth->rt_src = saddr; -#ifdef CONFIG_IP_ROUTE_NAT - rth->rt_dst_map = fl.fl4_dst; - rth->rt_src_map = fl.fl4_src; -#endif #ifdef CONFIG_NET_CLS_ROUTE rth->u.dst.tclassid = itag; #endif @@ -2072,9 +2027,6 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) } free_res = 1; - if (res.type == RTN_NAT) - goto e_inval; - if (res.type == RTN_LOCAL) { if (!fl.fl4_src) fl.fl4_src = fl.fl4_dst; @@ -2164,10 +2116,6 @@ make_route: #endif rth->rt_dst = fl.fl4_dst; rth->rt_src = fl.fl4_src; -#ifdef CONFIG_IP_ROUTE_NAT - rth->rt_dst_map = fl.fl4_dst; - rth->rt_src_map = fl.fl4_src; -#endif rth->rt_iif = oldflp->oif ? : dev_out->ifindex; rth->u.dst.dev = dev_out; dev_hold(dev_out); -- cgit v1.2.3 From 1bffa251c91cbd6ba0c5449286a57cb9059496da Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Aug 2004 02:42:07 -0700 Subject: [IPV4]: Move inetdev/ifa locking over to RCU. Multicast ipv4 address handling still uses rwlock and spinlock synchronization. Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 28 +++++++----- net/ipv4/devinet.c | 112 ++++++++++++++++++++------------------------- net/ipv4/fib_frontend.c | 4 +- net/ipv4/icmp.c | 4 +- net/ipv4/igmp.c | 92 +++++++++++++++++++------------------ net/ipv4/route.c | 6 +-- net/irda/irlan/irlan_eth.c | 4 +- net/sctp/protocol.c | 8 ++-- 8 files changed, 125 insertions(+), 133 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 27a5d0a97dbc..29d1135f1201 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#include + struct ipv4_devconf { int accept_redirects; @@ -31,13 +33,13 @@ extern struct ipv4_devconf ipv4_devconf; struct in_device { - struct net_device *dev; + struct net_device *dev; atomic_t refcnt; - rwlock_t lock; int dead; struct in_ifaddr *ifa_list; /* IP ifaddr chain */ + rwlock_t mc_list_lock; struct ip_mc_list *mc_list; /* IP multicast filter chain */ - rwlock_t mc_lock; /* for mc_tomb */ + spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; unsigned long mr_v2_seen; @@ -50,6 +52,7 @@ struct in_device struct neigh_parms *arp_parms; struct ipv4_devconf cnf; + struct rcu_head rcu_head; }; #define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding) @@ -80,6 +83,7 @@ struct in_ifaddr { struct in_ifaddr *ifa_next; struct in_device *ifa_dev; + struct rcu_head rcu_head; u32 ifa_local; u32 ifa_address; u32 ifa_mask; @@ -133,19 +137,16 @@ static __inline__ int bad_mask(u32 mask, u32 addr) #define endfor_ifa(in_dev) } -extern rwlock_t inetdev_lock; - - static __inline__ struct in_device * in_dev_get(const struct net_device *dev) { struct in_device *in_dev; - read_lock(&inetdev_lock); + rcu_read_lock(); in_dev = dev->ip_ptr; if (in_dev) atomic_inc(&in_dev->refcnt); - read_unlock(&inetdev_lock); + rcu_read_unlock(); return in_dev; } @@ -157,11 +158,16 @@ __in_dev_get(const struct net_device *dev) extern void in_dev_finish_destroy(struct in_device *idev); 
-static __inline__ void -in_dev_put(struct in_device *idev) +static inline void in_dev_rcu_destroy(struct rcu_head *head) +{ + struct in_device *idev = container_of(head, struct in_device, rcu_head); + in_dev_finish_destroy(idev); +} + +static inline void in_dev_put(struct in_device *idev) { if (atomic_dec_and_test(&idev->refcnt)) - in_dev_finish_destroy(idev); + call_rcu(&idev->rcu_head, in_dev_rcu_destroy); } #define __in_dev_put(idev) atomic_dec(&(idev)->refcnt) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 4781dea42dfe..9640915a0717 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -88,12 +88,9 @@ static void devinet_sysctl_register(struct in_device *in_dev, static void devinet_sysctl_unregister(struct ipv4_devconf *p); #endif -int inet_ifa_count; -int inet_dev_count; - /* Locks all the inet devices. */ -rwlock_t inetdev_lock = RW_LOCK_UNLOCKED; +static spinlock_t inetdev_lock = SPIN_LOCK_UNLOCKED; static struct in_ifaddr *inet_alloc_ifa(void) { @@ -101,18 +98,24 @@ static struct in_ifaddr *inet_alloc_ifa(void) if (ifa) { memset(ifa, 0, sizeof(*ifa)); - inet_ifa_count++; + INIT_RCU_HEAD(&ifa->rcu_head); } return ifa; } -static __inline__ void inet_free_ifa(struct in_ifaddr *ifa) +static inline void inet_free_ifa(struct in_ifaddr *ifa) { if (ifa->ifa_dev) - __in_dev_put(ifa->ifa_dev); + in_dev_put(ifa->ifa_dev); kfree(ifa); - inet_ifa_count--; +} + +static void inet_rcu_free_ifa(struct rcu_head *head) +{ + struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); + + inet_free_ifa(ifa); } void in_dev_finish_destroy(struct in_device *idev) @@ -129,7 +132,6 @@ void in_dev_finish_destroy(struct in_device *idev) if (!idev->dead) printk("Freeing alive in_device %p\n", idev); else { - inet_dev_count--; kfree(idev); } } @@ -144,24 +146,23 @@ struct in_device *inetdev_init(struct net_device *dev) if (!in_dev) goto out; memset(in_dev, 0, sizeof(*in_dev)); - in_dev->lock = RW_LOCK_UNLOCKED; + INIT_RCU_HEAD(&in_dev->rcu_head); 
memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) goto out_kfree; - inet_dev_count++; /* Reference in_dev->dev */ dev_hold(dev); #ifdef CONFIG_SYSCTL neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL); #endif - write_lock_bh(&inetdev_lock); + spin_lock_bh(&inetdev_lock); dev->ip_ptr = in_dev; /* Account for reference dev->ip_ptr */ in_dev_hold(in_dev); - write_unlock_bh(&inetdev_lock); + spin_unlock_bh(&inetdev_lock); #ifdef CONFIG_SYSCTL devinet_sysctl_register(in_dev, &in_dev->cnf); #endif @@ -188,16 +189,16 @@ static void inetdev_destroy(struct in_device *in_dev) while ((ifa = in_dev->ifa_list) != NULL) { inet_del_ifa(in_dev, &in_dev->ifa_list, 0); - inet_free_ifa(ifa); + call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } #ifdef CONFIG_SYSCTL devinet_sysctl_unregister(&in_dev->cnf); #endif - write_lock_bh(&inetdev_lock); + spin_lock_bh(&inetdev_lock); in_dev->dev->ip_ptr = NULL; /* in_dev_put following below will kill the in_device */ - write_unlock_bh(&inetdev_lock); + spin_unlock_bh(&inetdev_lock); #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(in_dev->arp_parms); @@ -208,16 +209,16 @@ static void inetdev_destroy(struct in_device *in_dev) int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) { - read_lock(&in_dev->lock); + rcu_read_lock(); for_primary_ifa(in_dev) { if (inet_ifa_match(a, ifa)) { if (!b || inet_ifa_match(b, ifa)) { - read_unlock(&in_dev->lock); + rcu_read_unlock(); return 1; } } } endfor_ifa(in_dev); - read_unlock(&in_dev->lock); + rcu_read_unlock(); return 0; } @@ -241,21 +242,21 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ifap1 = &ifa->ifa_next; continue; } - write_lock_bh(&in_dev->lock); + spin_lock_bh(&inetdev_lock); *ifap1 = ifa->ifa_next; - write_unlock_bh(&in_dev->lock); + spin_unlock_bh(&inetdev_lock); rtmsg_ifa(RTM_DELADDR, ifa); 
notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); - inet_free_ifa(ifa); + call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } } /* 2. Unlink it */ - write_lock_bh(&in_dev->lock); + spin_lock_bh(&inetdev_lock); *ifap = ifa1->ifa_next; - write_unlock_bh(&in_dev->lock); + spin_unlock_bh(&inetdev_lock); /* 3. Announce address deletion */ @@ -270,7 +271,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, rtmsg_ifa(RTM_DELADDR, ifa1); notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (destroy) { - inet_free_ifa(ifa1); + call_rcu(&ifa1->rcu_head, inet_rcu_free_ifa); if (!in_dev->ifa_list) inetdev_destroy(in_dev); @@ -285,7 +286,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) ASSERT_RTNL(); if (!ifa->ifa_local) { - inet_free_ifa(ifa); + call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); return 0; } @@ -300,11 +301,11 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { if (ifa1->ifa_local == ifa->ifa_local) { - inet_free_ifa(ifa); + call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); return -EEXIST; } if (ifa1->ifa_scope != ifa->ifa_scope) { - inet_free_ifa(ifa); + call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); return -EINVAL; } ifa->ifa_flags |= IFA_F_SECONDARY; @@ -317,9 +318,9 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) } ifa->ifa_next = *ifap; - write_lock_bh(&in_dev->lock); + spin_lock_bh(&inetdev_lock); *ifap = ifa; - write_unlock_bh(&in_dev->lock); + spin_unlock_bh(&inetdev_lock); /* Send message first, then call notifier. 
Notifier will trigger FIB update, so that @@ -339,7 +340,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) if (!in_dev) { in_dev = inetdev_init(dev); if (!in_dev) { - inet_free_ifa(ifa); + call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); return -ENOBUFS; } } @@ -771,12 +772,11 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) u32 addr = 0; struct in_device *in_dev; - read_lock(&inetdev_lock); + rcu_read_lock(); in_dev = __in_dev_get(dev); if (!in_dev) goto out_unlock_inetdev; - read_lock(&in_dev->lock); for_primary_ifa(in_dev) { if (ifa->ifa_scope > scope) continue; @@ -787,8 +787,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) if (!addr) addr = ifa->ifa_local; } endfor_ifa(in_dev); - read_unlock(&in_dev->lock); - read_unlock(&inetdev_lock); + rcu_read_unlock(); if (addr) goto out; @@ -798,30 +797,25 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) in dev_base list. */ read_lock(&dev_base_lock); - read_lock(&inetdev_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { if ((in_dev = __in_dev_get(dev)) == NULL) continue; - read_lock(&in_dev->lock); for_primary_ifa(in_dev) { if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) { - read_unlock(&in_dev->lock); addr = ifa->ifa_local; goto out_unlock_both; } } endfor_ifa(in_dev); - read_unlock(&in_dev->lock); } out_unlock_both: - read_unlock(&inetdev_lock); read_unlock(&dev_base_lock); +out_unlock_inetdev: + rcu_read_unlock(); out: return addr; -out_unlock_inetdev: - read_unlock(&inetdev_lock); - goto out; } static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, @@ -874,29 +868,24 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop struct in_device *in_dev; if (dev) { - read_lock(&inetdev_lock); - if ((in_dev = __in_dev_get(dev))) { - read_lock(&in_dev->lock); + rcu_read_lock(); + if ((in_dev = __in_dev_get(dev))) addr = confirm_addr_indev(in_dev, dst, 
local, scope); - read_unlock(&in_dev->lock); - } - read_unlock(&inetdev_lock); + rcu_read_unlock(); return addr; } read_lock(&dev_base_lock); - read_lock(&inetdev_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { if ((in_dev = __in_dev_get(dev))) { - read_lock(&in_dev->lock); addr = confirm_addr_indev(in_dev, dst, local, scope); - read_unlock(&in_dev->lock); if (addr) break; } } - read_unlock(&inetdev_lock); + rcu_read_unlock(); read_unlock(&dev_base_lock); return addr; @@ -1065,12 +1054,12 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) continue; if (idx > s_idx) s_ip_idx = 0; - read_lock(&inetdev_lock); + rcu_read_lock(); if ((in_dev = __in_dev_get(dev)) == NULL) { - read_unlock(&inetdev_lock); + rcu_read_unlock(); continue; } - read_lock(&in_dev->lock); + for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { if (ip_idx < s_ip_idx) @@ -1078,13 +1067,11 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR) <= 0) { - read_unlock(&in_dev->lock); - read_unlock(&inetdev_lock); + rcu_read_unlock(); goto done; } } - read_unlock(&in_dev->lock); - read_unlock(&inetdev_lock); + rcu_read_unlock(); } done: @@ -1138,11 +1125,11 @@ void inet_forward_change(void) read_lock(&dev_base_lock); for (dev = dev_base; dev; dev = dev->next) { struct in_device *in_dev; - read_lock(&inetdev_lock); + rcu_read_lock(); in_dev = __in_dev_get(dev); if (in_dev) in_dev->cnf.forwarding = on; - read_unlock(&inetdev_lock); + rcu_read_unlock(); } read_unlock(&dev_base_lock); @@ -1508,6 +1495,5 @@ EXPORT_SYMBOL(devinet_ioctl); EXPORT_SYMBOL(in_dev_finish_destroy); EXPORT_SYMBOL(inet_select_addr); EXPORT_SYMBOL(inetdev_by_index); -EXPORT_SYMBOL(inetdev_lock); EXPORT_SYMBOL(register_inetaddr_notifier); EXPORT_SYMBOL(unregister_inetaddr_notifier); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c 
index f5b008a9d7d0..f13e797c32e8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -172,13 +172,13 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, int ret; no_addr = rpf = 0; - read_lock(&inetdev_lock); + rcu_read_lock(); in_dev = __in_dev_get(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); } - read_unlock(&inetdev_lock); + rcu_read_unlock(); if (in_dev == NULL) goto e_inval; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c19758c36310..0d68d99daad2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -878,7 +878,7 @@ static void icmp_address_reply(struct sk_buff *skb) in_dev = in_dev_get(dev); if (!in_dev) goto out; - read_lock(&in_dev->lock); + rcu_read_lock(); if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { @@ -895,7 +895,7 @@ static void icmp_address_reply(struct sk_buff *skb) NIPQUAD(mask), dev->name, NIPQUAD(rt->rt_src)); } } - read_unlock(&in_dev->lock); + rcu_read_unlock(); in_dev_put(in_dev); out:; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 01db76123d88..d1815d3efd6c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -487,7 +487,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) int type; if (!pmc) { - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; @@ -499,7 +499,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -541,8 +541,8 @@ static void igmpv3_send_cr(struct in_device *in_dev) struct sk_buff *skb = NULL; int type, dtype; - read_lock(&in_dev->lock); - write_lock_bh(&in_dev->mc_lock); + read_lock(&in_dev->mc_list_lock); + 
spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ pmc_prev = NULL; @@ -575,7 +575,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) } else pmc_prev = pmc; } - write_unlock_bh(&in_dev->mc_lock); + spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { @@ -601,7 +601,8 @@ static void igmpv3_send_cr(struct in_device *in_dev) } spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); + if (!skb) return; (void) igmpv3_sendpack(skb); @@ -759,14 +760,14 @@ static void igmp_heard_report(struct in_device *in_dev, u32 group) if (group == IGMP_ALL_HOSTS) return; - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); for (im=in_dev->mc_list; im!=NULL; im=im->next) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); } static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, @@ -840,7 +841,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * - Use the igmp->igmp_code field as the maximum * delay possible */ - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); for (im=in_dev->mc_list; im!=NULL; im=im->next) { if (group && group != im->multiaddr) continue; @@ -856,7 +857,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, spin_unlock_bh(&im->lock); igmp_mod_timer(im, max_delay); } - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); } int igmp_rcv(struct sk_buff *skb) @@ -982,10 +983,10 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) } spin_unlock_bh(&im->lock); - write_lock_bh(&in_dev->mc_lock); + spin_lock_bh(&in_dev->mc_tomb_lock); pmc->next = in_dev->mc_tomb; in_dev->mc_tomb = pmc; - write_unlock_bh(&in_dev->mc_lock); + spin_unlock_bh(&in_dev->mc_tomb_lock); } static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) @@ -993,7 +994,7 
@@ static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) struct ip_mc_list *pmc, *pmc_prev; struct ip_sf_list *psf, *psf_next; - write_lock_bh(&in_dev->mc_lock); + spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) { if (pmc->multiaddr == multiaddr) @@ -1006,7 +1007,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) else in_dev->mc_tomb = pmc->next; } - write_unlock_bh(&in_dev->mc_lock); + spin_unlock_bh(&in_dev->mc_tomb_lock); if (pmc) { for (psf=pmc->tomb; psf; psf=psf_next) { psf_next = psf->sf_next; @@ -1021,10 +1022,10 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) { struct ip_mc_list *pmc, *nextpmc; - write_lock_bh(&in_dev->mc_lock); + spin_lock_bh(&in_dev->mc_tomb_lock); pmc = in_dev->mc_tomb; in_dev->mc_tomb = NULL; - write_unlock_bh(&in_dev->mc_lock); + spin_unlock_bh(&in_dev->mc_tomb_lock); for (; pmc; pmc = nextpmc) { nextpmc = pmc->next; @@ -1033,7 +1034,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(pmc); } /* clear dead sources, too */ - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { struct ip_sf_list *psf, *psf_next; @@ -1046,7 +1047,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(psf); } } - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); } #endif @@ -1167,10 +1168,10 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr) im->gsquery = 0; #endif im->loaded = 0; - write_lock_bh(&in_dev->lock); + write_lock_bh(&in_dev->mc_list_lock); im->next=in_dev->mc_list; in_dev->mc_list=im; - write_unlock_bh(&in_dev->lock); + write_unlock_bh(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1194,9 +1195,9 @@ void ip_mc_dec_group(struct in_device *in_dev, u32 addr) for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { if (i->multiaddr==addr) { if (--i->users == 0) { - 
write_lock_bh(&in_dev->lock); + write_lock_bh(&in_dev->mc_list_lock); *ip = i->next; - write_unlock_bh(&in_dev->lock); + write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); if (!in_dev->dead) @@ -1251,7 +1252,8 @@ void ip_mc_init_dev(struct in_device *in_dev) in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif - in_dev->mc_lock = RW_LOCK_UNLOCKED; + in_dev->mc_list_lock = RW_LOCK_UNLOCKED; + in_dev->mc_tomb_lock = SPIN_LOCK_UNLOCKED; } /* Device going up */ @@ -1281,17 +1283,17 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); - write_lock_bh(&in_dev->lock); + write_lock_bh(&in_dev->mc_list_lock); while ((i = in_dev->mc_list) != NULL) { in_dev->mc_list = i->next; - write_unlock_bh(&in_dev->lock); + write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); ip_ma_put(i); - write_lock_bh(&in_dev->lock); + write_lock_bh(&in_dev->mc_list_lock); } - write_unlock_bh(&in_dev->lock); + write_unlock_bh(&in_dev->mc_list_lock); } static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) @@ -1391,18 +1393,18 @@ int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif @@ -1527,18 +1529,18 @@ int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? 
bug */ - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); @@ -2095,7 +2097,7 @@ int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto) struct ip_sf_list *psf; int rv = 0; - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); for (im=in_dev->mc_list; im; im=im->next) { if (im->multiaddr == mc_addr) break; @@ -2117,7 +2119,7 @@ int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto) } else rv = 1; /* unspecified source; tentatively allow */ } - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); return rv; } @@ -2141,13 +2143,13 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) in_dev = in_dev_get(state->dev); if (!in_dev) continue; - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); im = in_dev->mc_list; if (im) { state->in_dev = in_dev; break; } - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); in_dev_put(in_dev); } return im; @@ -2159,7 +2161,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li im = im->next; while (!im) { if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->lock); + read_unlock(&state->in_dev->mc_list_lock); in_dev_put(state->in_dev); } state->dev = state->dev->next; @@ -2170,7 +2172,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li state->in_dev = in_dev_get(state->dev); if (!state->in_dev) continue; - read_lock(&state->in_dev->lock); + read_lock(&state->in_dev->mc_list_lock); im = state->in_dev->mc_list; } return im; @@ -2206,7 +2208,7 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->lock); + 
read_unlock(&state->in_dev->mc_list_lock); in_dev_put(state->in_dev); state->in_dev = NULL; } @@ -2304,7 +2306,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) continue; - read_lock_bh(&idev->lock); + read_lock(&idev->mc_list_lock); im = idev->mc_list; if (likely(im != NULL)) { spin_lock_bh(&im->lock); @@ -2316,7 +2318,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) } spin_unlock_bh(&im->lock); } - read_unlock_bh(&idev->lock); + read_unlock(&idev->mc_list_lock); in_dev_put(idev); } return psf; @@ -2332,7 +2334,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l state->im = state->im->next; while (!state->im) { if (likely(state->idev != NULL)) { - read_unlock_bh(&state->idev->lock); + read_unlock(&state->idev->mc_list_lock); in_dev_put(state->idev); } state->dev = state->dev->next; @@ -2343,7 +2345,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l state->idev = in_dev_get(state->dev); if (!state->idev) continue; - read_lock_bh(&state->idev->lock); + read_lock(&state->idev->mc_list_lock); state->im = state->idev->mc_list; } if (!state->im) @@ -2389,7 +2391,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) state->im = NULL; } if (likely(state->idev != NULL)) { - read_unlock_bh(&state->idev->lock); + read_unlock(&state->idev->mc_list_lock); in_dev_put(state->idev); state->idev = NULL; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 44d8056034fc..1a88f6aeb108 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1855,7 +1855,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, if (MULTICAST(daddr)) { struct in_device *in_dev; - read_lock(&inetdev_lock); + rcu_read_lock(); if ((in_dev = __in_dev_get(dev)) != NULL) { int our = ip_check_mc(in_dev, daddr, saddr, skb->nh.iph->protocol); @@ -1864,12 +1864,12 @@ int ip_route_input(struct sk_buff 
*skb, u32 daddr, u32 saddr, || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) #endif ) { - read_unlock(&inetdev_lock); + rcu_read_unlock(); return ip_route_input_mc(skb, daddr, saddr, tos, dev, our); } } - read_unlock(&inetdev_lock); + rcu_read_unlock(); return -EINVAL; } return ip_route_input_slow(skb, daddr, saddr, tos, dev); diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 11a2955f8e09..272bb36c9ce1 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -306,7 +306,7 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) in_dev = in_dev_get(dev); if (in_dev == NULL) return; - read_lock(&in_dev->lock); + rcu_read_lock(); if (in_dev->ifa_list) arp_send(ARPOP_REQUEST, ETH_P_ARP, @@ -314,7 +314,7 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) dev, in_dev->ifa_list->ifa_address, NULL, dev->dev_addr, NULL); - read_unlock(&in_dev->lock); + rcu_read_unlock(); in_dev_put(in_dev); #endif /* CONFIG_INET */ } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 16a57cf9bcca..bae07708eb01 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -148,13 +148,12 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, struct in_ifaddr *ifa; struct sctp_sockaddr_entry *addr; - read_lock(&inetdev_lock); + rcu_read_lock(); if ((in_dev = __in_dev_get(dev)) == NULL) { - read_unlock(&inetdev_lock); + rcu_read_unlock(); return; } - read_lock(&in_dev->lock); for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { /* Add the address to the local list. 
*/ addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); @@ -166,8 +165,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, } } - read_unlock(&in_dev->lock); - read_unlock(&inetdev_lock); + rcu_read_unlock(); } /* Extract our IP addresses from the system and stash them in the -- cgit v1.2.3 From f2868da999e97e006dd1fa875986aee692172d55 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Aug 2004 01:13:55 -0700 Subject: [IPV4]: Fix race in inetdev RCU handling. --- include/linux/inetdevice.h | 8 +------- net/ipv4/devinet.c | 8 +++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 29d1135f1201..ec751e9fb1c2 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -158,16 +158,10 @@ __in_dev_get(const struct net_device *dev) extern void in_dev_finish_destroy(struct in_device *idev); -static inline void in_dev_rcu_destroy(struct rcu_head *head) -{ - struct in_device *idev = container_of(head, struct in_device, rcu_head); - in_dev_finish_destroy(idev); -} - static inline void in_dev_put(struct in_device *idev) { if (atomic_dec_and_test(&idev->refcnt)) - call_rcu(&idev->rcu_head, in_dev_rcu_destroy); + in_dev_finish_destroy(idev); } #define __in_dev_put(idev) atomic_dec(&(idev)->refcnt) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9640915a0717..70c54ca0c5d9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -177,6 +177,12 @@ out_kfree: goto out; } +static void in_dev_rcu_put(struct rcu_head *head) +{ + struct in_device *idev = container_of(head, struct in_device, rcu_head); + in_dev_put(idev); +} + static void inetdev_destroy(struct in_device *in_dev) { struct in_ifaddr *ifa; @@ -204,7 +210,7 @@ static void inetdev_destroy(struct in_device *in_dev) neigh_sysctl_unregister(in_dev->arp_parms); #endif neigh_parms_release(&arp_tbl, in_dev->arp_parms); - in_dev_put(in_dev); + call_rcu(&in_dev->rcu_head, in_dev_rcu_put); } int 
inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) -- cgit v1.2.3 From 0c537a4cb3883683a747eeea17152886268a0a28 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 15 Aug 2004 05:06:03 -0700 Subject: [ATM]: Missing static in atm. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/atm/clip.c | 4 ++-- net/atm/ipcommon.h | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/net/atm/clip.c b/net/atm/clip.c index 4417df3fafa6..5de7c1fd73b5 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -47,8 +47,8 @@ #endif -struct net_device *clip_devs = NULL; -struct atm_vcc *atmarpd = NULL; +static struct net_device *clip_devs; +static struct atm_vcc *atmarpd; static struct neigh_table clip_tbl; static struct timer_list idle_timer; static int start_timer = 1; diff --git a/net/atm/ipcommon.h b/net/atm/ipcommon.h index bc1675eca081..d72165f60939 100644 --- a/net/atm/ipcommon.h +++ b/net/atm/ipcommon.h @@ -12,9 +12,6 @@ #include #include - -extern struct net_device *clip_devs; - /* * Appends all skbs from "from" to "to". The operation is atomic with respect * to all other skb operations on "from" or "to". -- cgit v1.2.3 From 550cfa2b3636462cb163b1f873b18ac0e7ab14b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 15 Aug 2004 05:06:49 -0700 Subject: [NET]: Add missing struct net_device forward decl to skbuff.h Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 724d6841d0ae..354f1ff564bb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -89,6 +89,8 @@ #define NET_CALLER(arg) __builtin_return_address(0) #endif +struct net_device; + #ifdef CONFIG_NETFILTER struct nf_conntrack { atomic_t use; -- cgit v1.2.3 From 26b561fb8226c4a33ec74b3a6a5bbde57d50175f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Aug 2004 05:09:07 -0700 Subject: [RBTREE]: Add rb_last() Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rbtree.h | 1 + lib/rbtree.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index fb2088f0bd4a..3ae0c6e140af 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -123,6 +123,7 @@ extern void rb_erase(struct rb_node *, struct rb_root *); extern struct rb_node *rb_next(struct rb_node *); extern struct rb_node *rb_prev(struct rb_node *); extern struct rb_node *rb_first(struct rb_root *); +extern struct rb_node *rb_last(struct rb_root *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, diff --git a/lib/rbtree.c b/lib/rbtree.c index 621552c344e7..14b791ac5089 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -312,6 +312,19 @@ struct rb_node *rb_first(struct rb_root *root) } EXPORT_SYMBOL(rb_first); +struct rb_node *rb_last(struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} +EXPORT_SYMBOL(rb_last); + struct rb_node *rb_next(struct rb_node *node) { /* If we have a right-hand child, go down and then left as far -- cgit v1.2.3 From a01977ff23b35f980a80efcef22babe843f31629 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Aug 2004 05:09:51 -0700 
Subject: [NET_SCHED]: Replace eligible list by rbtree in HFSC scheduler. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 109 +++++++++++++++++++-------------------------------- 1 file changed, 41 insertions(+), 68 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 84ef3ab6a843..ed005ab200af 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -133,9 +134,9 @@ struct hfsc_class struct list_head children; /* child classes */ struct Qdisc *qdisc; /* leaf qdisc */ + struct rb_node el_node; /* qdisc's eligible tree member */ struct list_head actlist; /* active children list */ struct list_head alist; /* active children list member */ - struct list_head ellist; /* eligible list member */ struct list_head hlist; /* hash list member */ struct list_head dlist; /* drop list member */ @@ -183,7 +184,7 @@ struct hfsc_sched u16 defcls; /* default class id */ struct hfsc_class root; /* root class */ struct list_head clhash[HFSC_HSIZE]; /* class hash */ - struct list_head eligible; /* eligible list */ + struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ struct sk_buff_head requeue; /* requeued packet */ @@ -219,82 +220,51 @@ do { \ /* - * eligible list holds backlogged classes being sorted by their eligible times. - * there is one eligible list per hfsc instance. + * eligible tree holds backlogged classes being sorted by their eligible times. + * there is one eligible tree per hfsc instance. 
*/ static void -ellist_insert(struct hfsc_class *cl) +eltree_insert(struct hfsc_class *cl) { - struct list_head *head = &cl->sched->eligible; - struct hfsc_class *p; - - /* check the last entry first */ - if (list_empty(head) || - ((p = list_entry(head->prev, struct hfsc_class, ellist)) && - p->cl_e <= cl->cl_e)) { - list_add_tail(&cl->ellist, head); - return; - } + struct rb_node **p = &cl->sched->eligible.rb_node; + struct rb_node *parent = NULL; + struct hfsc_class *cl1; - list_for_each_entry(p, head, ellist) { - if (cl->cl_e < p->cl_e) { - /* insert cl before p */ - list_add_tail(&cl->ellist, &p->ellist); - return; - } + while (*p != NULL) { + parent = *p; + cl1 = rb_entry(parent, struct hfsc_class, el_node); + if (cl->cl_e >= cl1->cl_e) + p = &parent->rb_right; + else + p = &parent->rb_left; } - ASSERT(0); /* should not reach here */ + rb_link_node(&cl->el_node, parent, p); + rb_insert_color(&cl->el_node, &cl->sched->eligible); } static inline void -ellist_remove(struct hfsc_class *cl) +eltree_remove(struct hfsc_class *cl) { - list_del(&cl->ellist); + rb_erase(&cl->el_node, &cl->sched->eligible); } -static void -ellist_update(struct hfsc_class *cl) +static inline void +eltree_update(struct hfsc_class *cl) { - struct list_head *head = &cl->sched->eligible; - struct hfsc_class *p, *last; - - /* - * the eligible time of a class increases monotonically. - * if the next entry has a larger eligible time, nothing to do. 
- */ - if (cl->ellist.next == head || - ((p = list_entry(cl->ellist.next, struct hfsc_class, ellist)) && - cl->cl_e <= p->cl_e)) - return; - - /* check the last entry */ - last = list_entry(head->prev, struct hfsc_class, ellist); - if (last->cl_e <= cl->cl_e) { - list_move_tail(&cl->ellist, head); - return; - } - - /* - * the new position must be between the next entry - * and the last entry - */ - list_for_each_entry_continue(p, head, ellist) { - if (cl->cl_e < p->cl_e) { - list_move_tail(&cl->ellist, &p->ellist); - return; - } - } - ASSERT(0); /* should not reach here */ + eltree_remove(cl); + eltree_insert(cl); } /* find the class with the minimum deadline among the eligible classes */ static inline struct hfsc_class * -ellist_get_mindl(struct list_head *head, u64 cur_time) +eltree_get_mindl(struct hfsc_sched *q, u64 cur_time) { struct hfsc_class *p, *cl = NULL; + struct rb_node *n; - list_for_each_entry(p, head, ellist) { + for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) { + p = rb_entry(n, struct hfsc_class, el_node); if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) @@ -305,11 +275,14 @@ ellist_get_mindl(struct list_head *head, u64 cur_time) /* find the class with minimum eligible time among the eligible classes */ static inline struct hfsc_class * -ellist_get_minel(struct list_head *head) +eltree_get_minel(struct hfsc_sched *q) { - if (list_empty(head)) + struct rb_node *n; + + n = rb_first(&q->eligible); + if (n == NULL) return NULL; - return list_entry(head->next, struct hfsc_class, ellist); + return rb_entry(n, struct hfsc_class, el_node); } /* @@ -711,7 +684,7 @@ init_ed(struct hfsc_class *cl, unsigned int next_len) cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); - ellist_insert(cl); + eltree_insert(cl); } static void @@ -720,7 +693,7 @@ update_ed(struct hfsc_class *cl, unsigned int next_len) cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = 
rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); - ellist_update(cl); + eltree_update(cl); } static inline void @@ -941,7 +914,7 @@ static void set_passive(struct hfsc_class *cl) { if (cl->cl_flags & HFSC_RSC) - ellist_remove(cl); + eltree_remove(cl); list_del(&cl->dlist); @@ -1528,7 +1501,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time) u64 next_time = 0; long delay; - if ((cl = ellist_get_minel(&q->eligible)) != NULL) + if ((cl = eltree_get_minel(q)) != NULL) next_time = cl->cl_e; if (q->root.cl_cfmin != 0) { if (next_time == 0 || next_time > q->root.cl_cfmin) @@ -1559,7 +1532,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) q->defcls = qopt->defcls; for (i = 0; i < HFSC_HSIZE; i++) INIT_LIST_HEAD(&q->clhash[i]); - INIT_LIST_HEAD(&q->eligible); + q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); skb_queue_head_init(&q->requeue); @@ -1641,7 +1614,7 @@ hfsc_reset_qdisc(struct Qdisc *sch) hfsc_reset_class(cl); } __skb_queue_purge(&q->requeue); - INIT_LIST_HEAD(&q->eligible); + q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); del_timer(&q->wd_timer); sch->flags &= ~TCQ_F_THROTTLED; @@ -1749,7 +1722,7 @@ hfsc_dequeue(struct Qdisc *sch) * find the class with the minimum deadline among * the eligible classes. */ - if ((cl = ellist_get_mindl(&q->eligible, cur_time)) != NULL) { + if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { realtime = 1; } else { /* -- cgit v1.2.3 From 1c1393ea6515e7ca7988fa3e509fea3f560087a8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Aug 2004 05:10:33 -0700 Subject: [NET_SCHED]: Replace actlist by rbtrees in HFSC scheduler. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/sched/sch_hfsc.c | 182 ++++++++++++++++++++++++++------------------------- 1 file changed, 92 insertions(+), 90 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index ed005ab200af..ed7f791a9fce 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -135,8 +135,10 @@ struct hfsc_class struct Qdisc *qdisc; /* leaf qdisc */ struct rb_node el_node; /* qdisc's eligible tree member */ - struct list_head actlist; /* active children list */ - struct list_head alist; /* active children list member */ + struct rb_root vt_tree; /* active children sorted by cl_vt */ + struct rb_node vt_node; /* parent's vt_tree member */ + struct rb_root cf_tree; /* active children sorted by cl_f */ + struct rb_node cf_node; /* parent's cf_tree member */ struct list_head hlist; /* hash list member */ struct list_head dlist; /* drop list member */ @@ -286,84 +288,51 @@ eltree_get_minel(struct hfsc_sched *q) } /* - * active children list holds backlogged child classes being sorted - * by their virtual time. each intermediate class has one active - * children list. + * vttree holds backlogged child classes being sorted by their virtual + * time. each intermediate class has one vttree.
*/ static void -actlist_insert(struct hfsc_class *cl) +vttree_insert(struct hfsc_class *cl) { - struct list_head *head = &cl->cl_parent->actlist; - struct hfsc_class *p; - - /* check the last entry first */ - if (list_empty(head) || - ((p = list_entry(head->prev, struct hfsc_class, alist)) && - p->cl_vt <= cl->cl_vt)) { - list_add_tail(&cl->alist, head); - return; - } + struct rb_node **p = &cl->cl_parent->vt_tree.rb_node; + struct rb_node *parent = NULL; + struct hfsc_class *cl1; - list_for_each_entry(p, head, alist) { - if (cl->cl_vt < p->cl_vt) { - /* insert cl before p */ - list_add_tail(&cl->alist, &p->alist); - return; - } + while (*p != NULL) { + parent = *p; + cl1 = rb_entry(parent, struct hfsc_class, vt_node); + if (cl->cl_vt >= cl1->cl_vt) + p = &parent->rb_right; + else + p = &parent->rb_left; } - ASSERT(0); /* should not reach here */ + rb_link_node(&cl->vt_node, parent, p); + rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree); } static inline void -actlist_remove(struct hfsc_class *cl) +vttree_remove(struct hfsc_class *cl) { - list_del(&cl->alist); + rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree); } -static void -actlist_update(struct hfsc_class *cl) +static inline void +vttree_update(struct hfsc_class *cl) { - struct list_head *head = &cl->cl_parent->actlist; - struct hfsc_class *p, *last; - - /* - * the virtual time of a class increases monotonically. - * if the next entry has a larger virtual time, nothing to do. 
- */ - if (cl->alist.next == head || - ((p = list_entry(cl->alist.next, struct hfsc_class, alist)) && - cl->cl_vt <= p->cl_vt)) - return; - - /* check the last entry */ - last = list_entry(head->prev, struct hfsc_class, alist); - if (last->cl_vt <= cl->cl_vt) { - list_move_tail(&cl->alist, head); - return; - } - - /* - * the new position must be between the next entry - * and the last entry - */ - list_for_each_entry_continue(p, head, alist) { - if (cl->cl_vt < p->cl_vt) { - list_move_tail(&cl->alist, &p->alist); - return; - } - } - ASSERT(0); /* should not reach here */ + vttree_remove(cl); + vttree_insert(cl); } static inline struct hfsc_class * -actlist_firstfit(struct hfsc_class *cl, u64 cur_time) +vttree_firstfit(struct hfsc_class *cl, u64 cur_time) { struct hfsc_class *p; + struct rb_node *n; - list_for_each_entry(p, &cl->actlist, alist) { - if (p->cl_f <= cur_time) { + for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) { + p = rb_entry(n, struct hfsc_class, vt_node); + if (p->cl_f <= cur_time) return p; - } } return NULL; } @@ -372,14 +341,14 @@ actlist_firstfit(struct hfsc_class *cl, u64 cur_time) * get the leaf class with the minimum vt in the hierarchy */ static struct hfsc_class * -actlist_get_minvt(struct hfsc_class *cl, u64 cur_time) +vttree_get_minvt(struct hfsc_class *cl, u64 cur_time) { /* if root-class's cfmin is bigger than cur_time nothing to do */ if (cl->cl_cfmin > cur_time) return NULL; while (cl->level > 0) { - cl = actlist_firstfit(cl, cur_time); + cl = vttree_firstfit(cl, cur_time); if (cl == NULL) return NULL; /* @@ -391,6 +360,38 @@ actlist_get_minvt(struct hfsc_class *cl, u64 cur_time) return cl; } +static void +cftree_insert(struct hfsc_class *cl) +{ + struct rb_node **p = &cl->cl_parent->cf_tree.rb_node; + struct rb_node *parent = NULL; + struct hfsc_class *cl1; + + while (*p != NULL) { + parent = *p; + cl1 = rb_entry(parent, struct hfsc_class, cf_node); + if (cl->cl_f >= cl1->cl_f) + p = &parent->rb_right; + else + p = 
&parent->rb_left; + } + rb_link_node(&cl->cf_node, parent, p); + rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree); +} + +static inline void +cftree_remove(struct hfsc_class *cl) +{ + rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree); +} + +static inline void +cftree_update(struct hfsc_class *cl) +{ + cftree_remove(cl); + cftree_insert(cl); +} + /* * service curve support functions * @@ -702,32 +703,25 @@ update_d(struct hfsc_class *cl, unsigned int next_len) cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); } -static void +static inline void update_cfmin(struct hfsc_class *cl) { + struct rb_node *n = rb_first(&cl->cf_tree); struct hfsc_class *p; - u64 cfmin; - if (list_empty(&cl->actlist)) { + if (n == NULL) { cl->cl_cfmin = 0; return; } - cfmin = HT_INFINITY; - list_for_each_entry(p, &cl->actlist, alist) { - if (p->cl_f == 0) { - cl->cl_cfmin = 0; - return; - } - if (p->cl_f < cfmin) - cfmin = p->cl_f; - } - cl->cl_cfmin = cfmin; + p = rb_entry(n, struct hfsc_class, cf_node); + cl->cl_cfmin = p->cl_f; } static void init_vf(struct hfsc_class *cl, unsigned int len) { struct hfsc_class *max_cl, *p; + struct rb_node *n; u64 vt, f, cur_time; int go_active; @@ -740,9 +734,9 @@ init_vf(struct hfsc_class *cl, unsigned int len) go_active = 0; if (go_active) { - if (!list_empty(&cl->cl_parent->actlist)) { - max_cl = list_entry(cl->cl_parent->actlist.prev, - struct hfsc_class, alist); + n = rb_last(&cl->cl_parent->vt_tree); + if (n != NULL) { + max_cl = rb_entry(n, struct hfsc_class,vt_node); /* * set vt to the average of the min and max * classes. 
if the parent's period didn't @@ -787,7 +781,8 @@ init_vf(struct hfsc_class *cl, unsigned int len) cl->cl_parentperiod++; cl->cl_f = 0; - actlist_insert(cl); + vttree_insert(cl); + cftree_insert(cl); if (cl->cl_flags & HFSC_USC) { /* class has upper limit curve */ @@ -807,6 +802,7 @@ init_vf(struct hfsc_class *cl, unsigned int len) f = max(cl->cl_myf, cl->cl_cfmin); if (f != cl->cl_f) { cl->cl_f = f; + cftree_update(cl); update_cfmin(cl->cl_parent); } } @@ -839,9 +835,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) if (cl->cl_vt > cl->cl_parent->cl_cvtmax) cl->cl_parent->cl_cvtmax = cl->cl_vt; - /* remove this class from the vt list */ - actlist_remove(cl); + /* remove this class from the vt tree */ + vttree_remove(cl); + cftree_remove(cl); update_cfmin(cl->cl_parent); continue; @@ -863,8 +860,8 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) cl->cl_vt = cl->cl_parent->cl_cvtmin; } - /* update the vt list */ - actlist_update(cl); + /* update the vt tree */ + vttree_update(cl); if (cl->cl_flags & HFSC_USC) { cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, @@ -894,6 +891,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) f = max(cl->cl_myf, cl->cl_cfmin); if (f != cl->cl_f) { cl->cl_f = f; + cftree_update(cl); update_cfmin(cl->cl_parent); } } @@ -919,8 +917,8 @@ set_passive(struct hfsc_class *cl) list_del(&cl->dlist); /* - * actlist is now handled in update_vf() so that update_vf(cl, 0, 0) - * needs to be called explicitly to remove a class from actlist + * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) + * needs to be called explicitly to remove a class from vttree. 
*/ } @@ -1144,7 +1142,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->qdisc = &noop_qdisc; cl->stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&cl->children); - INIT_LIST_HEAD(&cl->actlist); + cl->vt_tree = RB_ROOT; + cl->cf_tree = RB_ROOT; sch_tree_lock(sch); list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]); @@ -1544,7 +1543,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) q->root.qdisc = &noop_qdisc; q->root.stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&q->root.children); - INIT_LIST_HEAD(&q->root.actlist); + q->root.vt_tree = RB_ROOT; + q->root.cf_tree = RB_ROOT; list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); @@ -1591,7 +1591,9 @@ hfsc_reset_class(struct hfsc_class *cl) cl->cl_myfadj = 0; cl->cl_cfmin = 0; cl->cl_nactive = 0; - INIT_LIST_HEAD(&cl->actlist); + + cl->vt_tree = RB_ROOT; + cl->cf_tree = RB_ROOT; qdisc_reset(cl->qdisc); if (cl->cl_flags & HFSC_RSC) @@ -1729,7 +1731,7 @@ hfsc_dequeue(struct Qdisc *sch) * use link-sharing criteria * get the class with the minimum vt in the hierarchy */ - cl = actlist_get_minvt(&q->root, cur_time); + cl = vttree_get_minvt(&q->root, cur_time); if (cl == NULL) { sch->stats.overlimits++; hfsc_schedule_watchdog(sch, cur_time); -- cgit v1.2.3 From 99296150c728f250a5304a591bb08e41a88db9f4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Aug 2004 05:11:12 -0700 Subject: [NET_SCHED]: O(1) children vtoff adjustment in HFSC scheduler Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/sched/sch_hfsc.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index ed7f791a9fce..cc561e181892 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -164,6 +164,9 @@ struct hfsc_class adjustment */ u64 cl_vtoff; /* inter-period cumulative vt offset */ u64 cl_cvtmax; /* max child's vt in the last period */ + u64 cl_cvtoff; /* cumulative cvtmax of all periods */ + u64 cl_pcvtoff; /* parent's cvtoff at initialization + time */ struct internal_sc cl_rsc; /* internal real-time service curve */ struct internal_sc cl_fsc; /* internal fair service curve */ @@ -720,7 +723,7 @@ update_cfmin(struct hfsc_class *cl) static void init_vf(struct hfsc_class *cl, unsigned int len) { - struct hfsc_class *max_cl, *p; + struct hfsc_class *max_cl; struct rb_node *n; u64 vt, f, cur_time; int go_active; @@ -752,19 +755,20 @@ init_vf(struct hfsc_class *cl, unsigned int len) } else { /* * first child for a new parent backlog period. - * add parent's cvtmax to vtoff of children - * to make a new vt (vtoff + vt) larger than - * the vt in the last period for all children. + * add parent's cvtmax to cvtoff to make a new + * vt (vtoff + vt) larger than the vt in the + * last period for all children.
*/ vt = cl->cl_parent->cl_cvtmax; - list_for_each_entry(p, &cl->cl_parent->children, - siblings) - p->cl_vtoff += vt; - cl->cl_vt = 0; + cl->cl_parent->cl_cvtoff += vt; cl->cl_parent->cl_cvtmax = 0; cl->cl_parent->cl_cvtmin = 0; + cl->cl_vt = 0; } + cl->cl_vtoff = cl->cl_parent->cl_cvtoff - + cl->cl_pcvtoff; + /* update the virtual curve */ vt = cl->cl_vt + cl->cl_vtoff; rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt, @@ -1151,6 +1155,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent->level == 0) hfsc_purge_queue(sch, parent); hfsc_adjust_levels(parent); + cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); #ifdef CONFIG_NET_ESTIMATOR @@ -1584,6 +1589,8 @@ hfsc_reset_class(struct hfsc_class *cl) cl->cl_vtoff = 0; cl->cl_cvtmin = 0; cl->cl_cvtmax = 0; + cl->cl_cvtoff = 0; + cl->cl_pcvtoff = 0; cl->cl_vtperiod = 0; cl->cl_parentperiod = 0; cl->cl_f = 0; -- cgit v1.2.3 From 1c8641274f7c01434822dd4d79169cdc3170e045 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 05:30:51 -0700 Subject: [IPV6]: Add missing XFRM select in Kconfig. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 23c5759c022d..53f8e348d816 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -59,6 +59,7 @@ config INET6_IPCOMP config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" depends on IPV6 + select XFRM ---help--- Support for IPv6-in-IPv6 tunnels described in RFC 2473. -- cgit v1.2.3 From 40964cc0e22a763b262eed365b86f7192c3612fe Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Aug 2004 05:33:16 -0700 Subject: [PKT_SCHED]: cacheline-align qdisc data in qdisc_create() Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/sched/sch_api.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1f9bf9d0834c..19ff1b9e3e01 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -389,7 +389,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) { int err; struct rtattr *kind = tca[TCA_KIND-1]; - struct Qdisc *sch = NULL; + void *p = NULL; + struct Qdisc *sch; struct Qdisc_ops *ops; int size; @@ -407,12 +408,18 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) if (ops == NULL) goto err_out; - size = sizeof(*sch) + ops->priv_size; + /* ensure that the Qdisc and the private data are 32-byte aligned */ + size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST); + size += ops->priv_size + QDISC_ALIGN_CONST; - sch = kmalloc(size, GFP_KERNEL); + p = kmalloc(size, GFP_KERNEL); err = -ENOBUFS; - if (!sch) + if (!p) goto err_out; + memset(p, 0, size); + sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST) + & ~QDISC_ALIGN_CONST); + sch->padded = (char *)sch - (char *)p; /* Grrr... Resolve race condition with module unload */ @@ -420,8 +427,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) if (ops != qdisc_lookup_ops(kind)) goto err_out; - memset(sch, 0, size); - INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); @@ -470,8 +475,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) err_out: *errp = err; - if (sch) - kfree(sch); + if (p) + kfree(p); return NULL; } -- cgit v1.2.3 From 763e4a8e50610b61f1efc8d584e6a13092c458b4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 05:38:31 -0700 Subject: [XFRM_USER]: Fill in x->props algo fields. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_user.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index eccc0231faeb..2b42dc25571c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -164,15 +164,24 @@ out: return err; } -static int attach_one_algo(struct xfrm_algo **algpp, struct rtattr *u_arg) +static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, + struct xfrm_algo_desc *(*get_byname)(char *), + struct rtattr *u_arg) { struct rtattr *rta = u_arg; struct xfrm_algo *p, *ualg; + struct xfrm_algo_desc *algo; if (!rta) return 0; ualg = RTA_DATA(rta); + + algo = get_byname(ualg->alg_name); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); if (!p) return -ENOMEM; @@ -225,11 +234,17 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, copy_from_user_state(x, p); - if ((err = attach_one_algo(&x->aalg, xfrma[XFRMA_ALG_AUTH-1]))) + if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, + xfrm_aalg_get_byname, + xfrma[XFRMA_ALG_AUTH-1]))) goto error; - if ((err = attach_one_algo(&x->ealg, xfrma[XFRMA_ALG_CRYPT-1]))) + if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, + xfrm_ealg_get_byname, + xfrma[XFRMA_ALG_CRYPT-1]))) goto error; - if ((err = attach_one_algo(&x->calg, xfrma[XFRMA_ALG_COMP-1]))) + if ((err = attach_one_algo(&x->calg, &x->props.calgo, + xfrm_calg_get_byname, + xfrma[XFRMA_ALG_COMP-1]))) goto error; if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; -- cgit v1.2.3 From 6567299ad27fb7c0b21d23ccd732b14304184578 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 05:41:47 -0700 Subject: [IPV6]: Fix aalg check in esp. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/esp6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 497727195c98..c80152a70efc 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -302,8 +302,9 @@ int esp6_init_state(struct xfrm_state *x, void *args) { struct esp_data *esp = NULL; + /* null auth and encryption can have zero length keys */ if (x->aalg) { - if (x->aalg->alg_key_len == 0 || x->aalg->alg_key_len > 512) + if (x->aalg->alg_key_len > 512) goto error; } if (x->ealg == NULL) -- cgit v1.2.3 From 3ded0baffb1ec366917d8d1434711897d5a41294 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 05:43:06 -0700 Subject: [IPSEC]: Move encap check back down to esp4.c In a previous patch, I moved the encap_type checks in esp4.c from the packet processing path to xfrm_user/af_key. This isn't ideal since those encap types only make sense for esp4. The following patch moves it back into esp4.c. The difference is that it's now done in init_state so that it's only done once rather than per-packet. I've also added encap_type checks for every transform. This means that people attaching encap objects to AH/IPCOMP/IPIP will now get errors. That should be fine as no major KM does this. Please note that the error returned is now EINVAL instead of ENOPROTOOPT. This shouldn't break anything since KMs only test the errno from setsockopt() for NAT-T support rather than add_sa where it would be too late anyway. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/ah4.c | 3 +++ net/ipv4/esp4.c | 13 ++++--------- net/ipv4/ipcomp.c | 3 +++ net/ipv4/xfrm4_tunnel.c | 4 ++++ net/ipv6/ah6.c | 3 +++ net/ipv6/esp6.c | 3 +++ net/ipv6/ipcomp6.c | 3 +++ net/ipv6/xfrm6_tunnel.c | 3 +++ net/key/af_key.c | 9 --------- net/xfrm/xfrm_user.c | 9 --------- 10 files changed, 26 insertions(+), 27 deletions(-) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 9784f0376980..b345043f51dd 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -214,6 +214,9 @@ static int ah_init_state(struct xfrm_state *x, void *args) if (x->aalg->alg_key_len > 512) goto error; + if (x->encap) + goto error; + ahp = kmalloc(sizeof(*ahp), GFP_KERNEL); if (ahp == NULL) return -ENOMEM; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 07a594b831d2..c8cd0c7de3ea 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -436,6 +436,7 @@ int esp_init_state(struct xfrm_state *x, void *args) switch (encap->encap_type) { default: + goto error; case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); break; @@ -449,15 +450,9 @@ int esp_init_state(struct xfrm_state *x, void *args) return 0; error: - if (esp) { - if (esp->auth.tfm) - crypto_free_tfm(esp->auth.tfm); - if (esp->auth.work_icv) - kfree(esp->auth.work_icv); - if (esp->conf.tfm) - crypto_free_tfm(esp->conf.tfm); - kfree(esp); - } + x->data = esp; + esp_destroy(x); + x->data = NULL; return -EINVAL; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 7ce7469a3c04..b3885885abf0 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -288,6 +288,9 @@ static int ipcomp_init_state(struct xfrm_state *x, void *args) if (!x->calg) goto out; + if (x->encap) + goto out; + err = -ENOMEM; ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL); if (!ipcd) diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 0d1a0b0c7901..9f04c5706aac 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -84,6 +84,10 @@ static int ipip_init_state(struct xfrm_state *x, void *args) { if 
(!x->props.mode) return -EINVAL; + + if (x->encap) + return -EINVAL; + x->props.header_len = sizeof(struct iphdr); return 0; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index eda2737e572b..28bac499f839 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -353,6 +353,9 @@ static int ah6_init_state(struct xfrm_state *x, void *args) if (x->aalg->alg_key_len > 512) goto error; + if (x->encap) + goto error; + ahp = kmalloc(sizeof(*ahp), GFP_KERNEL); if (ahp == NULL) return -ENOMEM; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c80152a70efc..eb94426df27e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -310,6 +310,9 @@ int esp6_init_state(struct xfrm_state *x, void *args) if (x->ealg == NULL) goto error; + if (x->encap) + goto error; + esp = kmalloc(sizeof(*esp), GFP_KERNEL); if (esp == NULL) return -ENOMEM; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 04303769d36b..ee62dba1b3ca 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -284,6 +284,9 @@ static int ipcomp6_init_state(struct xfrm_state *x, void *args) if (!x->calg) goto out; + if (x->encap) + goto out; + err = -ENOMEM; ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL); if (!ipcd) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5766a133411a..fb5b34a0d4c4 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -517,6 +517,9 @@ static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args) if (!x->props.mode) return -EINVAL; + if (x->encap) + return -EINVAL; + x->props.header_len = sizeof(struct ipv6hdr); return 0; diff --git a/net/key/af_key.c b/net/key/af_key.c index fdf75a1ba801..8ca25fd7efe7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1075,15 +1075,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; natt->encap_type = n_type->sadb_x_nat_t_type_type; - switch (natt->encap_type) { - case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: - break; - default: - err = 
-ENOPROTOOPT; - goto out; - } - if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { struct sadb_x_nat_t_port* n_port = ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2b42dc25571c..be298cde3022 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -78,15 +78,6 @@ static int verify_encap_tmpl(struct rtattr **xfrma) if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap)) return -EINVAL; - encap = RTA_DATA(rt); - switch (encap->encap_type) { - case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: - break; - default: - return -ENOPROTOOPT; - } - return 0; } -- cgit v1.2.3 From 2b12caa0902157460906650afc9b4f6d8ed2d368 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 05:47:19 -0700 Subject: [IRDA]: Trivial optimization in inetdev handling. No need to hold onto the idev. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/irda/irlan/irlan_eth.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 272bb36c9ce1..04bb8925ac04 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -303,10 +303,10 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) */ #ifdef CONFIG_INET IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n"); - in_dev = in_dev_get(dev); - if (in_dev == NULL) - return; rcu_read_lock(); + in_dev = __in_dev_get(dev); + if (in_dev == NULL) + goto out; if (in_dev->ifa_list) arp_send(ARPOP_REQUEST, ETH_P_ARP, @@ -314,8 +314,8 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) dev, in_dev->ifa_list->ifa_address, NULL, dev->dev_addr, NULL); +out: rcu_read_unlock(); - in_dev_put(in_dev); #endif /* CONFIG_INET */ } -- cgit v1.2.3 From 91dfe8ef6d9065630dd0aeb64cd1bdd4990212f9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 05:53:09 -0700 Subject: [IPV4]: inetdev ifa_list handling fixes outside of net/ipv4. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- drivers/net/wan/syncppp.c | 8 ++++---- drivers/net/wireless/strip.c | 23 ++++++++++++++--------- net/core/netpoll.c | 7 +++++-- net/core/pktgen.c | 8 ++++++-- net/econet/af_econet.c | 10 ++++++---- 5 files changed, 35 insertions(+), 21 deletions(-) diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index f7442d52dabe..2329c23af83e 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -50,6 +50,7 @@ #include #include #include +#include #include @@ -767,9 +768,9 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb) struct in_ifaddr *ifa; u32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */ #ifdef CONFIG_INET - if ((in_dev=in_dev_get(dev)) != NULL) + rcu_read_lock(); + if ((in_dev = __in_dev_get(dev)) != NULL) { - read_lock(&in_dev->lock); for (ifa=in_dev->ifa_list; ifa != NULL; ifa=ifa->ifa_next) { if (strcmp(dev->name, ifa->ifa_label) == 0) @@ -779,9 +780,8 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb) break; } } - read_unlock(&in_dev->lock); - in_dev_put(in_dev); } + rcu_read_unlock(); #endif /* I hope both addr and mask are in the net order */ sppp_cisco_send (sp, CISCO_ADDR_REPLY, addr, mask); diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 98cee21f7d84..c9331f589645 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -106,6 +106,7 @@ static const char StripVersion[] = "1.3A-STUART.CHESHIRE"; #include #include #include +#include #include #include @@ -1348,14 +1349,17 @@ static unsigned char *strip_make_packet(unsigned char *buffer, */ if (haddr.c[0] == 0xFF) { u32 brd = 0; - struct in_device *in_dev = in_dev_get(strip_info->dev); - if (in_dev == NULL) + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get(strip_info->dev); + if (in_dev == NULL) { + rcu_read_unlock(); return NULL; - read_lock(&in_dev->lock); + } if (in_dev->ifa_list) brd = in_dev->ifa_list->ifa_broadcast; - 
read_unlock(&in_dev->lock); - in_dev_put(in_dev); + rcu_read_unlock(); /* arp_query returns 1 if it succeeds in looking up the address, 0 if it fails */ if (!arp_query(haddr.c, brd, strip_info->dev)) { @@ -1500,17 +1504,18 @@ static void strip_send(struct strip *strip_info, struct sk_buff *skb) } if (1) { - struct in_device *in_dev = in_dev_get(strip_info->dev); + struct in_device *in_dev; + brd = addr = 0; + rcu_read_lock(); + in_dev = __in_dev_get(strip_info->dev); if (in_dev) { - read_lock(&in_dev->lock); if (in_dev->ifa_list) { brd = in_dev->ifa_list->ifa_broadcast; addr = in_dev->ifa_list->ifa_local; } - read_unlock(&in_dev->lock); - in_dev_put(in_dev); } + rcu_read_unlock(); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index daac168875a3..713e773eb4af 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -572,16 +573,18 @@ int netpoll_setup(struct netpoll *np) memcpy(np->local_mac, ndev->dev_addr, 6); if (!np->local_ip) { - in_dev = in_dev_get(ndev); + rcu_read_lock(); + in_dev = __in_dev_get(ndev); if (!in_dev) { + rcu_read_unlock(); printk(KERN_ERR "%s: no IP address for %s, aborting\n", np->name, np->dev_name); goto release; } np->local_ip = ntohl(in_dev->ifa_list->ifa_local); - in_dev_put(in_dev); + rcu_read_unlock(); printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", np->name, HIPQUAD(np->local_ip)); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d1a62cddb31a..d3ba2c75e530 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -263,14 +264,17 @@ static struct net_device *setup_inject(struct pktgen_info* info) info->saddr_min = 0; info->saddr_max = 0; if (strlen(info->src_min) == 0) { - struct in_device *in_dev = in_dev_get(odev); + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get(odev); if (in_dev) { if (in_dev->ifa_list) { info->saddr_min = 
in_dev->ifa_list->ifa_address; info->saddr_max = info->saddr_min; } - in_dev_put(in_dev); } + rcu_read_unlock(); } else { info->saddr_min = in_aton(info->src_min); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 07b4cff2f44d..fc31ae1209d1 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -401,16 +402,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, y.x maps to IP a.b.c.x. This should be replaced with something more flexible and more aware of subnet masks. */ { - struct in_device *idev = in_dev_get(dev); + struct in_device *idev; unsigned long network = 0; + + rcu_read_lock(); + idev = __in_dev_get(dev); if (idev) { - read_lock(&idev->lock); if (idev->ifa_list) network = ntohl(idev->ifa_list->ifa_address) & 0xffffff00; /* !!! */ - read_unlock(&idev->lock); - in_dev_put(idev); } + rcu_read_unlock(); udpdest.sin_addr.s_addr = htonl(network | addr.station); } -- cgit v1.2.3 From c96f60e4894f08787b6f86bef6324c3f3879d6b5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 05:53:47 -0700 Subject: [IPV4]: inetdev ifa_list handling fixes for s390 drivers Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/s390/net/lcs.c | 4 ++-- drivers/s390/net/qeth_main.c | 17 ++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index cac57951ae28..e4b7bbbeff17 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1002,7 +1002,7 @@ lcs_register_mc_addresses(void *data) in4_dev = in_dev_get(card->dev); if (in4_dev == NULL) return 0; - read_lock(&in4_dev->lock); + read_lock(&in4_dev->mc_list_lock); spin_lock(&card->ipm_lock); /* Check for multicast addresses to be removed. 
*/ list_for_each(l, &card->ipm_list) { @@ -1046,7 +1046,7 @@ lcs_register_mc_addresses(void *data) list_add(&ipm->list, &card->ipm_list); } spin_unlock(&card->ipm_lock); - read_unlock(&in4_dev->lock); + read_unlock(&in4_dev->mc_list_lock); in_dev_put(in4_dev); lcs_fix_multicast_list(card); return 0; diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 17f0f67d8e55..378906eba48f 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -73,6 +73,7 @@ qeth_eyecatcher(void) #include #include #include +#include #include "qeth.h" #include "qeth_mpc.h" @@ -4733,9 +4734,10 @@ qeth_free_vlan_addresses4(struct qeth_card *card, unsigned short vid) QETH_DBF_TEXT(trace, 4, "frvaddr4"); if (!card->vlangrp) return; - in_dev = in_dev_get(card->vlangrp->vlan_devices[vid]); + rcu_read_lock(); + in_dev = __in_dev_get(card->vlangrp->vlan_devices[vid]); if (!in_dev) - return; + goto out; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next){ addr = qeth_get_addr_buffer(QETH_PROT_IPV4); if (addr){ @@ -4746,7 +4748,8 @@ qeth_free_vlan_addresses4(struct qeth_card *card, unsigned short vid) kfree(addr); } } - in_dev_put(in_dev); +out: + rcu_read_unlock(); } static void @@ -4918,9 +4921,9 @@ qeth_add_vlan_mc(struct qeth_card *card) in_dev = in_dev_get(vg->vlan_devices[i]); if (!in_dev) continue; - read_lock(&in_dev->lock); + read_lock(&in_dev->mc_list_lock); qeth_add_mc(card,in_dev); - read_unlock(&in_dev->lock); + read_unlock(&in_dev->mc_list_lock); in_dev_put(in_dev); } #endif @@ -4935,10 +4938,10 @@ qeth_add_multicast_ipv4(struct qeth_card *card) in4_dev = in_dev_get(card->dev); if (in4_dev == NULL) return; - read_lock(&in4_dev->lock); + read_lock(&in4_dev->mc_list_lock); qeth_add_mc(card, in4_dev); qeth_add_vlan_mc(card); - read_unlock(&in4_dev->lock); + read_unlock(&in4_dev->mc_list_lock); in_dev_put(in4_dev); } -- cgit v1.2.3 From 024338c3b0750b333519a102ffe22b469f1744f4 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sun, 15 Aug 2004 09:08:21 -0700 Subject: [IPV4]: Kill inetdev_lock, no longer needed. It no longer protects anything, all users held RTNL semaphore to boot. Also, fix a potential race in the new RCU inetdev code, grab the reference on the idev before attaching it via dev->ip_ptr. Based upon discussions with Herbert Xu. Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 70c54ca0c5d9..ff71688af5c6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -90,8 +90,6 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p); /* Locks all the inet devices. */ -static spinlock_t inetdev_lock = SPIN_LOCK_UNLOCKED; - static struct in_ifaddr *inet_alloc_ifa(void) { struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); @@ -158,11 +156,12 @@ struct in_device *inetdev_init(struct net_device *dev) neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL); #endif - spin_lock_bh(&inetdev_lock); - dev->ip_ptr = in_dev; + /* Account for reference dev->ip_ptr */ in_dev_hold(in_dev); - spin_unlock_bh(&inetdev_lock); + smp_wmb(); + dev->ip_ptr = in_dev; + #ifdef CONFIG_SYSCTL devinet_sysctl_register(in_dev, &in_dev->cnf); #endif @@ -201,10 +200,8 @@ static void inetdev_destroy(struct in_device *in_dev) #ifdef CONFIG_SYSCTL devinet_sysctl_unregister(&in_dev->cnf); #endif - spin_lock_bh(&inetdev_lock); + in_dev->dev->ip_ptr = NULL; - /* in_dev_put following below will kill the in_device */ - spin_unlock_bh(&inetdev_lock); #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(in_dev->arp_parms); @@ -248,9 +245,8 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ifap1 = &ifa->ifa_next; continue; } - spin_lock_bh(&inetdev_lock); + *ifap1 = ifa->ifa_next; - spin_unlock_bh(&inetdev_lock); rtmsg_ifa(RTM_DELADDR, ifa); notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); @@ -260,9 
+256,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, /* 2. Unlink it */ - spin_lock_bh(&inetdev_lock); *ifap = ifa1->ifa_next; - spin_unlock_bh(&inetdev_lock); /* 3. Announce address deletion */ @@ -324,9 +318,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) } ifa->ifa_next = *ifap; - spin_lock_bh(&inetdev_lock); *ifap = ifa; - spin_unlock_bh(&inetdev_lock); /* Send message first, then call notifier. Notifier will trigger FIB update, so that -- cgit v1.2.3 From 86e3cadd45571fb2d40b798b0c9f47d988b6341d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 09:11:17 -0700 Subject: [IPV4]: Make inet_select_addr() logic clearer. It is harder to see, originally, how the __in_dev_get() == NULL case is handled. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ff71688af5c6..19822b13834f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -773,7 +773,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) rcu_read_lock(); in_dev = __in_dev_get(dev); if (!in_dev) - goto out_unlock_inetdev; + goto no_in_dev; for_primary_ifa(in_dev) { if (ifa->ifa_scope > scope) @@ -785,6 +785,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) if (!addr) addr = ifa->ifa_local; } endfor_ifa(in_dev); +no_in_dev: rcu_read_unlock(); if (addr) @@ -810,7 +811,6 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) } out_unlock_both: read_unlock(&dev_base_lock); -out_unlock_inetdev: rcu_read_unlock(); out: return addr; -- cgit v1.2.3 From 4ce99e97a0dfe8d1f45c115d681e0fa22c333655 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Aug 2004 09:12:16 -0700 Subject: [IPV4]: Simplify ifa free handling code. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 19822b13834f..fc9930460864 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -102,18 +102,17 @@ static struct in_ifaddr *inet_alloc_ifa(void) return ifa; } -static inline void inet_free_ifa(struct in_ifaddr *ifa) +static void inet_rcu_free_ifa(struct rcu_head *head) { + struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); if (ifa->ifa_dev) in_dev_put(ifa->ifa_dev); kfree(ifa); } -static void inet_rcu_free_ifa(struct rcu_head *head) +static inline void inet_free_ifa(struct in_ifaddr *ifa) { - struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); - - inet_free_ifa(ifa); + call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } void in_dev_finish_destroy(struct in_device *idev) @@ -194,7 +193,7 @@ static void inetdev_destroy(struct in_device *in_dev) while ((ifa = in_dev->ifa_list) != NULL) { inet_del_ifa(in_dev, &in_dev->ifa_list, 0); - call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); + inet_free_ifa(ifa); } #ifdef CONFIG_SYSCTL @@ -250,7 +249,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, rtmsg_ifa(RTM_DELADDR, ifa); notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); - call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); + inet_free_ifa(ifa); } } @@ -271,7 +270,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, rtmsg_ifa(RTM_DELADDR, ifa1); notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (destroy) { - call_rcu(&ifa1->rcu_head, inet_rcu_free_ifa); + inet_free_ifa(ifa1); if (!in_dev->ifa_list) inetdev_destroy(in_dev); @@ -286,7 +285,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) ASSERT_RTNL(); if (!ifa->ifa_local) { - call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); + inet_free_ifa(ifa); return 0; } @@ -301,11 +300,11 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) if (ifa1->ifa_mask == 
ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { if (ifa1->ifa_local == ifa->ifa_local) { - call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); + inet_free_ifa(ifa); return -EEXIST; } if (ifa1->ifa_scope != ifa->ifa_scope) { - call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); + inet_free_ifa(ifa); return -EINVAL; } ifa->ifa_flags |= IFA_F_SECONDARY; @@ -338,7 +337,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) if (!in_dev) { in_dev = inetdev_init(dev); if (!in_dev) { - call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); + inet_free_ifa(ifa); return -ENOBUFS; } } -- cgit v1.2.3 From b9b78dbe1fc9ffd0536323941f3d24fced91b7ab Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 15 Aug 2004 09:16:25 -0700 Subject: [NET]: Enhanced version of net_random(). Here is another alternative, using a Tausworthe generator. It uses percpu state. The one small semantic change is the net_srandom() only affects the current cpu's seed. The problem was that having it change all cpu's seed would mean adding locking and the only users today are a couple of places that feed in mac address to try to make sure address resolution doesn't collide. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/net.h | 1 + net/core/dev.c | 2 + net/core/utils.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 102 insertions(+), 5 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index cec1482f28e2..80e7fec727e3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -169,6 +169,7 @@ extern struct socket *sockfd_lookup(int fd, int *err); extern int net_ratelimit(void); extern unsigned long net_random(void); extern void net_srandom(unsigned long); +extern void net_random_init(void); extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/net/core/dev.c b/net/core/dev.c index 547469d2ac6f..293123d8fab4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3280,6 +3280,8 @@ static int __init net_dev_init(void) BUG_ON(!dev_boot_phase); + net_random_init(); + if (dev_proc_init()) goto out; diff --git a/net/core/utils.c b/net/core/utils.c index 8058d9c5e236..6093174581fd 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -19,22 +19,116 @@ #include #include #include +#include +#include #include #include -static unsigned long net_rand_seed = 152L; + +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. 
+ + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. (Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) + + This affects the seeding procedure by imposing the requirement + s1 > 1, s2 > 7, s3 > 15. + +*/ +struct nrnd_state { + u32 s1, s2, s3; +}; + +static DEFINE_PER_CPU(struct nrnd_state, net_rand_state); + +static u32 __net_random(struct nrnd_state *state) +{ +#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) + + state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); + state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); + state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + + return (state->s1 ^ state->s2 ^ state->s3); +} + +static void __net_srandom(struct nrnd_state *state, unsigned long entropy) +{ + u32 s = state->s1 ^ entropy; + + if (s == 0) + s = 1; /* default seed is 1 */ + +#define LCG(n) (69069 * n) + state->s1 = LCG(s); + state->s2 = LCG(state->s1); + state->s3 = LCG(state->s2); + + /* "warm it up" */ + __net_random(state); + __net_random(state); + __net_random(state); + __net_random(state); + __net_random(state); + __net_random(state); +} + unsigned long net_random(void) { - net_rand_seed=net_rand_seed*69069L+1; - return net_rand_seed^jiffies; + unsigned long r; + struct nrnd_state *state = &get_cpu_var(net_rand_state); + r = __net_random(state); + put_cpu_var(state); + return r; } + void net_srandom(unsigned long entropy) { - net_rand_seed ^= entropy; - net_random(); + struct nrnd_state *state = &get_cpu_var(net_rand_state); + 
__net_srandom(state, entropy); + put_cpu_var(state); +} + +void __init net_random_init(void) +{ + int i; + unsigned long seed[NR_CPUS]; + + get_random_bytes(seed, sizeof(seed)); + + for (i = 0; i < NR_CPUS; i++) { + struct nrnd_state *state = &per_cpu(net_rand_state,i); + + memset(state, 0, sizeof(*state)); + __net_srandom(state, seed[i]); + } } int net_msg_cost = 5*HZ; -- cgit v1.2.3 From 81af78978136bd54faff7f4b352e9495ed159271 Mon Sep 17 00:00:00 2001 From: Cal Peake Date: Mon, 16 Aug 2004 00:09:34 -0700 Subject: [IPV4]: Delete bogus newline in first TcpExt procsfs line. Signed-off-by: Cal Peake Signed-off-by: David S. Miller --- net/ipv4/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fca126a09670..912bbcc7f415 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -330,7 +330,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) { int i; - seq_puts(seq, "\nTcpExt:"); + seq_puts(seq, "TcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); -- cgit v1.2.3 From ff1880aa5df94ea965cb55688a00a88f091718c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Aug 2004 00:11:08 -0700 Subject: [NET]: Missing header includes and forward declarations. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 1 + include/net/ip6_fib.h | 1 + include/net/ip6_route.h | 2 ++ include/net/neighbour.h | 3 +++ 4 files changed, 7 insertions(+) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 670558170bbd..ee61b0f31174 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -99,6 +99,7 @@ enum { #ifdef __KERNEL__ #include +#include struct netlink_skb_parms { diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 14d41c4baa99..319904518194 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -20,6 +20,7 @@ #include #include #include +#include struct rt6_info; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 75d503dc1b5e..f5229c50d35f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -11,8 +11,10 @@ #include #include +#include #include #include +#include struct pol_chain { int type; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 464203b2abac..2f1c3783f7ba 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -45,6 +45,7 @@ #include #include +#include #include #include @@ -53,6 +54,8 @@ #define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY) #define NUD_CONNECTED (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE) +struct neighbour; + struct neigh_parms { struct neigh_parms *next; -- cgit v1.2.3 From da5a87944029baf452d137eb38e6f2ebf2de088a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 16 Aug 2004 03:03:08 -0700 Subject: [PKT_SCHED]: Resolve race condition with module unload in qdisc_create() This patch resolves the race condition with module unload in qdisc_create by moving try_module_get up to the first qdisc_lookup_ops call. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/sched/sch_api.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 19ff1b9e3e01..21cdd83d0f9d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -407,6 +407,9 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) err = -EINVAL; if (ops == NULL) goto err_out; + err = -EBUSY; + if (!try_module_get(ops->owner)) + goto err_out; /* ensure that the Qdisc and the private data are 32-byte aligned */ size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST); @@ -415,18 +418,12 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) p = kmalloc(size, GFP_KERNEL); err = -ENOBUFS; if (!p) - goto err_out; + goto err_out2; memset(p, 0, size); sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST); sch->padded = (char *)sch - (char *)p; - /* Grrr... Resolve race condition with module unload */ - - err = -EINVAL; - if (ops != qdisc_lookup_ops(kind)) - goto err_out; - INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); @@ -444,7 +441,7 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) handle = qdisc_alloc_handle(dev); err = -ENOMEM; if (handle == 0) - goto err_out; + goto err_out2; } if (handle == TC_H_INGRESS) @@ -452,10 +449,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) else sch->handle = handle; - err = -EBUSY; - if (!try_module_get(ops->owner)) - goto err_out; - /* enqueue is accessed locklessly - make sure it's visible * before we set a netdevice's qdisc pointer to sch */ smp_wmb(); @@ -471,8 +464,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) #endif return sch; } +err_out2: module_put(ops->owner); - err_out: *errp = err; if (p) -- cgit v1.2.3 From 27a245eee1f8f863ccc208edbc6ab90935dc2674 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 16 Aug 2004 
03:10:00 -0700 Subject: [PKT_SCHED]: Remove unnecessary memsets in packet schedulers This patch removes some more unnecessary memsets in packet schedulers. The qdisc's private data is already set to 0 in qdisc_create/qdisc_create_dflt. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 1 - net/sched/sch_dsmark.c | 2 -- net/sched/sch_hfsc.c | 1 - net/sched/sch_htb.c | 1 - net/sched/sch_ingress.c | 8 -------- 5 files changed, 13 deletions(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ca08449e7b03..fe530156875a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -573,7 +573,6 @@ static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt) struct atm_qdisc_data *p = PRIV(sch); DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); - memset(p,0,sizeof(*p)); p->flows = &p->link; if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops))) p->link.q = &noop_qdisc; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index e0831a4a4457..28b61f0f87a7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -331,8 +331,6 @@ int dsmark_init(struct Qdisc *sch,struct rtattr *opt) !tb[TCA_DSMARK_INDICES-1] || RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16)) return -EINVAL; - memset(p,0,sizeof(*p)); - p->filter_list = NULL; p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]); if (!p->indices) return -EINVAL; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index cc561e181892..fa1a9e5494c8 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1530,7 +1530,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) return -EINVAL; qopt = RTA_DATA(opt); - memset(q, 0, sizeof(struct hfsc_sched)); sch->stats_lock = &sch->dev->queue_lock; q->defcls = qopt->defcls; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d07dfd8b5cf0..61c8fa4db608 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1277,7 +1277,6 @@ static int htb_init(struct Qdisc 
*sch, struct rtattr *opt) HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); return -EINVAL; } - memset(q,0,sizeof(*q)); q->debug = gopt->debug; HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 30f2176b992d..93ed728f3a02 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -289,9 +289,6 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt) #endif #endif - if (NULL == p) - goto error; - #ifndef CONFIG_NET_CLS_ACT #ifdef CONFIG_NETFILTER if (!nf_registered) { @@ -305,8 +302,6 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt) #endif DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); - memset(p, 0, sizeof(*p)); - p->filter_list = NULL; p->q = &noop_qdisc; return 0; error: @@ -346,9 +341,6 @@ static void ingress_destroy(struct Qdisc *sch) p->filter_list = tp->next; tcf_destroy(tp); } - memset(p, 0, sizeof(*p)); - p->filter_list = NULL; - #if 0 /* for future use */ qdisc_destroy(p->q); -- cgit v1.2.3 From a91f39e100a7d16d589d137352e3e8826d40084e Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 05:22:00 +0900 Subject: [IPV6] don't try to insert same local route multiple times. Signed-off-by: Hideaki YOSHIFUJI --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 40ad73c5cbb7..04f8d99339bf 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1457,8 +1457,7 @@ ok: spin_unlock(&ifp->lock); if (!(flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ? - 0 : RTM_NEWADDR, ifp); + ipv6_ifa_notify(0, ifp); } else spin_unlock(&ifp->lock); -- cgit v1.2.3 From b08bb6d2f00fb850901a42d095dea602b06c29ae Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 05:22:28 +0900 Subject: [IPV6] export rt6_ins() as ip6_ins_rt(). 
Signed-off-by: Hideaki YOSHIFUJI --- include/net/ip6_route.h | 3 +++ net/ipv6/route.c | 14 +++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 75d503dc1b5e..9e9a8f86f0d0 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -40,6 +40,9 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); extern int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *, void *rtattr); +extern int ip6_ins_rt(struct rt6_info *, + struct nlmsghdr *, + void *rtattr); extern int ip6_del_rt(struct rt6_info *, struct nlmsghdr *, void *rtattr); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2017c69dc9f3..13b2218f2f81 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -336,13 +336,13 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, return NULL; } -/* rt6_ins is called with FREE rt6_lock. +/* ip6_ins_rt is called with FREE rt6_lock. It takes new route entry, the addition fails by any reason the route is freed. In any case, if caller does not hold it, it may be destroyed. 
*/ -static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr) +int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr) { int err; @@ -390,7 +390,7 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, dst_hold(&rt->u.dst); - err = rt6_ins(rt, NULL, NULL); + err = ip6_ins_rt(rt, NULL, NULL); if (err == 0) return rt; @@ -901,7 +901,7 @@ install_route: rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = in6_dev_get(dev); - return rt6_ins(rt, nlh, _rtattr); + return ip6_ins_rt(rt, nlh, _rtattr); out: if (dev) @@ -1054,7 +1054,7 @@ source_ok: nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst)); - if (rt6_ins(nrt, NULL, NULL)) + if (ip6_ins_rt(nrt, NULL, NULL)) goto out; if (rt->rt6i_flags&RTF_CACHE) { @@ -1144,7 +1144,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; - rt6_ins(nrt, NULL, NULL); + ip6_ins_rt(nrt, NULL, NULL); } out: @@ -1336,7 +1336,7 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast) ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt6_ins(rt, NULL, NULL); + ip6_ins_rt(rt, NULL, NULL); return 0; } -- cgit v1.2.3 From caab50f0a6f00f2bf2062e4f4a5372a09b3ea525 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 05:23:02 +0900 Subject: [IPV6] addrconf_dst_alloc() to allocate new route for local address. 
Signed-Off-By: Hideaki YOSHIFUJI --- include/net/ip6_route.h | 4 ++++ net/ipv6/route.c | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9e9a8f86f0d0..9755fdd0b20d 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -72,6 +72,10 @@ extern struct dst_entry *ndisc_dst_alloc(struct net_device *dev, extern int ndisc_dst_gc(int *more); extern void fib6_force_start_gc(void); +extern struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, + const struct in6_addr *addr, + int anycast); + /* * support functions for ND * diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 13b2218f2f81..e8113f1c527a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1303,23 +1303,26 @@ int ip6_pkt_discard_out(struct sk_buff **pskb) } /* - * Add address + * Allocate a dst for local (unicast / anycast) address. */ -int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast) +struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, + const struct in6_addr *addr, + int anycast) { struct rt6_info *rt = ip6_dst_alloc(); if (rt == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); dev_hold(&loopback_dev); + in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; rt->rt6i_dev = &loopback_dev; - rt->rt6i_idev = in6_dev_get(&loopback_dev); + rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst)); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev); @@ -1331,14 +1334,39 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast) rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); if (rt->rt6i_nexthop == NULL) { dst_free((struct dst_entry *) rt); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 
rt->rt6i_dst.plen = 128; + + return rt; +} + +/* + * Add address + */ + +int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast) +{ + struct inet6_dev *idev; + struct rt6_info *rt; + int err = 0; + + idev = in6_dev_get(&loopback_dev); + + rt = addrconf_dst_alloc(idev, addr, anycast); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto out; + } + ip6_ins_rt(rt, NULL, NULL); - return 0; +out: + if (idev) + in6_dev_put(idev); + return err; } /* Delete address. Warning: you should check that this address -- cgit v1.2.3 From c2de5154fff86f91041a59e6586dd37a4394e3b1 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 05:23:32 +0900 Subject: [IPV4,IPV6] set idev/rt6i_idev to loopback instead of NULL, to omit checking if it is non-NULL. (Based on hint by David S. Miller ) Signed-off-by: Hideaki YOSHIFUJI --- net/ipv4/route.c | 9 ++++++--- net/ipv6/route.c | 11 ++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 15d9eca5384e..3fcea5fa99eb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1342,9 +1342,12 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, int how) { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (idev) { - rt->idev = NULL; - in_dev_put(idev); + if (idev && idev->dev != &loopback_dev) { + struct in_device *loopback_idev = in_dev_get(&loopback_dev); + if (loopback_idev) { + rt->idev = loopback_idev; + in_dev_put(idev); + } } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8113f1c527a..fe8789eb9c13 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -155,7 +155,16 @@ static void ip6_dst_destroy(struct dst_entry *dst) static void ip6_dst_ifdown(struct dst_entry *dst, int how) { - ip6_dst_destroy(dst); + struct rt6_info *rt = (struct rt6_info *)dst; + struct inet6_dev *idev = rt->rt6i_idev; + + if (idev != NULL && idev->dev != &loopback_dev) { + struct inet6_dev *loopback_idev = 
in6_dev_get(&loopback_dev); + if (loopback_idev != NULL) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); + } + } } /* -- cgit v1.2.3 From f3200f7e01252f72344e68fb618f61fed4d07bd7 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 05:24:09 +0900 Subject: [IPV6] ensure rt6i_idev is non-NULL when setting up new rt6_info{}. Signed-off-by: Hideaki YOSHIFUJI --- net/ipv6/route.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fe8789eb9c13..01703bf111ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -617,8 +617,13 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, struct in6_addr *addr, int (*output)(struct sk_buff **)) { - struct rt6_info *rt = ip6_dst_alloc(); + struct rt6_info *rt; + struct inet6_dev *idev = in6_dev_get(dev); + if (unlikely(idev == NULL)) + return NULL; + + rt = ip6_dst_alloc(); if (unlikely(rt == NULL)) goto out; @@ -629,7 +634,7 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, neigh = ndisc_get_neigh(dev, addr); rt->rt6i_dev = dev; - rt->rt6i_idev = in6_dev_get(dev); + rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; @@ -740,8 +745,9 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr) int err; struct rtmsg *r; struct rtattr **rta; - struct rt6_info *rt; + struct rt6_info *rt = NULL; struct net_device *dev = NULL; + struct inet6_dev *idev = NULL; int addr_type; rta = (struct rtattr **) _rtattr; @@ -753,9 +759,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr) return -EINVAL; #endif if (rtmsg->rtmsg_ifindex) { + err = -ENODEV; dev = dev_get_by_index(rtmsg->rtmsg_ifindex); if (!dev) - return -ENODEV; + goto out; + idev = in6_dev_get(dev); + if (!idev) + goto out; } if (rtmsg->rtmsg_metric == 0) @@ -802,10 +812,17 @@ int ip6_route_add(struct in6_rtmsg 
*rtmsg, struct nlmsghdr *nlh, void *_rtattr) */ if ((rtmsg->rtmsg_flags&RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { - if (dev) + if (dev && dev != &loopback_dev) { dev_put(dev); - dev = &loopback_dev; - dev_hold(dev); + in6_dev_put(idev); + dev = &loopback_dev; + dev_hold(dev); + idev = in6_dev_get(dev); + if (!idev) { + err = -ENODEV; + goto out; + } + } rt->u.dst.output = ip6_pkt_discard_out; rt->u.dst.input = ip6_pkt_discard; rt->u.dst.error = -ENETUNREACH; @@ -847,7 +864,9 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr) } } else { dev = grt->rt6i_dev; + idev = grt->rt6i_idev; dev_hold(dev); + in6_dev_hold(grt->rt6i_idev); } if (!(grt->rt6i_flags&RTF_GATEWAY)) err = 0; @@ -909,7 +928,7 @@ install_route: if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst)); rt->u.dst.dev = dev; - rt->rt6i_idev = in6_dev_get(dev); + rt->rt6i_idev = idev; return ip6_ins_rt(rt, nlh, _rtattr); out: @@ -1363,6 +1382,10 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast) int err = 0; idev = in6_dev_get(&loopback_dev); + if (!idev) { + err = -ENODEV; + goto out; + } rt = addrconf_dst_alloc(idev, addr, anycast); if (IS_ERR(rt)) { -- cgit v1.2.3 From c375d51751826e53163cda9a3e902724952a2a28 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 05:24:37 +0900 Subject: [IPV6] take rt6i_idev into account when looking up routes. This is required because we will add "same" routes (except for rt6i_idev) on loopback for routes for local address. 
Signed-off-by: Hideaki YOSHIFUJI --- net/ipv6/ip6_fib.c | 7 ++++--- net/ipv6/route.c | 10 +++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 65a137241777..169506ba0c40 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -449,9 +449,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * Same priority level */ - if ((iter->rt6i_dev == rt->rt6i_dev) && - (ipv6_addr_cmp(&iter->rt6i_gateway, - &rt->rt6i_gateway) == 0)) { + if (iter->rt6i_dev == rt->rt6i_dev && + iter->rt6i_idev == rt->rt6i_idev && + ipv6_addr_cmp(&iter->rt6i_gateway, + &rt->rt6i_gateway) == 0) { if (!(iter->rt6i_flags&RTF_EXPIRES)) return -EEXIST; iter->rt6i_expires = rt->rt6i_expires; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 01703bf111ff..888b079c60d4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -183,8 +183,16 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, struct net_device *dev = sprt->rt6i_dev; if (dev->ifindex == oif) return sprt; - if (dev->flags&IFF_LOOPBACK) + if (dev->flags & IFF_LOOPBACK) { + if (sprt->rt6i_idev->dev->ifindex != oif) { + if (strict && oif) + continue; + if (local && (!oif || + local->rt6i_idev->dev->ifindex == oif)) + continue; + } local = sprt; + } } if (local) -- cgit v1.2.3 From 6dad59bb4069702e01a4653f321b295450cca5c9 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 05:25:16 +0900 Subject: [IPV6] refer inet6 device via corresponding local route from address structure. 
Signed-off-by: Hideaki YOSHIFUJI --- include/net/if_inet6.h | 2 ++ net/ipv6/addrconf.c | 46 ++++++++++++++++++++++++++++++++------------ net/ipv6/anycast.c | 40 +++++++++++++++++++++++++++----------- net/ipv6/route.c | 52 ++------------------------------------------------ 4 files changed, 67 insertions(+), 73 deletions(-) diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 48280b138cb9..76ce5f8b6c1e 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -51,6 +51,7 @@ struct inet6_ifaddr struct timer_list timer; struct inet6_dev *idev; + struct rt6_info *rt; struct inet6_ifaddr *lst_next; /* next addr in addr_lst */ struct inet6_ifaddr *if_next; /* next addr in inet6_dev */ @@ -133,6 +134,7 @@ struct ifacaddr6 { struct in6_addr aca_addr; struct inet6_dev *aca_idev; + struct rt6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; atomic_t aca_refcnt; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 04f8d99339bf..7150375908a8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -472,6 +472,8 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) printk("Freeing alive inet6 address %p\n", ifp); return; } + dst_release(&ifp->rt->u.dst); + inet6_ifa_count--; kfree(ifp); } @@ -482,25 +484,33 @@ static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, int scope, unsigned flags) { - struct inet6_ifaddr *ifa; + struct inet6_ifaddr *ifa = NULL; + struct rt6_info *rt; int hash; static spinlock_t lock = SPIN_LOCK_UNLOCKED; + int err = 0; spin_lock_bh(&lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(addr, idev->dev)) { - spin_unlock_bh(&lock); ADBG(("ipv6_add_addr: already assigned\n")); - return ERR_PTR(-EEXIST); + err = -EEXIST; + goto out; } ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); if (ifa == NULL) { - spin_unlock_bh(&lock); ADBG(("ipv6_add_addr: malloc failed\n")); - return ERR_PTR(-ENOBUFS); + err = -ENOBUFS; + 
goto out; + } + + rt = addrconf_dst_alloc(idev, addr, 0); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto out; } memset(ifa, 0, sizeof(struct inet6_ifaddr)); @@ -517,9 +527,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, read_lock(&addrconf_lock); if (idev->dead) { read_unlock(&addrconf_lock); - spin_unlock_bh(&lock); - kfree(ifa); - return ERR_PTR(-ENODEV); /*XXX*/ + err = -ENODEV; /*XXX*/ + goto out; } inet6_ifa_count++; @@ -553,12 +562,20 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, } #endif + ifa->rt = rt; + in6_ifa_hold(ifa); write_unlock_bh(&idev->lock); read_unlock(&addrconf_lock); +out: spin_unlock_bh(&lock); - notifier_call_chain(&inet6addr_chain,NETDEV_UP,ifa); + if (unlikely(err == 0)) + notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + else { + kfree(ifa); + ifa = ERR_PTR(err); + } return ifa; } @@ -2981,7 +2998,9 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: - ip6_rt_addr_add(&ifp->addr, ifp->idev->dev, 0); + dst_hold(&ifp->rt->u.dst); + if (ip6_ins_rt(ifp->rt, NULL, NULL)) + dst_release(&ifp->rt->u.dst); break; case RTM_DELADDR: addrconf_leave_solict(ifp->idev->dev, &ifp->addr); @@ -2992,8 +3011,11 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (!ipv6_addr_any(&addr)) ipv6_dev_ac_dec(ifp->idev->dev, &addr); } - if (!ipv6_chk_addr(&ifp->addr, ifp->idev->dev, 1)) - ip6_rt_addr_del(&ifp->addr, ifp->idev->dev); + dst_hold(&ifp->rt->u.dst); + if (ip6_del_rt(ifp->rt, NULL, NULL)) + dst_free(&ifp->rt->u.dst); + else + dst_release(&ifp->rt->u.dst); break; } } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 5b1e4d959f4a..537dc37be239 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -293,6 +293,7 @@ static void aca_put(struct ifacaddr6 *ac) { if (atomic_dec_and_test(&ac->aca_refcnt)) { in6_dev_put(ac->aca_idev); + dst_release(&ac->aca_rt->u.dst); kfree(ac); } } @@ -304,6 +305,8 @@ 
int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) { struct ifacaddr6 *aca; struct inet6_dev *idev; + struct rt6_info *rt; + int err; idev = in6_dev_get(dev); @@ -312,17 +315,15 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) write_lock_bh(&idev->lock); if (idev->dead) { - write_unlock_bh(&idev->lock); - in6_dev_put(idev); - return -ENODEV; + err = -ENODEV; + goto out; } for (aca = idev->ac_list; aca; aca = aca->aca_next) { if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) { aca->aca_users++; - write_unlock_bh(&idev->lock); - in6_dev_put(idev); - return 0; + err = 0; + goto out; } } @@ -333,15 +334,22 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC); if (aca == NULL) { - write_unlock_bh(&idev->lock); - in6_dev_put(idev); - return -ENOMEM; + err = -ENOMEM; + goto out; + } + + rt = addrconf_dst_alloc(idev, addr, 1); + if (IS_ERR(rt)) { + kfree(aca); + err = PTR_ERR(rt); + goto out; } memset(aca, 0, sizeof(struct ifacaddr6)); ipv6_addr_copy(&aca->aca_addr, addr); aca->aca_idev = idev; + aca->aca_rt = rt; aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; @@ -352,12 +360,18 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) idev->ac_list = aca; write_unlock_bh(&idev->lock); - ip6_rt_addr_add(&aca->aca_addr, dev, 1); + dst_hold(&rt->u.dst); + if (ip6_ins_rt(rt, NULL, NULL)) + dst_release(&rt->u.dst); addrconf_join_solict(dev, &aca->aca_addr); aca_put(aca); return 0; +out: + write_unlock_bh(&idev->lock); + in6_dev_put(idev); + return err; } /* @@ -396,7 +410,11 @@ int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(dev, &aca->aca_addr); - ip6_rt_addr_del(&aca->aca_addr, dev); + dst_hold(&aca->aca_rt->u.dst); + if (ip6_del_rt(aca->aca_rt, NULL, NULL)) + dst_free(&aca->aca_rt->u.dst); + else + 
dst_release(&aca->aca_rt->u.dst); aca_put(aca); in6_dev_put(idev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 888b079c60d4..77e9de707e96 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1376,57 +1376,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - return rt; -} - -/* - * Add address - */ - -int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast) -{ - struct inet6_dev *idev; - struct rt6_info *rt; - int err = 0; - - idev = in6_dev_get(&loopback_dev); - if (!idev) { - err = -ENODEV; - goto out; - } - - rt = addrconf_dst_alloc(idev, addr, anycast); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto out; - } - - ip6_ins_rt(rt, NULL, NULL); - -out: - if (idev) - in6_dev_put(idev); - return err; -} - -/* Delete address. Warning: you should check that this address - disappeared before calling this function. - */ - -int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev) -{ - struct rt6_info *rt; - int err = -ENOENT; - - rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1); - if (rt) { - if (rt->rt6i_dst.plen == 128) - err = ip6_del_rt(rt, NULL, NULL); - else - dst_release(&rt->u.dst); - } + atomic_set(&rt->u.dst.__refcnt, 1); - return err; + return rt; } static int fib6_ifdown(struct rt6_info *rt, void *arg) -- cgit v1.2.3 From 00356691a86728b947454f1a5962d29b068108a9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 00:28:05 -0700 Subject: [IPV4]: Fix theoretical loop on SMP in ip_evictor(). Snapshot the amount of work to do, and just do it. In this way we avoid a theoretical loop whereby one cpu sits in ip_evictor() tossing fragments while another keeps adding a fragment just as we bring ip_frag_mem down below the low threshold. Signed-off-by: David S. 
Miller --- net/ipv4/ip_fragment.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 6fd69feffce4..b9f1586ae455 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -169,14 +169,18 @@ static void ipfrag_secret_rebuild(unsigned long dummy) atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ /* Memory Tracking Functions. */ -static __inline__ void frag_kfree_skb(struct sk_buff *skb) +static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { + if (work) + *work -= skb->truesize; atomic_sub(skb->truesize, &ip_frag_mem); kfree_skb(skb); } -static __inline__ void frag_free_queue(struct ipq *qp) +static __inline__ void frag_free_queue(struct ipq *qp, int *work) { + if (work) + *work -= sizeof(struct ipq); atomic_sub(sizeof(struct ipq), &ip_frag_mem); kfree(qp); } @@ -195,7 +199,7 @@ static __inline__ struct ipq *frag_alloc_queue(void) /* Destruction primitives. */ /* Complete destruction of ipq. */ -static void ip_frag_destroy(struct ipq *qp) +static void ip_frag_destroy(struct ipq *qp, int *work) { struct sk_buff *fp; @@ -207,18 +211,18 @@ static void ip_frag_destroy(struct ipq *qp) while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp); + frag_kfree_skb(fp, work); fp = xp; } /* Finally, release the queue descriptor itself. */ - frag_free_queue(qp); + frag_free_queue(qp, work); } -static __inline__ void ipq_put(struct ipq *ipq) +static __inline__ void ipq_put(struct ipq *ipq, int *work) { if (atomic_dec_and_test(&ipq->refcnt)) - ip_frag_destroy(ipq); + ip_frag_destroy(ipq, work); } /* Kill ipq entry. 
It is not destroyed immediately, @@ -243,10 +247,13 @@ static void ip_evictor(void) { struct ipq *qp; struct list_head *tmp; + int work; - for(;;) { - if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh) - return; + work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; + if (work <= 0) + return; + + while (work > 0) { read_lock(&ipfrag_lock); if (list_empty(&ipq_lru_list)) { read_unlock(&ipfrag_lock); @@ -262,7 +269,7 @@ static void ip_evictor(void) ipq_kill(qp); spin_unlock(&qp->lock); - ipq_put(qp); + ipq_put(qp, &work); IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); } } @@ -294,7 +301,7 @@ static void ip_expire(unsigned long arg) } out: spin_unlock(&qp->lock); - ipq_put(qp); + ipq_put(qp, NULL); } /* Creation primitives. */ @@ -317,7 +324,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) atomic_inc(&qp->refcnt); write_unlock(&ipfrag_lock); qp_in->last_in |= COMPLETE; - ipq_put(qp_in); + ipq_put(qp_in, NULL); return qp; } } @@ -506,7 +513,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->fragments = next; qp->meat -= free_it->len; - frag_kfree_skb(free_it); + frag_kfree_skb(free_it, NULL); } } @@ -657,7 +664,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) ret = ip_frag_reasm(qp, dev); spin_unlock(&qp->lock); - ipq_put(qp); + ipq_put(qp, NULL); return ret; } -- cgit v1.2.3 From d796f4b707bcd053637ca9abc6f4ca661314fcdb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 00:34:14 -0700 Subject: [IPV6]: ip6_evictor() has same problem as ip_evictor(). Signed-off-by: David S. 
Miller --- net/ipv6/reassembly.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 836d2ae8464e..e07da9ee8990 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -195,14 +195,18 @@ static void ip6_frag_secret_rebuild(unsigned long dummy) atomic_t ip6_frag_mem = ATOMIC_INIT(0); /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb) +static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { + if (work) + *work -= skb->truesize; atomic_sub(skb->truesize, &ip6_frag_mem); kfree_skb(skb); } -static inline void frag_free_queue(struct frag_queue *fq) +static inline void frag_free_queue(struct frag_queue *fq, int *work) { + if (work) + *work -= sizeof(struct frag_queue); atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); kfree(fq); } @@ -220,7 +224,7 @@ static inline struct frag_queue *frag_alloc_queue(void) /* Destruction primitives. */ /* Complete destruction of fq. */ -static void ip6_frag_destroy(struct frag_queue *fq) +static void ip6_frag_destroy(struct frag_queue *fq, int *work) { struct sk_buff *fp; @@ -232,17 +236,17 @@ static void ip6_frag_destroy(struct frag_queue *fq) while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp); + frag_kfree_skb(fp, work); fp = xp; } - frag_free_queue(fq); + frag_free_queue(fq, work); } -static __inline__ void fq_put(struct frag_queue *fq) +static __inline__ void fq_put(struct frag_queue *fq, int *work) { if (atomic_dec_and_test(&fq->refcnt)) - ip6_frag_destroy(fq); + ip6_frag_destroy(fq, work); } /* Kill fq entry. 
It is not destroyed immediately, @@ -264,10 +268,13 @@ static void ip6_evictor(void) { struct frag_queue *fq; struct list_head *tmp; + int work; - for(;;) { - if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) - return; + work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh; + if (work <= 0) + return; + + while(work > 0) { read_lock(&ip6_frag_lock); if (list_empty(&ip6_frag_lru_list)) { read_unlock(&ip6_frag_lock); @@ -283,7 +290,7 @@ static void ip6_evictor(void) fq_kill(fq); spin_unlock(&fq->lock); - fq_put(fq); + fq_put(fq, &work); IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); } } @@ -320,7 +327,7 @@ static void ip6_frag_expire(unsigned long data) } out: spin_unlock(&fq->lock); - fq_put(fq); + fq_put(fq, NULL); } /* Creation primitives. */ @@ -340,7 +347,7 @@ static struct frag_queue *ip6_frag_intern(unsigned int hash, atomic_inc(&fq->refcnt); write_unlock(&ip6_frag_lock); fq_in->last_in |= COMPLETE; - fq_put(fq_in); + fq_put(fq_in, NULL); return fq; } } @@ -539,7 +546,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->fragments = next; fq->meat -= free_it->len; - frag_kfree_skb(free_it); + frag_kfree_skb(free_it, NULL); } } @@ -734,7 +741,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) ret = ip6_frag_reasm(fq, skbp, nhoffp, dev); spin_unlock(&fq->lock); - fq_put(fq); + fq_put(fq, NULL); return ret; } -- cgit v1.2.3 From e0316144b33f085e0084f0b38f705445ba37a681 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Aug 2004 00:35:21 -0700 Subject: [ATALK]: Fix build with SYSCTL=n Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/atalk.h | 29 ++++++++++++++++++++++++++--- net/appletalk/Makefile | 3 ++- net/appletalk/atalk_proc.c | 13 ------------- net/appletalk/ddp.c | 10 ---------- net/appletalk/sysctl_net_atalk.c | 18 ++---------------- 5 files changed, 30 insertions(+), 43 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index c4d2a0949177..2a9b82002591 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -191,10 +191,13 @@ extern int aarp_send_ddp(struct net_device *dev, extern void aarp_send_probe(struct net_device *dev, struct atalk_addr *addr); extern void aarp_device_down(struct net_device *dev); +extern void aarp_probe_network(struct atalk_iface *atif); +extern int aarp_proxy_probe_network(struct atalk_iface *atif, + struct atalk_addr *sa); +extern void aarp_proxy_remove(struct net_device *dev, + struct atalk_addr *sa); -#ifdef MODULE -extern void aarp_cleanup_module(void); -#endif /* MODULE */ +extern void aarp_cleanup_module(void); #define at_sk(__sk) ((struct atalk_sock *)(__sk)->sk_protinfo) @@ -209,8 +212,28 @@ extern rwlock_t atalk_interfaces_lock; extern struct atalk_route atrtr_default; +extern struct file_operations atalk_seq_arp_fops; + +extern int sysctl_aarp_expiry_time; +extern int sysctl_aarp_tick_time; +extern int sysctl_aarp_retransmit_limit; +extern int sysctl_aarp_resolve_time; + +#ifdef CONFIG_SYSCTL +extern void atalk_register_sysctl(void); +extern void atalk_unregister_sysctl(void); +#else +#define atalk_register_sysctl() do { } while(0) +#define atalk_unregister_sysctl() do { } while(0) +#endif + +#ifdef CONFIG_PROC_FS extern int atalk_proc_init(void); extern void atalk_proc_exit(void); +#else +#define atalk_proc_init() 0 +#define atalk_proc_exit() do { } while(0) +#endif /* CONFIG_PROC_FS */ #endif /* __KERNEL__ */ #endif /* __LINUX_ATALK_H__ */ diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile index cbe8a6a0bef0..d179728ad522 100644 --- a/net/appletalk/Makefile +++ 
b/net/appletalk/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_ATALK) += appletalk.o -appletalk-y := aarp.o ddp.o atalk_proc.o +appletalk-y := aarp.o ddp.o +appletalk-$(CONFIG_PROC_FS) += atalk_proc.o appletalk-$(CONFIG_SYSCTL) += sysctl_net_atalk.o diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 1e00a582277d..4d20501fad77 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -15,8 +15,6 @@ #include #include -#ifdef CONFIG_PROC_FS -extern struct file_operations atalk_seq_arp_fops; static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos) { @@ -321,14 +319,3 @@ void __exit atalk_proc_exit(void) remove_proc_entry("arp", atalk_proc_dir); remove_proc_entry("atalk", proc_net); } - -#else /* CONFIG_PROC_FS */ -int __init atalk_proc_init(void) -{ - return 0; -} - -void __exit atalk_proc_exit(void) -{ -} -#endif /* CONFIG_PROC_FS */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 4185d7b8ed02..588cbe1ec16f 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -61,16 +61,6 @@ #include #include -extern void aarp_cleanup_module(void); - -extern void aarp_probe_network(struct atalk_iface *atif); -extern int aarp_proxy_probe_network(struct atalk_iface *atif, - struct atalk_addr *sa); -extern void aarp_proxy_remove(struct net_device *dev, struct atalk_addr *sa); - -extern void atalk_register_sysctl(void); -extern void atalk_unregister_sysctl(void); - struct datalink_proto *ddp_dl, *aarp_dl; static struct proto_ops atalk_dgram_ops; diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index 25b33f670499..af7f0604395d 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -7,13 +7,9 @@ */ #include - -#ifdef CONFIG_SYSCTL #include -extern int sysctl_aarp_expiry_time; -extern int sysctl_aarp_tick_time; -extern int sysctl_aarp_retransmit_limit; -extern int sysctl_aarp_resolve_time; +#include +#include static struct ctl_table atalk_table[] = { { @@ 
-85,13 +81,3 @@ void atalk_unregister_sysctl(void) { unregister_sysctl_table(atalk_table_header); } - -#else /* CONFIG_PROC_FS */ -void atalk_register_sysctl(void) -{ -} - -void atalk_unregister_sysctl(void) -{ -} -#endif /* CONFIG_PROC_FS */ -- cgit v1.2.3 From f1c51e9f4e7d8e301098a2935060a43c02e56449 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Wed, 18 Aug 2004 00:36:22 -0700 Subject: [DECONET]: Fix build with SYSCTL=n Signed-off-by: Hideaki YOSHIFUJI Signed-off-by: David S. Miller --- net/decnet/dn_dev.c | 122 ++++++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 11b0f0c6d45c..5a05efb83092 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -247,21 +247,6 @@ static struct dn_dev_sysctl_table { }, {0}} }; -static inline __u16 mtu2blksize(struct net_device *dev) -{ - u32 blksize = dev->mtu; - if (blksize > 0xffff) - blksize = 0xffff; - - if (dev->type == ARPHRD_ETHER || - dev->type == ARPHRD_PPP || - dev->type == ARPHRD_IPGRE || - dev->type == ARPHRD_LOOPBACK) - blksize -= 2; - - return (__u16)blksize; -} - static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms) { struct dn_dev_sysctl_table *t; @@ -314,52 +299,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } } -struct net_device *dn_dev_get_default(void) -{ - struct net_device *dev; - read_lock(&dndev_lock); - dev = decnet_default_device; - if (dev) { - if (dev->dn_ptr) - dev_hold(dev); - else - dev = NULL; - } - read_unlock(&dndev_lock); - return dev; -} - -int dn_dev_set_default(struct net_device *dev, int force) -{ - struct net_device *old = NULL; - int rv = -EBUSY; - if (!dev->dn_ptr) - return -ENODEV; - write_lock(&dndev_lock); - if (force || decnet_default_device == NULL) { - old = decnet_default_device; - decnet_default_device = dev; - rv = 0; - } - write_unlock(&dndev_lock); - if (old) - dev_put(dev); - return rv; -} - 
-static void dn_dev_check_default(struct net_device *dev) -{ - write_lock(&dndev_lock); - if (dev == decnet_default_device) { - decnet_default_device = NULL; - } else { - dev = NULL; - } - write_unlock(&dndev_lock); - if (dev) - dev_put(dev); -} - static int dn_forwarding_proc(ctl_table *table, int write, struct file *filep, void __user *buffer, @@ -454,6 +393,21 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms * #endif /* CONFIG_SYSCTL */ +static inline __u16 mtu2blksize(struct net_device *dev) +{ + u32 blksize = dev->mtu; + if (blksize > 0xffff) + blksize = 0xffff; + + if (dev->type == ARPHRD_ETHER || + dev->type == ARPHRD_PPP || + dev->type == ARPHRD_IPGRE || + dev->type == ARPHRD_LOOPBACK) + blksize -= 2; + + return (__u16)blksize; +} + static struct dn_ifaddr *dn_dev_alloc_ifa(void) { struct dn_ifaddr *ifa; @@ -635,6 +589,52 @@ rarok: goto done; } +struct net_device *dn_dev_get_default(void) +{ + struct net_device *dev; + read_lock(&dndev_lock); + dev = decnet_default_device; + if (dev) { + if (dev->dn_ptr) + dev_hold(dev); + else + dev = NULL; + } + read_unlock(&dndev_lock); + return dev; +} + +int dn_dev_set_default(struct net_device *dev, int force) +{ + struct net_device *old = NULL; + int rv = -EBUSY; + if (!dev->dn_ptr) + return -ENODEV; + write_lock(&dndev_lock); + if (force || decnet_default_device == NULL) { + old = decnet_default_device; + decnet_default_device = dev; + rv = 0; + } + write_unlock(&dndev_lock); + if (old) + dev_put(dev); + return rv; +} + +static void dn_dev_check_default(struct net_device *dev) +{ + write_lock(&dndev_lock); + if (dev == decnet_default_device) { + decnet_default_device = NULL; + } else { + dev = NULL; + } + write_unlock(&dndev_lock); + if (dev) + dev_put(dev); +} + static struct dn_dev *dn_dev_by_index(int ifindex) { struct net_device *dev; -- cgit v1.2.3 From 592b64d7d5a06d9855ea7b039a898328109e606d Mon Sep 17 00:00:00 2001 From: William Lee Irwin III Date: Wed, 18 Aug 2004 00:39:07 
-0700 Subject: [RXRPC]: Fix build with SYSCTL=n Signed-off-by: William Lee Irwin III Signed-off-by: David S. Miller --- include/rxrpc/rxrpc.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/rxrpc/rxrpc.h b/include/rxrpc/rxrpc.h index df6595c32c37..e9c690964cea 100644 --- a/include/rxrpc/rxrpc.h +++ b/include/rxrpc/rxrpc.h @@ -16,10 +16,17 @@ extern uint32_t rxrpc_epoch; +#ifdef CONFIG_SYSCTL extern int rxrpc_ktrace; extern int rxrpc_kdebug; extern int rxrpc_kproto; extern int rxrpc_knet; +#else +#define rxrpc_ktrace 0 +#define rxrpc_kdebug 0 +#define rxrpc_kproto 0 +#define rxrpc_knet 0 +#endif extern int rxrpc_sysctl_init(void); extern void rxrpc_sysctl_cleanup(void); -- cgit v1.2.3 From 1126996a1f42e82a334339ff099a1580888d3a9a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Aug 2004 00:43:08 -0700 Subject: [XFRM]: Kill unused flow_hash This patch removes a left-over from the days when the flow cache lived in xfrm_policy.c. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aaf74999a1f3..fdf3409b9209 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -304,47 +304,6 @@ extern int xfrm_register_km(struct xfrm_mgr *km); extern int xfrm_unregister_km(struct xfrm_mgr *km); -#define XFRM_FLOWCACHE_HASH_SIZE 1024 - -static inline u32 __flow_hash4(struct flowi *fl) -{ - u32 hash = fl->fl4_src ^ fl->fl_ip_sport; - - hash = ((hash & 0xF0F0F0F0) >> 4) | ((hash & 0x0F0F0F0F) << 4); - - hash ^= fl->fl4_dst ^ fl->fl_ip_dport; - hash ^= (hash >> 10); - hash ^= (hash >> 20); - return hash & (XFRM_FLOWCACHE_HASH_SIZE-1); -} - -static inline u32 __flow_hash6(struct flowi *fl) -{ - u32 hash = fl->fl6_src.s6_addr32[2] ^ - fl->fl6_src.s6_addr32[3] ^ - fl->fl_ip_sport; - - hash = ((hash & 0xF0F0F0F0) >> 4) | ((hash & 0x0F0F0F0F) << 4); - - hash ^= fl->fl6_dst.s6_addr32[2] ^ - fl->fl6_dst.s6_addr32[3] ^ - fl->fl_ip_dport; - hash ^= (hash >> 10); - hash ^= (hash >> 20); - return hash & (XFRM_FLOWCACHE_HASH_SIZE-1); -} - -static inline u32 flow_hash(struct flowi *fl, unsigned short family) -{ - switch (family) { - case AF_INET: - return __flow_hash4(fl); - case AF_INET6: - return __flow_hash6(fl); - } - return 0; /*XXX*/ -} - extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; static inline void xfrm_pol_hold(struct xfrm_policy *policy) -- cgit v1.2.3 From af597aa67c3e0e785fcfbcc3bddfe6643b7e49cb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Aug 2004 00:51:44 -0700 Subject: [IPSEC]: Call xfrm6_rcv in xfrm6_tunnel_rcv This patch reuses the code in xfrm6_input.c for receiving xfrm6_tunnel packets. This removes duplicate code as well as fixing the bugs unique to xfrm6_tunnel_input. For example, it didn't move the MAC header down. Nor did it do anything with ECN. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 1 + net/ipv6/xfrm6_input.c | 15 ++++++++++++--- net/ipv6/xfrm6_tunnel.c | 51 +++---------------------------------------------- 3 files changed, 16 insertions(+), 51 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fdf3409b9209..5c51339eec13 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -780,6 +780,7 @@ extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff **pskb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); +extern int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi); extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0791594f8878..92e74233fcdb 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -9,6 +9,7 @@ * IPv6 support */ +#include #include #include #include @@ -25,11 +26,11 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } -int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) { struct sk_buff *skb = *pskb; int err; - u32 spi, seq; + u32 seq; struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; @@ -40,7 +41,8 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) nhoff = *nhoffp; nexthdr = skb->nh.raw[nhoff]; - if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) + seq = 0; + if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) goto drop; do { @@ -137,3 +139,10 @@ drop: kfree_skb(skb); return -1; } + +EXPORT_SYMBOL(xfrm6_rcv_spi); + +int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +{ + return xfrm6_rcv_spi(pskb, nhoffp, 0); +} diff --git 
a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index fb5b34a0d4c4..a9736d2ea721 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -356,17 +356,6 @@ static int xfrm6_tunnel_output(struct sk_buff **pskb) static int xfrm6_tunnel_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) { - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return -EINVAL; - - skb->mac.raw = skb->nh.raw; - skb->nh.raw = skb->data; - dst_release(skb->dst); - skb->dst = NULL; - skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; - netif_rx(skb); - return 0; } @@ -413,49 +402,15 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) { struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler = xfrm6_tunnel_handler; - struct xfrm_state *x = NULL; struct ipv6hdr *iph = skb->nh.ipv6h; - int err = 0; u32 spi; /* device-like_ip6ip6_handler() */ - if (handler) { - err = handler->handler(pskb, nhoffp); - if (!err) - goto out; - } + if (handler && handler->handler(pskb, nhoffp) == 0) + return 0; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, - spi, - IPPROTO_IPV6, AF_INET6); - - if (!x) - goto drop; - - spin_lock(&x->lock); - - if (unlikely(x->km.state != XFRM_STATE_VALID)) - goto drop_unlock; - - err = xfrm6_tunnel_input(x, NULL, skb); - if (err) - goto drop_unlock; - - x->curlft.bytes += skb->len; - x->curlft.packets++; - spin_unlock(&x->lock); - xfrm_state_put(x); - -out: - return 0; - -drop_unlock: - spin_unlock(&x->lock); - xfrm_state_put(x); -drop: - kfree_skb(skb); - return -1; + return xfrm6_rcv_spi(pskb, nhoffp, spi); } static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v1.2.3 From 3a98412d7434c8996805135753cc5ba31e28e93d Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Thu, 19 Aug 2004 16:39:42 +0900 Subject: [XFRM] Fix selector comparison against icmp{,v6} flows. 
Signed-off-by: Hideaki YOSHIFUJI --- include/net/xfrm.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aaf74999a1f3..7bf0b25d844b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -462,13 +462,51 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen) return 1; } +static __inline__ +u16 xfrm_flowi_sport(struct flowi *fl) +{ + u16 port; + switch(fl->proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + port = fl->fl_ip_sport; + break; + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + port = htons(fl->fl_icmp_type); + break; + default: + port = 0; /*XXX*/ + } + return port; +} + +static __inline__ +u16 xfrm_flowi_dport(struct flowi *fl) +{ + u16 port; + switch(fl->proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + port = fl->fl_ip_dport; + break; + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + port = htons(fl->fl_icmp_code); + break; + default: + port = 0; /*XXX*/ + } + return port; +} + static inline int __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) { return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && - !((fl->fl_ip_dport^sel->dport)&sel->dport_mask) && - !((fl->fl_ip_sport^sel->sport)&sel->sport_mask) && + !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && (fl->proto == sel->proto || !sel->proto) && (fl->oif == sel->ifindex || !sel->ifindex); } @@ -478,8 +516,8 @@ __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) { return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && - !((fl->fl_ip_dport^sel->dport)&sel->dport_mask) && - !((fl->fl_ip_sport^sel->sport)&sel->sport_mask) && + !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl) ^ sel->sport) & 
sel->sport_mask) && (fl->proto == sel->proto || !sel->proto) && (fl->oif == sel->ifindex || !sel->ifindex); } -- cgit v1.2.3 From 682d34843723ad0382d8a0422d47aff0d75d242e Mon Sep 17 00:00:00 2001 From: Masahide Nakamura Date: Thu, 19 Aug 2004 16:42:00 +0900 Subject: [IPV6] XFRM: decode icmpv6 session. Signed-off-by: Masahide Nakamura Signed-off-by: Hideaki YOSHIFUJI --- net/ipv6/xfrm6_policy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ab4e40b0ab76..6c1cb74e6a6f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -213,6 +213,16 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) fl->proto = nexthdr; return; + case IPPROTO_ICMPV6: + if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) { + u8 *icmp = (u8 *)exthdr; + + fl->fl_icmp_type = icmp[0]; + fl->fl_icmp_code = icmp[1]; + } + fl->proto = nexthdr; + return; + /* XXX Why are there these headers? */ case IPPROTO_AH: case IPPROTO_ESP: -- cgit v1.2.3 From 9c7e9d2df831c24c4b8d98cc5d3cca04d02c4eab Mon Sep 17 00:00:00 2001 From: Masahide Nakamura Date: Thu, 19 Aug 2004 16:43:52 +0900 Subject: [IPV6] XFRM: probe icmpv6 type/code when sending packets via raw socket. 
Signed-off-by: Masahide Nakamura Signed-off-by: Hideaki YOSHIFUJI --- net/ipv6/raw.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 031989611932..eb6480aea025 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -555,6 +555,52 @@ error: IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); return err; } + +static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +{ + struct iovec *iov; + u8 *type = NULL; + u8 *code = NULL; + int probed = 0; + int i; + + if (!msg->msg_iov) + return; + + for (i = 0; i < msg->msg_iovlen; i++) { + iov = &msg->msg_iov[i]; + if (!iov) + continue; + + switch (fl->proto) { + case IPPROTO_ICMPV6: + /* check if one-byte field is readable or not. */ + if (iov->iov_base && iov->iov_len < 1) + break; + + if (!type) { + type = iov->iov_base; + /* check if code field is readable or not. */ + if (iov->iov_len > 1) + code = type + 1; + } else if (!code) + code = iov->iov_base; + + if (type && code) { + fl->fl_icmp_type = *type; + fl->fl_icmp_code = *code; + probed = 1; + } + break; + default: + probed = 1; + break; + } + if (probed) + break; + } +} + static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -674,6 +720,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = fl6_merge_options(&opt_space, flowlabel, opt); fl.proto = proto; + rawv6_probe_proto_opt(&fl, msg); + ipv6_addr_copy(&fl.fl6_dst, daddr); if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); -- cgit v1.2.3 From 838eed90c5935a2d7ad3fc9524fdc9b9b0fad20e Mon Sep 17 00:00:00 2001 From: Masahide Nakamura Date: Thu, 19 Aug 2004 16:45:29 +0900 Subject: [IPV4] XFRM: decode icmp session. 
Signed-off-by: Masahide Nakamura Signed-off-by: Hideaki YOSHIFUJI --- net/ipv4/xfrm4_policy.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3aacce604561..edd813dade15 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -183,6 +183,15 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) } break; + case IPPROTO_ICMP: + if (pskb_may_pull(skb, xprth + 2 - skb->data)) { + u8 *icmp = xprth; + + fl->fl_icmp_type = icmp[0]; + fl->fl_icmp_code = icmp[1]; + } + break; + case IPPROTO_ESP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { u32 *ehdr = (u32 *)xprth; -- cgit v1.2.3 From e80234f87017ded92c98ed297b79543d0110b871 Mon Sep 17 00:00:00 2001 From: Masahide Nakamura Date: Thu, 19 Aug 2004 16:51:21 +0900 Subject: [IPV4] XFRM: probe icmp type/code when sending packets via raw socket. Signed-off-by: Masahide Nakamura Signed-off-by: Hideaki YOSHIFUJI --- net/ipv4/raw.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1cfd749d651e..5511700f5950 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -323,6 +323,51 @@ error: return err; } +static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +{ + struct iovec *iov; + u8 *type = NULL; + u8 *code = NULL; + int probed = 0; + int i; + + if (!msg->msg_iov) + return; + + for (i = 0; i < msg->msg_iovlen; i++) { + iov = &msg->msg_iov[i]; + if (!iov) + continue; + + switch (fl->proto) { + case IPPROTO_ICMP: + /* check if one-byte field is readable or not. */ + if (iov->iov_base && iov->iov_len < 1) + break; + + if (!type) { + type = iov->iov_base; + /* check if code field is readable or not. 
*/ + if (iov->iov_len > 1) + code = type + 1; + } else if (!code) + code = iov->iov_base; + + if (type && code) { + fl->fl_icmp_type = *type; + fl->fl_icmp_code = *code; + probed = 1; + } + break; + default: + probed = 1; + break; + } + if (probed) + break; + } +} + static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -429,6 +474,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, }; + raw_probe_proto_opt(&fl, msg); + err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) -- cgit v1.2.3 From 1f68baf8b1176e1b46e1aebbcffce2cf76d90429 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Thu, 19 Aug 2004 16:56:49 +0900 Subject: [IPV4] XFRM: don't probe icmp type/code for hdrincl sockets. Signed-off-by: Hideaki YOSHIFUJI --- net/ipv4/raw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5511700f5950..6742e162a6f5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -474,7 +474,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, }; - raw_probe_proto_opt(&fl, msg); + if (!inet->hdrincl) + raw_probe_proto_opt(&fl, msg); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } -- cgit v1.2.3 From 45aa67b068883fea21f8461f20e77ab3604cc310 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 03:07:13 -0700 Subject: [NETFILTER]: Convert SCTP conntrack over to ip_ct_refresh_acct(). Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 70ca8be84e78..8296e7c52cf8 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -403,7 +403,7 @@ static int sctp_packet(struct ip_conntrack *conntrack, WRITE_UNLOCK(&sctp_lock); } - ip_ct_refresh(conntrack, *sctp_timeouts[newconntrack]); + ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY -- cgit v1.2.3 From 7cbe718bc1b04b3547c77ce143a1c5110d0ff28d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 03:09:57 -0700 Subject: [NETFILTER]: Export ip_conntrack_count for ip_conntrack_standalone. Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 34a7bbcc67d8..f6def5a4b491 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -59,7 +59,10 @@ DECLARE_RWLOCK(ip_conntrack_lock); DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock); -static atomic_t ip_conntrack_count = ATOMIC_INIT(0); + +/* ip_conntrack_standalone needs this */ +atomic_t ip_conntrack_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(ip_conntrack_count); void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); -- cgit v1.2.3 From 4638004e22f76ac6e6b086b2575acf4d71340678 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 03:11:57 -0700 Subject: [NETFILTER]: Need to export ip_ct_log_invalid to modules. 
--- net/ipv4/netfilter/ip_conntrack_standalone.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 4068958482f4..86010ea65de1 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -905,3 +905,4 @@ EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_put); +EXPORT_SYMBOL(ip_ct_log_invalid); -- cgit v1.2.3 From 274cac6df0970c5504e94b4e17b946cdab945e49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 03:15:29 -0700 Subject: [NET]: Add skb_header_pointer, and use it where possible. This greatly improves netfilter performance where the wanted header area is in the linear SKB data area, therefore no copy into the temporary buffer is needed. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 +++ net/bridge/netfilter/ebt_among.c | 45 ++++--- net/bridge/netfilter/ebt_arp.c | 51 ++++---- net/bridge/netfilter/ebt_arpreply.c | 32 +++-- net/bridge/netfilter/ebt_ip.c | 31 ++--- net/bridge/netfilter/ebt_log.c | 54 +++++---- net/bridge/netfilter/ebt_stp.c | 22 ++-- net/bridge/netfilter/ebt_vlan.c | 9 +- net/core/filter.c | 21 ++-- net/core/netfilter.c | 9 +- net/ipv4/icmp.c | 31 +++-- net/ipv4/ipvs/ip_vs_core.c | 104 +++++++++-------- net/ipv4/ipvs/ip_vs_proto.c | 42 ++++--- net/ipv4/ipvs/ip_vs_proto_ah.c | 9 +- net/ipv4/ipvs/ip_vs_proto_esp.c | 9 +- net/ipv4/ipvs/ip_vs_proto_icmp.c | 35 +++--- net/ipv4/ipvs/ip_vs_proto_tcp.c | 44 ++++--- net/ipv4/ipvs/ip_vs_proto_udp.c | 42 ++++--- net/ipv4/ipvs/ip_vs_xmit.c | 9 +- net/ipv4/netfilter/ip_conntrack_amanda.c | 12 +- net/ipv4/netfilter/ip_conntrack_ftp.c | 31 ++--- net/ipv4/netfilter/ip_conntrack_irc.c | 26 +++-- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 11 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 115 +++++++++--------- 
net/ipv4/netfilter/ip_conntrack_proto_udp.c | 9 +- net/ipv4/netfilter/ip_conntrack_tftp.c | 10 +- net/ipv4/netfilter/ip_nat_standalone.c | 10 +- net/ipv4/netfilter/ip_nat_tftp.c | 25 ++-- net/ipv4/netfilter/ip_tables.c | 53 +++++---- net/ipv4/netfilter/ipchains_core.c | 42 ++++--- net/ipv4/netfilter/ipfwadm_core.c | 42 ++++--- net/ipv4/netfilter/ipt_ECN.c | 29 +++-- net/ipv4/netfilter/ipt_LOG.c | 168 ++++++++++++++------------- net/ipv4/netfilter/ipt_REJECT.c | 15 +-- net/ipv4/netfilter/ipt_ah.c | 11 +- net/ipv4/netfilter/ipt_ecn.c | 17 +-- net/ipv4/netfilter/ipt_esp.c | 11 +- net/ipv4/netfilter/ipt_multiport.c | 19 +-- net/ipv4/netfilter/ipt_tcpmss.c | 30 +++-- net/ipv6/exthdrs_core.c | 27 ++--- net/ipv6/icmp.c | 17 +-- net/rxrpc/call.c | 57 +++++---- net/sunrpc/xprt.c | 8 +- security/selinux/hooks.c | 72 ++++++------ 44 files changed, 842 insertions(+), 638 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 354f1ff564bb..256c05c11298 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1107,6 +1107,20 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + int hlen = skb_headlen(skb); + + if (offset + len <= hlen) + return skb->data + offset; + + if (skb_copy_bits(skb, offset, buffer, len) < 0) + return NULL; + + return buffer; +} + extern void skb_init(void); extern void skb_add_mtu(int mtu); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 727311dfd884..78aa491ceb90 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -73,20 +73,27 @@ static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) { if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_IP)) { - 
struct iphdr iph; + struct iphdr _iph, *ih; - if (skb_copy_bits(skb, 0, &iph, sizeof(iph))) + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) return -1; - *addr = iph.daddr; + *addr = ih->daddr; } else if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_ARP)) { - struct arphdr arph; + struct arphdr _arph, *ah; + uint32_t buf, *bp; - if (skb_copy_bits(skb, 0, &arph, sizeof(arph)) || - arph.ar_pln != sizeof(uint32_t) || arph.ar_hln != ETH_ALEN) + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL || + ah->ar_pln != sizeof(uint32_t) || + ah->ar_hln != ETH_ALEN) return -1; - if (skb_copy_bits(skb, sizeof(struct arphdr) + - 2 * ETH_ALEN + sizeof(uint32_t), addr, sizeof(uint32_t))) + bp = skb_header_pointer(skb, sizeof(struct arphdr) + + 2 * ETH_ALEN + sizeof(uint32_t), + sizeof(uint32_t), &buf); + if (bp == NULL) return -1; + *addr = *bp; } return 0; } @@ -94,20 +101,26 @@ static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) static int get_ip_src(const struct sk_buff *skb, uint32_t *addr) { if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_IP)) { - struct iphdr iph; + struct iphdr _iph, *ih; - if (skb_copy_bits(skb, 0, &iph, sizeof(iph))) + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) return -1; - *addr = iph.saddr; + *addr = ih->saddr; } else if (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_ARP)) { - struct arphdr arph; + struct arphdr _arph, *ah; + uint32_t buf, *bp; - if (skb_copy_bits(skb, 0, &arph, sizeof(arph)) || - arph.ar_pln != sizeof(uint32_t) || arph.ar_hln != ETH_ALEN) + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL || + ah->ar_pln != sizeof(uint32_t) || + ah->ar_hln != ETH_ALEN) return -1; - if (skb_copy_bits(skb, sizeof(struct arphdr) + - ETH_ALEN, addr, sizeof(uint32_t))) + bp = skb_header_pointer(skb, sizeof(struct arphdr) + + ETH_ALEN, sizeof(uint32_t), &buf); + if (bp == NULL) return -1; + *addr = *bp; } return 0; } diff 
--git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index eb675848fbc3..e913cac50066 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -19,72 +19,79 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_arp_info *info = (struct ebt_arp_info *)data; - struct arphdr arph; + struct arphdr _arph, *ah; - if (skb_copy_bits(skb, 0, &arph, sizeof(arph))) + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) return EBT_NOMATCH; if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != - arph.ar_op, EBT_ARP_OPCODE)) + ah->ar_op, EBT_ARP_OPCODE)) return EBT_NOMATCH; if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != - arph.ar_hrd, EBT_ARP_HTYPE)) + ah->ar_hrd, EBT_ARP_HTYPE)) return EBT_NOMATCH; if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != - arph.ar_pro, EBT_ARP_PTYPE)) + ah->ar_pro, EBT_ARP_PTYPE)) return EBT_NOMATCH; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { - uint32_t addr; + uint32_t _addr, *ap; /* IPv4 addresses are always 4 bytes */ - if (arph.ar_pln != sizeof(uint32_t)) + if (ah->ar_pln != sizeof(uint32_t)) return EBT_NOMATCH; if (info->bitmask & EBT_ARP_SRC_IP) { - if (skb_copy_bits(skb, sizeof(struct arphdr) + - arph.ar_hln, &addr, sizeof(addr))) + ap = skb_header_pointer(skb, sizeof(struct arphdr) + + ah->ar_hln, sizeof(_addr), + &_addr); + if (ap == NULL) return EBT_NOMATCH; - if (FWINV(info->saddr != (addr & info->smsk), + if (FWINV(info->saddr != (*ap & info->smsk), EBT_ARP_SRC_IP)) return EBT_NOMATCH; } if (info->bitmask & EBT_ARP_DST_IP) { - if (skb_copy_bits(skb, sizeof(struct arphdr) + - 2*arph.ar_hln + sizeof(uint32_t), &addr, - sizeof(addr))) + ap = skb_header_pointer(skb, sizeof(struct arphdr) + + 2*ah->ar_hln+sizeof(uint32_t), + sizeof(_addr), &_addr); + if (ap == NULL) return EBT_NOMATCH; - if (FWINV(info->daddr != (addr & 
info->dmsk), + if (FWINV(info->daddr != (*ap & info->dmsk), EBT_ARP_DST_IP)) return EBT_NOMATCH; } } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { - unsigned char mac[ETH_ALEN]; + unsigned char _mac[ETH_ALEN], *mp; uint8_t verdict, i; /* MAC addresses are 6 bytes */ - if (arph.ar_hln != ETH_ALEN) + if (ah->ar_hln != ETH_ALEN) return EBT_NOMATCH; if (info->bitmask & EBT_ARP_SRC_MAC) { - if (skb_copy_bits(skb, sizeof(struct arphdr), &mac, - ETH_ALEN)) + mp = skb_header_pointer(skb, sizeof(struct arphdr), + sizeof(_mac), &_mac); + if (mp == NULL) return EBT_NOMATCH; verdict = 0; for (i = 0; i < 6; i++) - verdict |= (mac[i] ^ info->smaddr[i]) & + verdict |= (mp[i] ^ info->smaddr[i]) & info->smmsk[i]; if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) return EBT_NOMATCH; } if (info->bitmask & EBT_ARP_DST_MAC) { - if (skb_copy_bits(skb, sizeof(struct arphdr) + - arph.ar_hln + arph.ar_pln, &mac, ETH_ALEN)) + mp = skb_header_pointer(skb, sizeof(struct arphdr) + + ah->ar_hln + ah->ar_pln, + sizeof(_mac), &_mac); + if (mp == NULL) return EBT_NOMATCH; verdict = 0; for (i = 0; i < 6; i++) - verdict |= (mac[i] ^ info->dmaddr[i]) & + verdict |= (mp[i] ^ info->dmaddr[i]) & info->dmmsk[i]; if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) return EBT_NOMATCH; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 87ba30dd090f..95189f02fcc0 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -20,30 +20,38 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, const void *data, unsigned int datalen) { struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; - u32 sip, dip; - struct arphdr ah; - unsigned char sha[ETH_ALEN]; + u32 _sip, *siptr, _dip, *diptr; + struct arphdr _ah, *ap; + unsigned char _sha[ETH_ALEN], *shp; struct sk_buff *skb = *pskb; - if (skb_copy_bits(skb, 0, &ah, sizeof(ah))) + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); + if (ap == NULL) return EBT_DROP; 
- if (ah.ar_op != __constant_htons(ARPOP_REQUEST) || ah.ar_hln != ETH_ALEN - || ah.ar_pro != __constant_htons(ETH_P_IP) || ah.ar_pln != 4) + if (ap->ar_op != __constant_htons(ARPOP_REQUEST) || + ap->ar_hln != ETH_ALEN || + ap->ar_pro != __constant_htons(ETH_P_IP) || + ap->ar_pln != 4) return EBT_CONTINUE; - if (skb_copy_bits(skb, sizeof(ah), &sha, ETH_ALEN)) + shp = skb_header_pointer(skb, sizeof(_ah), ETH_ALEN, &_sha); + if (shp == NULL) return EBT_DROP; - if (skb_copy_bits(skb, sizeof(ah) + ETH_ALEN, &sip, sizeof(sip))) + siptr = skb_header_pointer(skb, sizeof(_ah) + ETH_ALEN, + sizeof(_sip), &_sip); + if (siptr == NULL) return EBT_DROP; - if (skb_copy_bits(skb, sizeof(ah) + 2 * ETH_ALEN + sizeof(sip), - &dip, sizeof(dip))) + diptr = skb_header_pointer(skb, + sizeof(_ah) + 2 * ETH_ALEN + sizeof(_sip), + sizeof(_dip), &_dip); + if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, sip, (struct net_device *)in, - dip, sha, info->mac, sha); + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in, + *diptr, shp, info->mac, shp); return info->target; } diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 7bab7d065bd3..0b2f19943dac 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -28,41 +28,44 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, unsigned int datalen) { struct ebt_ip_info *info = (struct ebt_ip_info *)data; - union {struct iphdr iph; struct tcpudphdr ports;} u; + struct iphdr _iph, *ih; + struct tcpudphdr _ports, *pptr; - if (skb_copy_bits(skb, 0, &u.iph, sizeof(u.iph))) + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) return EBT_NOMATCH; if (info->bitmask & EBT_IP_TOS && - FWINV(info->tos != u.iph.tos, EBT_IP_TOS)) + FWINV(info->tos != ih->tos, EBT_IP_TOS)) return EBT_NOMATCH; if (info->bitmask & EBT_IP_SOURCE && - FWINV((u.iph.saddr & info->smsk) != + FWINV((ih->saddr & info->smsk) != info->saddr, 
EBT_IP_SOURCE)) return EBT_NOMATCH; if ((info->bitmask & EBT_IP_DEST) && - FWINV((u.iph.daddr & info->dmsk) != + FWINV((ih->daddr & info->dmsk) != info->daddr, EBT_IP_DEST)) return EBT_NOMATCH; if (info->bitmask & EBT_IP_PROTO) { - if (FWINV(info->protocol != u.iph.protocol, EBT_IP_PROTO)) + if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) return EBT_NOMATCH; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) return EBT_MATCH; - if (skb_copy_bits(skb, u.iph.ihl*4, &u.ports, - sizeof(u.ports))) + pptr = skb_header_pointer(skb, ih->ihl*4, + sizeof(_ports), &_ports); + if (pptr == NULL) return EBT_NOMATCH; if (info->bitmask & EBT_IP_DPORT) { - u.ports.dst = ntohs(u.ports.dst); - if (FWINV(u.ports.dst < info->dport[0] || - u.ports.dst > info->dport[1], + u32 dst = ntohs(pptr->dst); + if (FWINV(dst < info->dport[0] || + dst > info->dport[1], EBT_IP_DPORT)) return EBT_NOMATCH; } if (info->bitmask & EBT_IP_SPORT) { - u.ports.src = ntohs(u.ports.src); - if (FWINV(u.ports.src < info->sport[0] || - u.ports.src > info->sport[1], + u32 src = ntohs(pptr->src); + if (FWINV(src < info->sport[0] || + src > info->sport[1], EBT_IP_SPORT)) return EBT_NOMATCH; } diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 2da7c682744d..407dfdbaf688 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,23 +78,29 @@ static void ebt_log(const struct sk_buff *skb, const struct net_device *in, if ((info->bitmask & EBT_LOG_IP) && skb->mac.ethernet->h_proto == htons(ETH_P_IP)){ - if (skb_copy_bits(skb, 0, &u.iph, sizeof(u.iph))) { + struct iphdr _iph, *ih; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { printk(" INCOMPLETE IP header"); goto out; } printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,", - NIPQUAD(u.iph.saddr), NIPQUAD(u.iph.daddr)); + NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); printk(" IP tos=0x%02X, IP proto=%d", u.iph.tos, - u.iph.protocol); - if (u.iph.protocol == 
IPPROTO_TCP || - u.iph.protocol == IPPROTO_UDP) { - if (skb_copy_bits(skb, u.iph.ihl*4, &u.ports, - sizeof(u.ports))) { + ih->protocol); + if (ih->protocol == IPPROTO_TCP || + ih->protocol == IPPROTO_UDP) { + struct tcpudphdr _ports, *pptr; + + pptr = skb_header_pointer(skb, ih->ihl*4, + sizeof(_ports), &_ports); + if (pptr == NULL) { printk(" INCOMPLETE TCP/UDP header"); goto out; } - printk(" SPT=%u DPT=%u", ntohs(u.ports.src), - ntohs(u.ports.dst)); + printk(" SPT=%u DPT=%u", ntohs(pptr->src), + ntohs(pptr->dst)); } goto out; } @@ -102,32 +108,38 @@ static void ebt_log(const struct sk_buff *skb, const struct net_device *in, if ((info->bitmask & EBT_LOG_ARP) && ((skb->mac.ethernet->h_proto == __constant_htons(ETH_P_ARP)) || (skb->mac.ethernet->h_proto == __constant_htons(ETH_P_RARP)))) { - if (skb_copy_bits(skb, 0, &u.arph, sizeof(u.arph))) { + struct arphdr _arph, *ah; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) { printk(" INCOMPLETE ARP header"); goto out; } printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d", - ntohs(u.arph.ar_hrd), ntohs(u.arph.ar_pro), - ntohs(u.arph.ar_op)); + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), + ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, * then log the ARP payload */ - if (u.arph.ar_hrd == __constant_htons(1) && - u.arph.ar_hln == ETH_ALEN && - u.arph.ar_pln == sizeof(uint32_t)) { - if (skb_copy_bits(skb, sizeof(u.arph), &u.arpp, - sizeof(u.arpp))) { + if (ah->ar_hrd == __constant_htons(1) && + ah->ar_hln == ETH_ALEN && + ah->ar_pln == sizeof(uint32_t)) { + struct arppayload _arpp, *ap; + + ap = skb_header_pointer(skb, sizeof(u.arph), + sizeof(_arpp), &_arpp); + if (ap == NULL) { printk(" INCOMPLETE ARP payload"); goto out; } printk(" ARP MAC SRC="); - print_MAC(u.arpp.mac_src); + print_MAC(ap->mac_src); printk(" ARP IP SRC=%u.%u.%u.%u", - myNIPQUAD(u.arpp.ip_src)); + myNIPQUAD(ap->ip_src)); printk(" ARP MAC DST="); - print_MAC(u.arpp.mac_dst); + print_MAC(ap->mac_dst); printk(" ARP 
IP DST=%u.%u.%u.%u", - myNIPQUAD(u.arpp.ip_dst)); + myNIPQUAD(ap->ip_dst)); } } out: diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index d0299efa1001..f8a8cdec16ee 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -122,26 +122,30 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_stp_info *info = (struct ebt_stp_info *)data; - struct stp_header stph; + struct stp_header _stph, *sp; uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; - if (skb_copy_bits(skb, 0, &stph, sizeof(stph))) + + sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); + if (sp == NULL) return EBT_NOMATCH; /* The stp code only considers these */ - if (memcmp(&stph, header, sizeof(header))) + if (memcmp(sp, header, sizeof(header))) return EBT_NOMATCH; if (info->bitmask & EBT_STP_TYPE - && FWINV(info->type != stph.type, EBT_STP_TYPE)) + && FWINV(info->type != sp->type, EBT_STP_TYPE)) return EBT_NOMATCH; - if (stph.type == BPDU_TYPE_CONFIG && + if (sp->type == BPDU_TYPE_CONFIG && info->bitmask & EBT_STP_CONFIG_MASK) { - struct stp_config_pdu stpc; + struct stp_config_pdu _stpc, *st; - if (skb_copy_bits(skb, sizeof(stph), &stpc, sizeof(stpc))) - return EBT_NOMATCH; - return ebt_filter_config(info, &stpc); + st = skb_header_pointer(skb, sizeof(_stph), + sizeof(_stpc), &_stpc); + if (st == NULL) + return EBT_NOMATCH; + return ebt_filter_config(info, st); } return EBT_MATCH; } diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index ec111772bbe9..0982ec1c6730 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -48,7 +48,7 @@ ebt_filter_vlan(const struct sk_buff *skb, const void *data, unsigned int datalen) { struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; - struct vlan_hdr frame; + struct vlan_hdr _frame, *fp; unsigned short TCI; /* Whole TCI, 
given from parsed frame */ unsigned short id; /* VLAN ID, given from frame TCI */ @@ -56,7 +56,8 @@ ebt_filter_vlan(const struct sk_buff *skb, /* VLAN encapsulated Type/Length field, given from orig frame */ unsigned short encap; - if (skb_copy_bits(skb, 0, &frame, sizeof(frame))) + fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); + if (fp == NULL) return EBT_NOMATCH; /* Tag Control Information (TCI) consists of the following elements: @@ -66,10 +67,10 @@ ebt_filter_vlan(const struct sk_buff *skb, * (CFI) is a single bit flag value. Currently ignored. * - VLAN Identifier (VID). The VID is encoded as * an unsigned binary number. */ - TCI = ntohs(frame.h_vlan_TCI); + TCI = ntohs(fp->h_vlan_TCI); id = TCI & VLAN_VID_MASK; prio = (TCI >> 13) & 0x7; - encap = frame.h_vlan_encapsulated_proto; + encap = fp->h_vlan_encapsulated_proto; /* Checking VLAN Identifier (VID) */ if (GET_BITMASK(EBT_VLAN_ID)) diff --git a/net/core/filter.c b/net/core/filter.c index 9c2a95080768..f3b88205ace2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -183,9 +183,10 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) continue; } } else { - u32 tmp; - if (!skb_copy_bits(skb, k, &tmp, 4)) { - A = ntohl(tmp); + u32 _tmp, *p; + p = skb_header_pointer(skb, k, 4, &_tmp); + if (p != NULL) { + A = ntohl(*p); continue; } } @@ -208,9 +209,10 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) continue; } } else { - u16 tmp; - if (!skb_copy_bits(skb, k, &tmp, 2)) { - A = ntohs(tmp); + u16 _tmp, *p; + p = skb_header_pointer(skb, k, 2, &_tmp); + if (p != NULL) { + A = ntohs(*p); continue; } } @@ -233,9 +235,10 @@ load_b: continue; } } else { - u8 tmp; - if (!skb_copy_bits(skb, k, &tmp, 1)) { - A = tmp; + u8 _tmp, *p; + p = skb_header_pointer(skb, k, 1, &_tmp); + if (p != NULL) { + A = *p; continue; } } diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 58632d189f52..f91292063325 100644 --- a/net/core/netfilter.c +++ 
b/net/core/netfilter.c @@ -695,11 +695,12 @@ int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) /* DaveM says protocol headers are also modifiable. */ switch ((*pskb)->nh.iph->protocol) { case IPPROTO_TCP: { - struct tcphdr hdr; - if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4, - &hdr, sizeof(hdr)) != 0) + struct tcphdr _hdr, *hp; + hp = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, + sizeof(_hdr), &_hdr); + if (hp == NULL) goto copy_skb; - if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4) + if (writable_len <= (*pskb)->nh.iph->ihl*4 + hp->doff*4) goto pull_skb; goto copy_skb; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 69261324d4b4..9221cfbc9e27 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -478,20 +478,25 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) * ICMP error */ if (iph->protocol == IPPROTO_ICMP) { - u8 inner_type; - - if (skb_copy_bits(skb_in, - skb_in->nh.raw + (iph->ihl << 2) + - offsetof(struct icmphdr, type) - - skb_in->data, &inner_type, 1)) + u8 _inner_type, *itp; + + itp = skb_header_pointer(skb_in, + skb_in->nh.raw + + (iph->ihl << 2) + + offsetof(struct icmphdr, + type) - + skb_in->data, + sizeof(_inner_type), + &_inner_type); + if (itp == NULL) goto out; /* * Assume any unknown ICMP type is an error. This * isn't specified by the RFC, but think about it.. 
*/ - if (inner_type > NR_ICMP_TYPES || - icmp_pointers[inner_type].error) + if (*itp > NR_ICMP_TYPES || + icmp_pointers[*itp].error) goto out; } } @@ -880,7 +885,6 @@ static void icmp_address_reply(struct sk_buff *skb) struct net_device *dev = skb->dev; struct in_device *in_dev; struct in_ifaddr *ifa; - u32 mask; if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) goto out; @@ -892,17 +896,20 @@ static void icmp_address_reply(struct sk_buff *skb) if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { - if (skb_copy_bits(skb, 0, &mask, 4)) + u32 _mask, *mp; + + mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); + if (mp == NULL) BUG(); for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { - if (mask == ifa->ifa_mask && + if (*mp == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) break; } if (!ifa && net_ratelimit()) { printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from " "%s/%u.%u.%u.%u\n", - NIPQUAD(mask), dev->name, NIPQUAD(rt->rt_src)); + NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); } } read_unlock(&in_dev->lock); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 9f4b94f0768d..323a1e7746b8 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -383,21 +383,23 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; - __u16 ports[2]; + __u16 _ports[2], *pptr; - if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, iph->ihl*4, + sizeof(_ports), _ports); + if (pptr == NULL) return NULL; /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, ports); + return ip_vs_sched_persist(svc, skb, pptr); /* * Non-persistent service */ - if (!svc->fwmark && ports[1] != svc->port) { + if (!svc->fwmark && pptr[1] != svc->port) { if (!svc->port) IP_VS_ERR("Schedule: port zero only supported " "in 
persistent services, " @@ -415,9 +417,9 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Create a connection entry. */ cp = ip_vs_conn_new(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1], - dest->addr, dest->port?dest->port:ports[1], + iph->saddr, pptr[0], + iph->daddr, pptr[1], + dest->addr, dest->port?dest->port:pptr[1], 0, dest); if (cp == NULL) @@ -444,10 +446,12 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { - __u16 ports[2]; + __u16 _ports[2], *pptr; struct iphdr *iph = skb->nh.iph; - if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0) { + pptr = skb_header_pointer(skb, iph->ihl*4, + sizeof(_ports), _ports); + if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; } @@ -465,8 +469,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* create a new connection entry */ IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); cp = ip_vs_conn_new(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1], + iph->saddr, pptr[0], + iph->daddr, pptr[1], 0, 0, IP_VS_CONN_F_BYPASS, NULL); @@ -494,7 +498,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. 
*/ - if ((svc->port == FTPPORT) && (ports[1] != FTPPORT)) { + if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { ip_vs_service_put(svc); return NF_ACCEPT; } @@ -607,8 +611,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) { struct sk_buff *skb = *pskb; struct iphdr *iph; - struct icmphdr icmph; - struct iphdr ciph; /* The ip header contained within the ICMP */ + struct icmphdr _icmph, *ic; + struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; @@ -625,11 +629,12 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) iph = skb->nh.iph; offset = ihl = iph->ihl * 4; - if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0) + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", - icmph.type, ntohs(icmp_id(&icmph)), + ic->type, ntohs(icmp_id(ic)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* @@ -639,33 +644,34 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((icmph.type != ICMP_DEST_UNREACH) && - (icmph.type != ICMP_SOURCE_QUENCH) && - (icmph.type != ICMP_TIME_EXCEEDED)) { + if ((ic->type != ICMP_DEST_UNREACH) && + (ic->type != ICMP_SOURCE_QUENCH) && + (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ - offset += sizeof(icmph); - if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0) + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(ciph.protocol); + pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? 
*/ - if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) && + if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); - offset += ciph.ihl * 4; + offset += cih->ihl * 4; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(skb, pp, cih, offset, 1); if (!cp) return NF_ACCEPT; @@ -685,7 +691,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) goto out; } - if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol) + if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); if (!ip_vs_make_skb_writable(pskb, offset)) goto out; @@ -707,11 +713,13 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) static inline int is_tcp_reset(const struct sk_buff *skb) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) < 0) + th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) return 0; - return tcph.rst; + return th->rst; } /* @@ -777,12 +785,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP)) { - __u16 ports[2]; + __u16 _ports[2], *pptr; - if (skb_copy_bits(skb, ihl, ports, sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, ihl, + sizeof(_ports), _ports); + if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_lookup_real_service(iph->protocol, - iph->saddr, ports[0])) { + iph->saddr, pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST @@ -866,8 +876,8 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) { struct sk_buff *skb = *pskb; struct iphdr *iph; - struct icmphdr icmph; - struct iphdr ciph; /* The ip header contained within the ICMP 
*/ + struct icmphdr _icmph, *ic; + struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; @@ -884,11 +894,12 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) iph = skb->nh.iph; offset = ihl = iph->ihl * 4; - if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0) + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", - icmph.type, ntohs(icmp_id(&icmph)), + ic->type, ntohs(icmp_id(ic)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* @@ -898,33 +909,34 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((icmph.type != ICMP_DEST_UNREACH) && - (icmph.type != ICMP_SOURCE_QUENCH) && - (icmph.type != ICMP_TIME_EXCEEDED)) { + if ((ic->type != ICMP_DEST_UNREACH) && + (ic->type != ICMP_SOURCE_QUENCH) && + (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ - offset += sizeof(icmph); - if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0) + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(ciph.protocol); + pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? 
*/ - if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) && + if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); - offset += ciph.ihl * 4; + offset += cih->ihl * 4; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(skb, pp, cih, offset, 1); if (!cp) return NF_ACCEPT; @@ -941,7 +953,7 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); - if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol) + if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); /* do not touch skb anymore */ diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index caf24e3754c6..dfd0a7dd3b75 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -166,27 +166,33 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const char *msg) { char buf[128]; - __u16 ports[2]; - struct iphdr iph; + struct iphdr _iph, *ih; - if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0) + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); - else if (iph.frag_off & __constant_htons(IP_OFFSET)) + else if (ih->frag_off & __constant_htons(IP_OFFSET)) sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", - pp->name, NIPQUAD(iph.saddr), - NIPQUAD(iph.daddr)); - else if (skb_copy_bits(skb, offset + iph.ihl*4, ports, sizeof(ports)) < 0) - sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, - NIPQUAD(iph.saddr), - NIPQUAD(iph.daddr)); - else - sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u", - pp->name, - NIPQUAD(iph.saddr), - ntohs(ports[0]), - NIPQUAD(iph.daddr), - ntohs(ports[1])); + pp->name, NIPQUAD(ih->saddr), + 
NIPQUAD(ih->daddr)); + else { + __u16 _ports[2], *pptr +; + pptr = skb_header_pointer(skb, offset + ih->ihl*4, + sizeof(_ports), _ports); + if (pptr == NULL) + sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u", + pp->name, + NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr)); + else + sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u", + pp->name, + NIPQUAD(ih->saddr), + ntohs(pptr[0]), + NIPQUAD(ih->daddr), + ntohs(pptr[1])); + } printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index ffea536b09a8..453e94a0bbd7 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c @@ -129,14 +129,15 @@ ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg) { char buf[256]; - struct iphdr iph; + struct iphdr _iph, *ih; - if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0) + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(iph.saddr), - NIPQUAD(iph.daddr)); + pp->name, NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr)); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index 1922388327b8..478e5c7c7e8e 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c @@ -128,14 +128,15 @@ esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg) { char buf[256]; - struct iphdr iph; + struct iphdr _iph, *ih; - if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0) + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(iph.saddr), - NIPQUAD(iph.daddr)); + pp->name, NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr)); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } 
diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c index d611b5a36d48..747e0333f5de 100644 --- a/net/ipv4/ipvs/ip_vs_proto_icmp.c +++ b/net/ipv4/ipvs/ip_vs_proto_icmp.c @@ -104,24 +104,29 @@ icmp_debug_packet(struct ip_vs_protocol *pp, const char *msg) { char buf[256]; - struct iphdr iph; - struct icmphdr icmph; + struct iphdr _iph, *ih; - if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0) + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); - else if (iph.frag_off & __constant_htons(IP_OFFSET)) + else if (ih->frag_off & __constant_htons(IP_OFFSET)) sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", - pp->name, NIPQUAD(iph.saddr), - NIPQUAD(iph.daddr)); - else if (skb_copy_bits(skb, offset + iph.ihl*4, &icmph, sizeof(icmph)) < 0) - sprintf(buf, "%s TRUNCATED to %u bytes\n", - pp->name, skb->len - offset); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d", - pp->name, NIPQUAD(iph.saddr), - NIPQUAD(iph.daddr), - icmph.type, icmph.code); - + pp->name, NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr)); + else { + struct icmphdr _icmph, *ic; + + ic = skb_header_pointer(skb, offset + ih->ihl*4, + sizeof(_icmph), &_icmph); + if (ic == NULL) + sprintf(buf, "%s TRUNCATED to %u bytes\n", + pp->name, skb->len - offset); + else + sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d", + pp->name, NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr), + ic->type, ic->code); + } printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 2f00e914288a..bd8f898bfe19 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -29,19 +29,20 @@ static struct ip_vs_conn * tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 ports[2]; + __u16 _ports[2], *pptr; - if (skb_copy_bits(skb, proto_off, ports, 
sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) return NULL; if (likely(!inverse)) { return ip_vs_conn_in_get(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1]); + iph->saddr, pptr[0], + iph->daddr, pptr[1]); } else { return ip_vs_conn_in_get(iph->protocol, - iph->daddr, ports[1], - iph->saddr, ports[0]); + iph->daddr, pptr[1], + iph->saddr, pptr[0]); } } @@ -49,19 +50,20 @@ static struct ip_vs_conn * tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 ports[2]; + __u16 _ports[2], *pptr; - if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) return NULL; if (likely(!inverse)) { return ip_vs_conn_out_get(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1]); + iph->saddr, pptr[0], + iph->daddr, pptr[1]); } else { return ip_vs_conn_out_get(iph->protocol, - iph->daddr, ports[1], - iph->saddr, ports[0]); + iph->daddr, pptr[1], + iph->saddr, pptr[0]); } } @@ -72,16 +74,18 @@ tcp_conn_schedule(struct sk_buff *skb, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) { + th = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) { *verdict = NF_DROP; return 0; } - if (tcph.syn && + if (th->syn && (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol, - skb->nh.iph->daddr, tcph.dest))) { + skb->nh.iph->daddr, th->dest))) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. 
@@ -483,13 +487,15 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_protocol *pp) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) + th = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) return 0; spin_lock(&cp->lock); - set_tcp_state(pp, cp, direction, &tcph); + set_tcp_state(pp, cp, direction, th); spin_unlock(&cp->lock); return 1; diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 81501c938605..443ec4578d40 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -26,19 +26,20 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 ports[2]; + __u16 _ports[2], *pptr; - if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) return NULL; if (likely(!inverse)) { cp = ip_vs_conn_in_get(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1]); + iph->saddr, pptr[0], + iph->daddr, pptr[1]); } else { cp = ip_vs_conn_in_get(iph->protocol, - iph->daddr, ports[1], - iph->saddr, ports[0]); + iph->daddr, pptr[1], + iph->saddr, pptr[0]); } return cp; @@ -50,19 +51,21 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 ports[2]; + __u16 _ports[2], *pptr; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, ports, sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_ports), _ports); + if (pptr == NULL) return NULL; if (likely(!inverse)) { cp = ip_vs_conn_out_get(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1]); + iph->saddr, pptr[0], + iph->daddr, pptr[1]); } else { cp = 
ip_vs_conn_out_get(iph->protocol, - iph->daddr, ports[1], - iph->saddr, ports[0]); + iph->daddr, pptr[1], + iph->saddr, pptr[0]); } return cp; @@ -74,15 +77,17 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; - struct udphdr udph; + struct udphdr _udph, *uh; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) { + uh = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_udph), &_udph); + if (uh == NULL) { *verdict = NF_DROP; return 0; } if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol, - skb->nh.iph->daddr, udph.dest))) { + skb->nh.iph->daddr, uh->dest))) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -230,13 +235,14 @@ udp_dnat_handler(struct sk_buff **pskb, static int udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) { - struct udphdr udph; + struct udphdr _udph, *uh; unsigned int udphoff = skb->nh.iph->ihl*4; - if (skb_copy_bits(skb, udphoff, &udph, sizeof(udph)) < 0) + uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); + if (uh == NULL) return 0; - if (udph.check != 0) { + if (uh->check != 0) { switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 204767be411e..3a85f7a8d02a 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -234,11 +234,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { - __u16 pt; - if (skb_copy_bits(skb, iph->ihl*4, &pt, sizeof(pt)) < 0) + __u16 _pt, *p; + p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); + if (p == NULL) goto tx_error; - ip_vs_conn_fill_cport(cp, pt); - IP_VS_DBG(10, "filled cport=%d\n", ntohs(pt)); + ip_vs_conn_fill_cport(cp, *p); + IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } if (!(rt = 
__ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index fc741925911a..a54ef782f8b5 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -49,7 +49,7 @@ static int help(struct sk_buff *skb, { struct ip_conntrack_expect *exp; struct ip_ct_amanda_expect *exp_amanda_info; - char *data, *data_limit, *tmp; + char *amp, *data, *data_limit, *tmp; unsigned int dataoff, i; u_int16_t port, len; @@ -70,9 +70,11 @@ static int help(struct sk_buff *skb, } LOCK_BH(&amanda_buffer_lock); - skb_copy_bits(skb, dataoff, amanda_buffer, skb->len - dataoff); - data = amanda_buffer; - data_limit = amanda_buffer + skb->len - dataoff; + /* Copy out: the NUL stored at data_limit below must not land in the skb. */ + skb_copy_bits(skb, dataoff, amanda_buffer, skb->len - dataoff); + amp = amanda_buffer; + data = amp; + data_limit = amp + skb->len - dataoff; *data_limit = '\0'; /* Search for the CONNECT string */ @@ -108,7 +110,7 @@ static int help(struct sk_buff *skb, exp->mask.dst.u.tcp.port = 0xFFFF; exp_amanda_info = &exp->help.exp_amanda_info; - exp_amanda_info->offset = tmp - amanda_buffer; + exp_amanda_info->offset = tmp - amp; exp_amanda_info->port = port; exp_amanda_info->len = len; diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index e4579a4b8fc7..c1403a0cc7d4 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -248,7 +248,8 @@ static int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo) { unsigned int dataoff, datalen; - struct tcphdr tcph; + struct tcphdr _tcph, *th; + char *fb_ptr; u_int32_t old_seq_aft_nl; int old_seq_aft_nl_set, ret; u_int32_t array[6] = { 0 }; @@ -268,10 +269,12 @@ static int help(struct sk_buff *skb, return NF_ACCEPT; } - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) != 0) + th = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) return
NF_ACCEPT; - dataoff = skb->nh.iph->ihl*4 + tcph.doff*4; + dataoff = skb->nh.iph->ihl*4 + th->doff*4; /* No data? */ if (dataoff >= skb->len) { DEBUGP("ftp: skblen = %u\n", skb->len); @@ -280,26 +283,28 @@ static int help(struct sk_buff *skb, datalen = skb->len - dataoff; LOCK_BH(&ip_ftp_lock); - skb_copy_bits(skb, dataoff, ftp_buffer, skb->len - dataoff); + fb_ptr = skb_header_pointer(skb, dataoff, + skb->len - dataoff, ftp_buffer); + BUG_ON(fb_ptr == NULL); old_seq_aft_nl_set = ct_ftp_info->seq_aft_nl_set[dir]; old_seq_aft_nl = ct_ftp_info->seq_aft_nl[dir]; DEBUGP("conntrack_ftp: datalen %u\n", datalen); - if (ftp_buffer[datalen - 1] == '\n') { + if (fb_ptr[datalen - 1] == '\n') { DEBUGP("conntrack_ftp: datalen %u ends in \\n\n", datalen); if (!old_seq_aft_nl_set - || after(ntohl(tcph.seq) + datalen, old_seq_aft_nl)) { + || after(ntohl(th->seq) + datalen, old_seq_aft_nl)) { DEBUGP("conntrack_ftp: updating nl to %u\n", - ntohl(tcph.seq) + datalen); + ntohl(th->seq) + datalen); ct_ftp_info->seq_aft_nl[dir] = - ntohl(tcph.seq) + datalen; + ntohl(th->seq) + datalen; ct_ftp_info->seq_aft_nl_set[dir] = 1; } } if(!old_seq_aft_nl_set || - (ntohl(tcph.seq) != old_seq_aft_nl)) { + (ntohl(th->seq) != old_seq_aft_nl)) { DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u)\n", old_seq_aft_nl_set ? 
"":"(UNSET) ", old_seq_aft_nl); ret = NF_ACCEPT; @@ -316,7 +321,7 @@ static int help(struct sk_buff *skb, for (i = 0; i < ARRAY_SIZE(search); i++) { if (search[i].dir != dir) continue; - found = find_pattern(ftp_buffer, skb->len - dataoff, + found = find_pattern(fb_ptr, skb->len - dataoff, search[i].pattern, search[i].plen, search[i].skip, @@ -334,7 +339,7 @@ static int help(struct sk_buff *skb, if (net_ratelimit()) printk("conntrack_ftp: partial %s %u+%u\n", search[i].pattern, - ntohl(tcph.seq), datalen); + ntohl(th->seq), datalen); ret = NF_DROP; goto out; } else if (found == 0) { /* No match */ @@ -344,7 +349,7 @@ static int help(struct sk_buff *skb, DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", (int)matchlen, data + matchoff, - matchlen, ntohl(tcph.seq) + matchoff); + matchlen, ntohl(th->seq) + matchoff); /* Allocate expectation which will be inserted */ exp = ip_conntrack_expect_alloc(); @@ -358,7 +363,7 @@ static int help(struct sk_buff *skb, /* Update the ftp info */ if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]) == ct->tuplehash[dir].tuple.src.ip) { - exp->seq = ntohl(tcph.seq) + matchoff; + exp->seq = ntohl(th->seq) + matchoff; exp_ftp_info->len = matchlen; exp_ftp_info->ftptype = search[i].ftptype; exp_ftp_info->port = array[4] << 8 | array[5]; diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index e06cb4ede710..0d0afe131e4e 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -102,8 +102,8 @@ static int help(struct sk_buff *skb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; - struct tcphdr tcph; - char *data, *data_limit; + struct tcphdr _tcph, *th; + char *data, *data_limit, *ib_ptr; int dir = CTINFO2DIR(ctinfo); struct ip_conntrack_expect *exp; struct ip_ct_irc_expect *exp_irc_info = NULL; @@ -127,19 +127,23 @@ static int help(struct sk_buff *skb, } /* Not a full tcp header? 
*/ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) != 0) + th = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) return NF_ACCEPT; /* No data? */ - dataoff = skb->nh.iph->ihl*4 + tcph.doff*4; + dataoff = skb->nh.iph->ihl*4 + th->doff*4; if (dataoff >= skb->len) return NF_ACCEPT; LOCK_BH(&ip_irc_lock); - skb_copy_bits(skb, dataoff, irc_buffer, skb->len - dataoff); + ib_ptr = skb_header_pointer(skb, dataoff, + skb->len - dataoff, irc_buffer); + BUG_ON(ib_ptr == NULL); - data = irc_buffer; - data_limit = irc_buffer + skb->len - dataoff; + data = ib_ptr; + data_limit = ib_ptr + skb->len - dataoff; /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ @@ -153,8 +157,8 @@ static int help(struct sk_buff *skb, /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n", - NIPQUAD(iph->saddr), ntohs(tcph.source), - NIPQUAD(iph->daddr), ntohs(tcph.dest)); + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest)); for (i = 0; i < ARRAY_SIZE(dccprotos); i++) { if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) { @@ -198,8 +202,8 @@ static int help(struct sk_buff *skb, /* save position of address in dcc string, * necessary for NAT */ - DEBUGP("tcph->seq = %u\n", tcph.seq); - exp->seq = ntohl(tcph.seq) + (addr_beg_p - irc_buffer); + DEBUGP("tcph->seq = %u\n", th->seq); + exp->seq = ntohl(th->seq) + (addr_beg_p - ib_ptr); exp_irc_info->len = (addr_end_p - addr_beg_p); exp_irc_info->port = dcc_port; DEBUGP("wrote info seq=%u (ofs=%u), len=%d\n", diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index b833a7089433..b2f0dee33f2a 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -31,14 +31,15 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct 
ip_conntrack_tuple *tuple) { - struct icmphdr hdr; + struct icmphdr _hdr, *hp; - if (skb_copy_bits(skb, dataoff, &hdr, sizeof(hdr)) != 0) + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) return 0; - tuple->dst.u.icmp.type = hdr.type; - tuple->src.u.icmp.id = hdr.un.echo.id; - tuple->dst.u.icmp.code = hdr.code; + tuple->dst.u.icmp.type = hp->type; + tuple->src.u.icmp.id = hp->un.echo.id; + tuple->dst.u.icmp.code = hp->code; return 1; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index a47351ba3e52..7fe8ae284e85 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -293,14 +293,15 @@ static int tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct ip_conntrack_tuple *tuple) { - struct tcphdr hdr; + struct tcphdr _hdr, *hp; /* Actually only need first 8 bytes. */ - if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0) + hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + if (hp == NULL) return 0; - tuple->src.u.tcp.port = hdr.source; - tuple->dst.u.tcp.port = hdr.dest; + tuple->src.u.tcp.port = hp->source; + tuple->dst.u.tcp.port = hp->dest; return 1; } @@ -385,12 +386,23 @@ static inline __u32 segment_seq_plus_len(__u32 seq, /* * Simplified tcp_parse_options routine from tcp_input.c */ -static void tcp_options(struct tcphdr *tcph, +static void tcp_options(struct sk_buff *skb, + struct iphdr *iph, + struct tcphdr *tcph, struct ip_ct_tcp_state *state) { - unsigned char *ptr = (unsigned char *)(tcph + 1); + unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; + unsigned char *ptr; int length = (tcph->doff*4) - sizeof(struct tcphdr); + if (!length) + return; + + ptr = skb_header_pointer(skb, + (iph->ihl * 4) + sizeof(struct tcphdr), + length, buff); + BUG_ON(ptr == NULL); + state->td_scale = state->flags = 0; @@ -533,7 +545,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, sender->td_maxend = end; 
sender->td_maxwin = (win == 0 ? 1 : win); - tcp_options(tcph, sender); + tcp_options(skb, iph, tcph, sender); /* * RFC 1323: * Both sides must send the Window Scale option @@ -565,7 +577,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); - tcp_options(tcph, sender); + tcp_options(skb, iph, tcph, sender); } if (!(tcph->ack)) { @@ -760,12 +772,14 @@ static int tcp_error(struct sk_buff *skb, unsigned int hooknum) { struct iphdr *iph = skb->nh.iph; - struct tcphdr tcph; + struct tcphdr _tcph, *th; unsigned int tcplen = skb->len - iph->ihl * 4; u_int8_t tcpflags; /* Smaller that minimal TCP header? */ - if (skb_copy_bits(skb, iph->ihl * 4, &tcph, sizeof(tcph)) != 0) { + th = skb_header_pointer(skb, iph->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, "ip_ct_tcp: short packet "); @@ -773,7 +787,7 @@ static int tcp_error(struct sk_buff *skb, } /* Not whole TCP header or malformed packet */ - if (tcph.doff*4 < sizeof(struct tcphdr) || tcplen < tcph.doff*4) { + if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, "ip_ct_tcp: truncated/malformed packet "); @@ -797,7 +811,7 @@ static int tcp_error(struct sk_buff *skb, } /* Check TCP flags. 
*/ - tcpflags = (((u_int8_t *)&tcph)[13] & ~(TH_ECE|TH_CWR)); + tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, @@ -808,19 +822,6 @@ static int tcp_error(struct sk_buff *skb, return NF_ACCEPT; } -static inline void copy_whole_tcp_header(const struct sk_buff *skb, - unsigned char *buff) -{ - struct iphdr *iph = skb->nh.iph; - struct tcphdr *tcph = (struct tcphdr *)buff; - - /* tcp_error guarantees for us that the packet is not malformed */ - skb_copy_bits(skb, iph->ihl * 4, buff, sizeof(*tcph)); - skb_copy_bits(skb, iph->ihl * 4 + sizeof(*tcph), - buff + sizeof(*tcph), - tcph->doff * 4 - sizeof(*tcph)); -} - /* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct ip_conntrack *conntrack, const struct sk_buff *skb, @@ -829,17 +830,18 @@ static int tcp_packet(struct ip_conntrack *conntrack, enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; struct iphdr *iph = skb->nh.iph; - unsigned char buff[15 * 4]; - struct tcphdr *tcph = (struct tcphdr *)buff; + struct tcphdr *th, _tcph; unsigned long timeout; unsigned int index; - copy_whole_tcp_header(skb, buff); + th = skb_header_pointer(skb, iph->ihl * 4, + sizeof(_tcph), &_tcph); + BUG_ON(th == NULL); WRITE_LOCK(&tcp_lock); old_state = conntrack->proto.tcp.state; dir = CTINFO2DIR(ctinfo); - index = get_conntrack_index(tcph); + index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; switch (new_state) { @@ -848,7 +850,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, if (index == TCP_SYNACK_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir - && after(ntohl(tcph->ack_seq), + && after(ntohl(th->ack_seq), conntrack->proto.tcp.last_seq)) { /* This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. 
This means that the client and @@ -868,7 +870,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, } conntrack->proto.tcp.last_index = index; conntrack->proto.tcp.last_dir = dir; - conntrack->proto.tcp.last_seq = ntohl(tcph->seq); + conntrack->proto.tcp.last_seq = ntohl(th->seq); WRITE_UNLOCK(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) @@ -878,7 +880,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, case TCP_CONNTRACK_MAX: /* Invalid packet */ DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", - dir, get_conntrack_index(tcph), + dir, get_conntrack_index(th), old_state); WRITE_UNLOCK(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) @@ -900,7 +902,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, if (index == TCP_RST_SET && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET - && after(ntohl(tcph->ack_seq), + && after(ntohl(th->ack_seq), conntrack->proto.tcp.last_seq)) { /* Ignore RST closing down invalid SYN we had let trough. */ @@ -917,7 +919,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, } if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, - skb, iph, tcph)) { + skb, iph, th)) { WRITE_UNLOCK(&tcp_lock); return -NF_ACCEPT; } @@ -929,10 +931,10 @@ static int tcp_packet(struct ip_conntrack *conntrack, DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", - NIPQUAD(iph->saddr), ntohs(tcph->source), - NIPQUAD(iph->daddr), ntohs(tcph->dest), - (tcph->syn ? 1 : 0), (tcph->ack ? 1 : 0), - (tcph->fin ? 1 : 0), (tcph->rst ? 1 : 0), + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest), + (th->syn ? 1 : 0), (th->ack ? 1 : 0), + (th->fin ? 1 : 0), (th->rst ? 
1 : 0), old_state, new_state); conntrack->proto.tcp.state = new_state; @@ -946,7 +948,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ - if (tcph->rst) { + if (th->rst) { if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); @@ -972,18 +974,19 @@ static int tcp_new(struct ip_conntrack *conntrack, { enum tcp_conntrack new_state; struct iphdr *iph = skb->nh.iph; - unsigned char buff[15 * 4]; - struct tcphdr *tcph = (struct tcphdr *)buff; + struct tcphdr *th, _tcph; #ifdef DEBUGP_VARS struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; #endif - copy_whole_tcp_header(skb, buff); - + th = skb_header_pointer(skb, iph->ihl * 4, + sizeof(_tcph), &_tcph); + BUG_ON(th == NULL); + /* Don't need lock here: this conntrack not in circulation yet */ new_state - = tcp_conntracks[0][get_conntrack_index(tcph)] + = tcp_conntracks[0][get_conntrack_index(th)] [TCP_CONNTRACK_NONE]; /* Invalid: delete conntrack */ @@ -995,15 +998,15 @@ static int tcp_new(struct ip_conntrack *conntrack, if (new_state == TCP_CONNTRACK_SYN_SENT) { /* SYN packet */ conntrack->proto.tcp.seen[0].td_end = - segment_seq_plus_len(ntohl(tcph->seq), skb->len, - iph, tcph); - conntrack->proto.tcp.seen[0].td_maxwin = ntohs(tcph->window); + segment_seq_plus_len(ntohl(th->seq), skb->len, + iph, th); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); if (conntrack->proto.tcp.seen[0].td_maxwin == 0) conntrack->proto.tcp.seen[0].td_maxwin = 1; conntrack->proto.tcp.seen[0].td_maxend = conntrack->proto.tcp.seen[0].td_end; - tcp_options(tcph, &conntrack->proto.tcp.seen[0]); + tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]); conntrack->proto.tcp.seen[1].flags = 0; conntrack->proto.tcp.seen[0].loose = conntrack->proto.tcp.seen[1].loose = 0; @@ -1017,9 +1020,9 @@ 
static int tcp_new(struct ip_conntrack *conntrack, * Let's try to use the data from the packet. */ conntrack->proto.tcp.seen[0].td_end = - segment_seq_plus_len(ntohl(tcph->seq), skb->len, - iph, tcph); - conntrack->proto.tcp.seen[0].td_maxwin = ntohs(tcph->window); + segment_seq_plus_len(ntohl(th->seq), skb->len, + iph, th); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); if (conntrack->proto.tcp.seen[0].td_maxwin == 0) conntrack->proto.tcp.seen[0].td_maxwin = 1; conntrack->proto.tcp.seen[0].td_maxend = @@ -1056,14 +1059,16 @@ static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp, const struct sk_buff *skb) { const struct iphdr *iph = skb->nh.iph; - struct tcphdr tcph; + struct tcphdr *th, _tcph; unsigned int datalen; - if (skb_copy_bits(skb, iph->ihl * 4, &tcph, sizeof(tcph)) != 0) + th = skb_header_pointer(skb, iph->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) return 0; - datalen = skb->len - iph->ihl*4 - tcph.doff*4; + datalen = skb->len - iph->ihl*4 - th->doff*4; - return between(exp->seq, ntohl(tcph.seq), ntohl(tcph.seq) + datalen); + return between(exp->seq, ntohl(th->seq), ntohl(th->seq) + datalen); } struct ip_conntrack_protocol ip_conntrack_protocol_tcp = diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 85ea29203f7a..0fe9e9188fdf 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -23,14 +23,15 @@ static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct ip_conntrack_tuple *tuple) { - struct udphdr hdr; + struct udphdr _hdr, *hp; /* Actually only need first 8 bytes. 
*/ - if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0) + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) return 0; - tuple->src.u.udp.port = hdr.source; - tuple->dst.u.udp.port = hdr.dest; + tuple->src.u.udp.port = hp->source; + tuple->dst.u.udp.port = hp->dest; return 1; } diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index 79a2b10070a5..d132a3c48d8d 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -42,14 +42,16 @@ static int tftp_help(struct sk_buff *skb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct tftphdr tftph; + struct tftphdr _tftph, *tfh; struct ip_conntrack_expect *exp; - if (skb_copy_bits(skb, skb->nh.iph->ihl * 4 + sizeof(struct udphdr), - &tftph, sizeof(tftph)) != 0) + tfh = skb_header_pointer(skb, + skb->nh.iph->ihl * 4 + sizeof(struct udphdr), + sizeof(_tftph), &_tftph); + if (tfh == NULL) return NF_ACCEPT; - switch (ntohs(tftph.opcode)) { + switch (ntohs(tfh->opcode)) { /* RRQ and WRQ works the same way */ case TFTP_OPCODE_READ: case TFTP_OPCODE_WRITE: diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 9765fd2d5cf1..62ef0d1f7554 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -99,11 +99,13 @@ ip_nat_fn(unsigned int hooknum, hash table yet). We must not let this through, in case we're doing NAT to the same network. 
*/ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - struct icmphdr hdr; + struct icmphdr _hdr, *hp; - if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4, - &hdr, sizeof(hdr)) == 0 - && hdr.type == ICMP_REDIRECT) + hp = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_hdr), &_hdr); + if (hp != NULL && + hp->type == ICMP_REDIRECT) return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c index c94b999fc3e8..cacaab6f768c 100644 --- a/net/ipv4/netfilter/ip_nat_tftp.c +++ b/net/ipv4/netfilter/ip_nat_tftp.c @@ -60,7 +60,7 @@ tftp_nat_help(struct ip_conntrack *ct, struct sk_buff **pskb) { int dir = CTINFO2DIR(ctinfo); - struct tftphdr tftph; + struct tftphdr _tftph, *tfh; struct ip_conntrack_tuple repl; if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) @@ -72,11 +72,13 @@ tftp_nat_help(struct ip_conntrack *ct, return NF_ACCEPT; } - if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr), - &tftph, sizeof(tftph)) != 0) + tfh = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr), + sizeof(_tftph), &_tftph); + if (tfh == NULL) return NF_DROP; - switch (ntohs(tftph.opcode)) { + switch (ntohs(tfh->opcode)) { /* RRQ and WRQ works the same way */ case TFTP_OPCODE_READ: case TFTP_OPCODE_WRITE: @@ -109,9 +111,12 @@ tftp_nat_expected(struct sk_buff **pskb, #if 0 const struct ip_conntrack_tuple *repl = &master->tuplehash[IP_CT_DIR_REPLY].tuple; - struct udphdr udph; + struct udphdr _udph, *uh; - if (skb_copy_bits(*pskb,(*pskb)->nh.iph->ihl*4,&udph,sizeof(udph))!=0) + uh = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_udph), &_udph); + if (uh == NULL) return NF_DROP; #endif @@ -126,8 +131,8 @@ tftp_nat_expected(struct sk_buff **pskb, mr.range[0].min_ip = mr.range[0].max_ip = orig->dst.ip; DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " "newsrc: %u.%u.%u.%u\n", - NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source), - 
NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest), + NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source), + NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest), NIPQUAD(orig->dst.ip)); } else { mr.range[0].min_ip = mr.range[0].max_ip = orig->src.ip; @@ -137,8 +142,8 @@ tftp_nat_expected(struct sk_buff **pskb, DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " "newdst: %u.%u.%u.%u:%u\n", - NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source), - NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest), + NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source), + NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest), NIPQUAD(orig->src.ip), ntohs(orig->src.u.udp.port)); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 878d3bb329ea..bd2e13211a7f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1460,21 +1460,24 @@ tcp_find_option(u_int8_t option, int *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ - u_int8_t opt[60 - sizeof(struct tcphdr)]; + u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; unsigned int i; duprintf("tcp_match: finding option\n"); /* If we don't have the whole header, drop packet. 
*/ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4 + sizeof(struct tcphdr), - opt, optlen) < 0) { + if (!optlen) return invert; + op = skb_header_pointer(skb, + skb->nh.iph->ihl*4 + sizeof(struct tcphdr), + optlen, _opt); + if (op == NULL) { *hotdrop = 1; return 0; } for (i = 0; i < optlen; ) { - if (opt[i] == option) return !invert; - if (opt[i] < 2) i++; - else i += opt[i+1]?:1; + if (op[i] == option) return !invert; + if (op[i] < 2) i++; + else i += op[i+1]?:1; } return invert; @@ -1488,7 +1491,7 @@ tcp_match(const struct sk_buff *skb, int offset, int *hotdrop) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; const struct ipt_tcp *tcpinfo = matchinfo; if (offset) { @@ -1508,7 +1511,9 @@ tcp_match(const struct sk_buff *skb, #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg)) - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) { + th = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. 
*/ duprintf("Dropping evil TCP offset=0 tinygram.\n"); @@ -1517,23 +1522,24 @@ tcp_match(const struct sk_buff *skb, } if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], - ntohs(tcph.source), + ntohs(th->source), !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))) return 0; if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], - ntohs(tcph.dest), + ntohs(th->dest), !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))) return 0; - if (!FWINVTCP((((unsigned char *)&tcph)[13] & tcpinfo->flg_mask) + if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp, IPT_TCP_INV_FLAGS)) return 0; if (tcpinfo->option) { - if (tcph.doff * 4 < sizeof(tcph)) { + if (th->doff * 4 < sizeof(_tcph)) { *hotdrop = 1; return 0; } - if (!tcp_find_option(tcpinfo->option, skb, tcph.doff*4 - sizeof(tcph), + if (!tcp_find_option(tcpinfo->option, skb, + th->doff*4 - sizeof(_tcph), tcpinfo->invflags & IPT_TCP_INV_OPTION, hotdrop)) return 0; @@ -1566,14 +1572,16 @@ udp_match(const struct sk_buff *skb, int offset, int *hotdrop) { - struct udphdr udph; + struct udphdr _udph, *uh; const struct ipt_udp *udpinfo = matchinfo; /* Must not be a fragment. */ if (offset) return 0; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) { + uh = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_udph), &_udph); + if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. 
*/ duprintf("Dropping evil UDP tinygram.\n"); @@ -1582,10 +1590,10 @@ udp_match(const struct sk_buff *skb, } return port_match(udpinfo->spts[0], udpinfo->spts[1], - ntohs(udph.source), + ntohs(uh->source), !!(udpinfo->invflags & IPT_UDP_INV_SRCPT)) && port_match(udpinfo->dpts[0], udpinfo->dpts[1], - ntohs(udph.dest), + ntohs(uh->dest), !!(udpinfo->invflags & IPT_UDP_INV_DSTPT)); } @@ -1637,16 +1645,19 @@ icmp_match(const struct sk_buff *skb, int offset, int *hotdrop) { - struct icmphdr icmph; + struct icmphdr _icmph, *ic; const struct ipt_icmp *icmpinfo = matchinfo; /* Must not be a fragment. */ if (offset) return 0; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph)) < 0){ + ic = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_icmph), &_icmph); + if (ic == NULL) { /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ + * can't. Hence, no choice but to drop. + */ duprintf("Dropping evil ICMP tinygram.\n"); *hotdrop = 1; return 0; @@ -1655,7 +1666,7 @@ icmp_match(const struct sk_buff *skb, return icmp_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], - icmph.type, icmph.code, + ic->type, ic->code, !!(icmpinfo->invflags&IPT_ICMP_INV)); } diff --git a/net/ipv4/netfilter/ipchains_core.c b/net/ipv4/netfilter/ipchains_core.c index 97b5401ef1ad..1360222c2537 100644 --- a/net/ipv4/netfilter/ipchains_core.c +++ b/net/ipv4/netfilter/ipchains_core.c @@ -679,49 +679,53 @@ ip_fw_check(const char *rif, case IPPROTO_TCP: dprintf("TCP "); if (!offset) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (skb_copy_bits(*pskb, - (*pskb)->nh.iph->ihl * 4, - &tcph, sizeof(tcph))) + th = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) return FW_BLOCK; - src_port = ntohs(tcph.source); - dst_port = ntohs(tcph.dest); + src_port = ntohs(th->source); + dst_port = ntohs(th->dest); /* Connection initilisation can only * be made when the syn bit is set and * 
neither of the ack or reset is * set. */ - if (tcph.syn && !(tcph.ack || tcph.rst)) + if (th->syn && !(th->ack || th->rst)) tcpsyn = 1; } break; case IPPROTO_UDP: dprintf("UDP "); if (!offset) { - struct udphdr udph; + struct udphdr _udph, *uh; - if (skb_copy_bits(*pskb, - (*pskb)->nh.iph->ihl * 4, - &udph, sizeof(udph))) + uh = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_udph), &_udph); + if (uh == NULL) return FW_BLOCK; - src_port = ntohs(udph.source); - dst_port = ntohs(udph.dest); + src_port = ntohs(uh->source); + dst_port = ntohs(uh->dest); } break; case IPPROTO_ICMP: if (!offset) { - struct icmphdr icmph; + struct icmphdr _icmph, *ic; - if (skb_copy_bits(*pskb, - (*pskb)->nh.iph->ihl * 4, - &icmph, sizeof(icmph))) + ic = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_icmph), + &_icmph); + if (ic == NULL) return FW_BLOCK; - src_port = (__u16) icmph.type; - dst_port = (__u16) icmph.code; + src_port = (__u16) ic->type; + dst_port = (__u16) ic->code; } dprintf("ICMP "); break; diff --git a/net/ipv4/netfilter/ipfwadm_core.c b/net/ipv4/netfilter/ipfwadm_core.c index 424a9034fa27..c38a6887722d 100644 --- a/net/ipv4/netfilter/ipfwadm_core.c +++ b/net/ipv4/netfilter/ipfwadm_core.c @@ -410,20 +410,21 @@ int ip_fw_chk(struct sk_buff **pskb, dprintf1("TCP "); /* ports stay 0xFFFF if it is not the first fragment */ if (!offset) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (skb_copy_bits(*pskb, - (*pskb)->nh.iph->ihl * 4, - &tcph, sizeof(tcph))) + th = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) return FW_BLOCK; - src_port = ntohs(tcph.source); - dst_port = ntohs(tcph.dest); + src_port = ntohs(th->source); + dst_port = ntohs(th->dest); - if(!tcph.ack && !tcph.rst) + if(!th->ack && !th->rst) /* We do NOT have ACK, value TRUE */ notcpack = 1; - if(!tcph.syn || !notcpack) + if(!th->syn || !notcpack) /* We do NOT have SYN, value TRUE */ notcpsyn = 1; } @@ -433,29 +434,32 @@ int 
ip_fw_chk(struct sk_buff **pskb, dprintf1("UDP "); /* ports stay 0xFFFF if it is not the first fragment */ if (!offset) { - struct udphdr udph; + struct udphdr _udph, *uh; - if (skb_copy_bits(*pskb, - (*pskb)->nh.iph->ihl * 4, - &udph, sizeof(udph))) + uh = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_udph), &_udph); + if (uh == NULL) return FW_BLOCK; - src_port = ntohs(udph.source); - dst_port = ntohs(udph.dest); + src_port = ntohs(uh->source); + dst_port = ntohs(uh->dest); } prt = IP_FW_F_UDP; break; case IPPROTO_ICMP: /* icmp_type stays 255 if it is not the first fragment */ if (!offset) { - struct icmphdr icmph; + struct icmphdr _icmph, *ic; - if (skb_copy_bits(*pskb, - (*pskb)->nh.iph->ihl * 4, - &icmph, sizeof(icmph))) + ic = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_icmph), + &_icmph); + if (ic == NULL) return FW_BLOCK; - icmp_type = (__u16) icmph.type; + icmp_type = (__u16) ic->type; } dprintf2("ICMP:%d ", icmp_type); prt = IP_FW_F_ICMP; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 8ca402564f5e..120109cd294d 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -52,34 +52,39 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) static inline int set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; u_int16_t diffs[2]; /* Not enought header? 
*/ - if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4, &tcph, sizeof(tcph)) - < 0) + th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, + sizeof(_tcph), &_tcph); + if (th == NULL) return 0; - diffs[0] = ((u_int16_t *)&tcph)[6]; + diffs[0] = ((u_int16_t *)th)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) - tcph.ece = einfo->proto.tcp.ece; + th->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) - tcph.cwr = einfo->proto.tcp.cwr; - diffs[1] = ((u_int16_t *)&tcph)[6]; + th->cwr = einfo->proto.tcp.cwr; + diffs[1] = ((u_int16_t *)&th)[6]; /* Only mangle if it's changed. */ if (diffs[0] != diffs[1]) { diffs[0] = diffs[0] ^ 0xFFFF; if (!skb_ip_make_writable(pskb, - (*pskb)->nh.iph->ihl*4+sizeof(tcph))) + (*pskb)->nh.iph->ihl*4+sizeof(_tcph))) return 0; + + if (th != &_tcph) + memcpy(&_tcph, th, sizeof(_tcph)); + if ((*pskb)->ip_summed != CHECKSUM_HW) - tcph.check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - tcph.check^0xFFFF)); + _tcph.check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + _tcph.check^0xFFFF)); memcpy((*pskb)->data + (*pskb)->nh.iph->ihl*4, - &tcph, sizeof(tcph)); + &_tcph, sizeof(_tcph)); if ((*pskb)->ip_summed == CHECKSUM_HW) if (skb_checksum_help(pskb, inward)) return 0; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 47e49ad8202b..2a3e3eb424e3 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -45,9 +45,10 @@ static void dump_packet(const struct ipt_log_info *info, const struct sk_buff *skb, unsigned int iphoff) { - struct iphdr iph; + struct iphdr _iph, *ih; - if (skb_copy_bits(skb, iphoff, &iph, sizeof(iph)) < 0) { + ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); + if (ih == NULL) { printk("TRUNCATED"); return; } @@ -56,32 +57,34 @@ static void dump_packet(const struct ipt_log_info *info, * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. 
*/ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", - NIPQUAD(iph.saddr), NIPQUAD(iph.daddr)); + NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", - ntohs(iph.tot_len), iph.tos & IPTOS_TOS_MASK, - iph.tos & IPTOS_PREC_MASK, iph.ttl, ntohs(iph.id)); + ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, + ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); /* Max length: 6 "CE DF MF " */ - if (ntohs(iph.frag_off) & IP_CE) + if (ntohs(ih->frag_off) & IP_CE) printk("CE "); - if (ntohs(iph.frag_off) & IP_DF) + if (ntohs(ih->frag_off) & IP_DF) printk("DF "); - if (ntohs(iph.frag_off) & IP_MF) + if (ntohs(ih->frag_off) & IP_MF) printk("MF "); /* Max length: 11 "FRAG:65535 " */ - if (ntohs(iph.frag_off) & IP_OFFSET) - printk("FRAG:%u ", ntohs(iph.frag_off) & IP_OFFSET); + if (ntohs(ih->frag_off) & IP_OFFSET) + printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); if ((info->logflags & IPT_LOG_IPOPT) - && iph.ihl * 4 > sizeof(struct iphdr)) { - unsigned char opt[4 * 15 - sizeof(struct iphdr)]; + && ih->ihl * 4 > sizeof(struct iphdr)) { + unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; unsigned int i, optsize; - optsize = iph.ihl * 4 - sizeof(struct iphdr); - if (skb_copy_bits(skb, iphoff+sizeof(iph), opt, optsize) < 0) { + optsize = ih->ihl * 4 - sizeof(struct iphdr); + op = skb_header_pointer(skb, iphoff+sizeof(_iph), + optsize, _opt); + if (op == NULL) { printk("TRUNCATED"); return; } @@ -89,67 +92,71 @@ static void dump_packet(const struct ipt_log_info *info, /* Max length: 127 "OPT (" 15*4*2chars ") " */ printk("OPT ("); for (i = 0; i < optsize; i++) - printk("%02X", opt[i]); + printk("%02X", op[i]); printk(") "); } - switch (iph.protocol) { + switch (ih->protocol) { case IPPROTO_TCP: { - struct tcphdr tcph; + struct tcphdr _tcph, *th; /* Max length: 10 "PROTO=TCP " */ printk("PROTO=TCP "); - if 
(ntohs(iph.frag_off) & IP_OFFSET) + if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (skb_copy_bits(skb, iphoff+iph.ihl*4, &tcph, sizeof(tcph)) - < 0) { + th = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) { printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - iph.ihl*4); + skb->len - iphoff - ih->ihl*4); break; } /* Max length: 20 "SPT=65535 DPT=65535 " */ printk("SPT=%u DPT=%u ", - ntohs(tcph.source), ntohs(tcph.dest)); + ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ if (info->logflags & IPT_LOG_TCPSEQ) printk("SEQ=%u ACK=%u ", - ntohl(tcph.seq), ntohl(tcph.ack_seq)); + ntohl(th->seq), ntohl(th->ack_seq)); /* Max length: 13 "WINDOW=65535 " */ - printk("WINDOW=%u ", ntohs(tcph.window)); + printk("WINDOW=%u ", ntohs(th->window)); /* Max length: 9 "RES=0x3F " */ - printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(&tcph) & TCP_RESERVED_BITS) >> 22)); + printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (tcph.cwr) + if (th->cwr) printk("CWR "); - if (tcph.ece) + if (th->ece) printk("ECE "); - if (tcph.urg) + if (th->urg) printk("URG "); - if (tcph.ack) + if (th->ack) printk("ACK "); - if (tcph.psh) + if (th->psh) printk("PSH "); - if (tcph.rst) + if (th->rst) printk("RST "); - if (tcph.syn) + if (th->syn) printk("SYN "); - if (tcph.fin) + if (th->fin) printk("FIN "); /* Max length: 11 "URGP=65535 " */ - printk("URGP=%u ", ntohs(tcph.urg_ptr)); + printk("URGP=%u ", ntohs(th->urg_ptr)); if ((info->logflags & IPT_LOG_TCPOPT) - && tcph.doff * 4 > sizeof(struct tcphdr)) { - unsigned char opt[4 * 15 - sizeof(struct tcphdr)]; + && th->doff * 4 > sizeof(struct tcphdr)) { + unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; + unsigned char *op; unsigned int i, optsize; - optsize = tcph.doff * 4 - sizeof(struct tcphdr); - if (skb_copy_bits(skb, 
iphoff+iph.ihl*4 + sizeof(tcph), - opt, optsize) < 0) { + optsize = th->doff * 4 - sizeof(struct tcphdr); + op = skb_header_pointer(skb, + iphoff+ih->ihl*4+sizeof(_tcph), + optsize, _opt); + if (op == NULL) { printk("TRUNCATED"); return; } @@ -157,36 +164,37 @@ static void dump_packet(const struct ipt_log_info *info, /* Max length: 127 "OPT (" 15*4*2chars ") " */ printk("OPT ("); for (i = 0; i < optsize; i++) - printk("%02X", opt[i]); + printk("%02X", op[i]); printk(") "); } break; } case IPPROTO_UDP: { - struct udphdr udph; + struct udphdr _udph, *uh; /* Max length: 10 "PROTO=UDP " */ printk("PROTO=UDP "); - if (ntohs(iph.frag_off) & IP_OFFSET) + if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (skb_copy_bits(skb, iphoff+iph.ihl*4, &udph, sizeof(udph)) - < 0) { + uh = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_udph), &_udph); + if (uh == NULL) { printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - iph.ihl*4); + skb->len - iphoff - ih->ihl*4); break; } /* Max length: 20 "SPT=65535 DPT=65535 " */ printk("SPT=%u DPT=%u LEN=%u ", - ntohs(udph.source), ntohs(udph.dest), - ntohs(udph.len)); + ntohs(uh->source), ntohs(uh->dest), + ntohs(uh->len)); break; } case IPPROTO_ICMP: { - struct icmphdr icmph; + struct icmphdr _icmph, *ich; static size_t required_len[NR_ICMP_TYPES+1] = { [ICMP_ECHOREPLY] = 4, [ICMP_DEST_UNREACH] @@ -208,47 +216,48 @@ static void dump_packet(const struct ipt_log_info *info, /* Max length: 11 "PROTO=ICMP " */ printk("PROTO=ICMP "); - if (ntohs(iph.frag_off) & IP_OFFSET) + if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (skb_copy_bits(skb, iphoff+iph.ihl*4, &icmph, sizeof(icmph)) - < 0) { + ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_icmph), &_icmph); + if (ich == NULL) { printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - iph.ihl*4); + skb->len - iphoff - ih->ihl*4); break; } /* Max length: 18 "TYPE=255 CODE=255 " */ - 
printk("TYPE=%u CODE=%u ", icmph.type, icmph.code); + printk("TYPE=%u CODE=%u ", ich->type, ich->code); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (icmph.type <= NR_ICMP_TYPES - && required_len[icmph.type] - && skb->len-iphoff-iph.ihl*4 < required_len[icmph.type]) { + if (ich->type <= NR_ICMP_TYPES + && required_len[ich->type] + && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - iph.ihl*4); + skb->len - iphoff - ih->ihl*4); break; } - switch (icmph.type) { + switch (ich->type) { case ICMP_ECHOREPLY: case ICMP_ECHO: /* Max length: 19 "ID=65535 SEQ=65535 " */ printk("ID=%u SEQ=%u ", - ntohs(icmph.un.echo.id), - ntohs(icmph.un.echo.sequence)); + ntohs(ich->un.echo.id), + ntohs(ich->un.echo.sequence)); break; case ICMP_PARAMETERPROB: /* Max length: 14 "PARAMETER=255 " */ printk("PARAMETER=%u ", - ntohl(icmph.un.gateway) >> 24); + ntohl(ich->un.gateway) >> 24); break; case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ printk("GATEWAY=%u.%u.%u.%u ", - NIPQUAD(icmph.un.gateway)); + NIPQUAD(ich->un.gateway)); /* Fall through */ case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: @@ -257,62 +266,65 @@ static void dump_packet(const struct ipt_log_info *info, if (!iphoff) { /* Only recurse once. 
*/ printk("["); dump_packet(info, skb, - iphoff + iph.ihl*4+sizeof(icmph)); + iphoff + ih->ihl*4+sizeof(_icmph)); printk("] "); } /* Max length: 10 "MTU=65535 " */ - if (icmph.type == ICMP_DEST_UNREACH - && icmph.code == ICMP_FRAG_NEEDED) - printk("MTU=%u ", ntohs(icmph.un.frag.mtu)); + if (ich->type == ICMP_DEST_UNREACH + && ich->code == ICMP_FRAG_NEEDED) + printk("MTU=%u ", ntohs(ich->un.frag.mtu)); } break; } /* Max Length */ case IPPROTO_AH: { - struct ip_auth_hdr ah; + struct ip_auth_hdr _ahdr, *ah; - if (ntohs(iph.frag_off) & IP_OFFSET) + if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 9 "PROTO=AH " */ printk("PROTO=AH "); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (skb_copy_bits(skb, iphoff+iph.ihl*4, &ah, sizeof(ah)) < 0) { + ah = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_ahdr), &_ahdr); + if (ah == NULL) { printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - iph.ihl*4); + skb->len - iphoff - ih->ihl*4); break; } /* Length: 15 "SPI=0xF1234567 " */ - printk("SPI=0x%x ", ntohl(ah.spi)); + printk("SPI=0x%x ", ntohl(ah->spi)); break; } case IPPROTO_ESP: { - struct ip_esp_hdr esph; + struct ip_esp_hdr _esph, *eh; /* Max length: 10 "PROTO=ESP " */ printk("PROTO=ESP "); - if (ntohs(iph.frag_off) & IP_OFFSET) + if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (skb_copy_bits(skb, iphoff+iph.ihl*4, &esph, sizeof(esph)) - < 0) { + eh = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_esph), &_esph); + if (eh == NULL) { printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - iph.ihl*4); + skb->len - iphoff - ih->ihl*4); break; } /* Length: 15 "SPI=0xF1234567 " */ - printk("SPI=0x%x ", ntohl(esph.spi)); + printk("SPI=0x%x ", ntohl(eh->spi)); break; } /* Max length: 10 "PROTO 255 " */ default: - printk("PROTO=%u ", iph.protocol); + printk("PROTO=%u ", ih->protocol); } /* Proto Max log string length */ diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 
b8018cb023ff..9637b75fd71e 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -103,7 +103,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, int hook) static void send_reset(struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; - struct tcphdr otcph, *tcph; + struct tcphdr _otcph, *oth, *tcph; struct rtable *rt; u_int16_t tmp_port; u_int32_t tmp_addr; @@ -114,12 +114,13 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) return; - if (skb_copy_bits(oldskb, oldskb->nh.iph->ihl*4, - &otcph, sizeof(otcph)) < 0) + oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4, + sizeof(_otcph), &_otcph); + if (oth == NULL) return; /* No RST for RST. */ - if (otcph.rst) + if (oth->rst) return; /* FIXME: Check checksum --RR */ @@ -167,13 +168,13 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (tcph->ack) { needs_ack = 0; - tcph->seq = otcph.ack_seq; + tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - oldskb->nh.iph->ihl*4 - - (otcph.doff<<2)); + - (oth->doff<<2)); tcph->seq = 0; } diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 1f0d7652f6dc..a0fea847cb72 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -43,23 +43,26 @@ match(const struct sk_buff *skb, int offset, int *hotdrop) { - struct ip_auth_hdr ah; + struct ip_auth_hdr _ahdr, *ah; const struct ipt_ah *ahinfo = matchinfo; /* Must not be a fragment. */ if (offset) return 0; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &ah, sizeof(ah)) < 0) { + ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4, + sizeof(_ahdr), &_ahdr); + if (ah == NULL) { /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ + * can't. Hence, no choice but to drop. 
+ */ duprintf("Dropping evil AH tinygram.\n"); *hotdrop = 1; return 0; } return spi_match(ahinfo->spis[0], ahinfo->spis[1], - ntohl(ah.spi), + ntohl(ah->spi), !!(ahinfo->invflags & IPT_AH_INV_SPI)); } diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 0e1efd764fc6..b6f7181e89cc 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -30,31 +30,34 @@ static inline int match_tcp(const struct sk_buff *skb, const struct ipt_ecn_info *einfo, int *hotdrop) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; /* In practice, TCP match does this, so can't fail. But let's - be good citizens. */ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) { + * be good citizens. + */ + th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) { *hotdrop = 0; return 0; } if (einfo->operation & IPT_ECN_OP_MATCH_ECE) { if (einfo->invert & IPT_ECN_OP_MATCH_ECE) { - if (tcph.ece == 1) + if (th->ece == 1) return 0; } else { - if (tcph.ece == 0) + if (th->ece == 0) return 0; } } if (einfo->operation & IPT_ECN_OP_MATCH_CWR) { if (einfo->invert & IPT_ECN_OP_MATCH_CWR) { - if (tcph.cwr == 1) + if (th->cwr == 1) return 0; } else { - if (tcph.cwr == 0) + if (th->cwr == 0) return 0; } } diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c index c3b889378844..e1d0dd31e117 100644 --- a/net/ipv4/netfilter/ipt_esp.c +++ b/net/ipv4/netfilter/ipt_esp.c @@ -44,23 +44,26 @@ match(const struct sk_buff *skb, int offset, int *hotdrop) { - struct ip_esp_hdr esp; + struct ip_esp_hdr _esp, *eh; const struct ipt_esp *espinfo = matchinfo; /* Must not be a fragment. */ if (offset) return 0; - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &esp, sizeof(esp)) < 0) { + eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4, + sizeof(_esp), &_esp); + if (eh == NULL) { /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ + * can't. Hence, no choice but to drop. 
+ */ duprintf("Dropping evil ESP tinygram.\n"); *hotdrop = 1; return 0; } return spi_match(espinfo->spis[0], espinfo->spis[1], - ntohl(esp.spi), + ntohl(eh->spi), !!(espinfo->invflags & IPT_ESP_INV_SPI)); } diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c index 64e7999b049c..7fdf41e22c86 100644 --- a/net/ipv4/netfilter/ipt_multiport.c +++ b/net/ipv4/netfilter/ipt_multiport.c @@ -54,7 +54,7 @@ match(const struct sk_buff *skb, int offset, int *hotdrop) { - u16 ports[2]; + u16 _ports[2], *pptr; const struct ipt_multiport *multiinfo = matchinfo; /* Must not be a fragment. */ @@ -63,18 +63,21 @@ match(const struct sk_buff *skb, /* Must be big enough to read ports (both UDP and TCP have them at the start). */ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, ports, sizeof(ports)) < 0) { + pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4, + sizeof(_ports), &_ports[0]); + if (pptr == NULL) { /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ - duprintf("ipt_multiport:" - " Dropping evil offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; + * can't. Hence, no choice but to drop. + */ + duprintf("ipt_multiport:" + " Dropping evil offset=0 tinygram.\n"); + *hotdrop = 1; + return 0; } return ports_match(multiinfo->ports, multiinfo->flags, multiinfo->count, - ntohs(ports[0]), ntohs(ports[1])); + ntohs(pptr[0]), ntohs(pptr[1])); } /* Called when user tries to insert an entry of this type. */ diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/ipv4/netfilter/ipt_tcpmss.c index c7cb62ade3f4..5cda547e011e 100644 --- a/net/ipv4/netfilter/ipt_tcpmss.c +++ b/net/ipv4/netfilter/ipt_tcpmss.c @@ -27,37 +27,45 @@ mssoption_match(u_int16_t min, u_int16_t max, int invert, int *hotdrop) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; /* tcp.doff is only 4 bits, ie. 
max 15 * 4 bytes */ - u8 opt[15 * 4 - sizeof(tcph)]; + u8 _opt[15 * 4 - sizeof(_tcph)], *op; unsigned int i, optlen; /* If we don't have the whole header, drop packet. */ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) + th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) goto dropit; /* Malformed. */ - if (tcph.doff*4 < sizeof(tcph)) + if (th->doff*4 < sizeof(*th)) goto dropit; - optlen = tcph.doff*4 - sizeof(tcph); + optlen = th->doff*4 - sizeof(*th); + if (!optlen) + goto out; + /* Truncated options. */ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4+sizeof(tcph), opt, optlen)<0) + op = skb_header_pointer(skb, skb->nh.iph->ihl * 4 + sizeof(*th), + optlen, _opt); + if (op == NULL) goto dropit; for (i = 0; i < optlen; ) { - if (opt[i] == TCPOPT_MSS + if (op[i] == TCPOPT_MSS && (optlen - i) >= TCPOLEN_MSS - && opt[i+1] == TCPOLEN_MSS) { + && op[i+1] == TCPOLEN_MSS) { u_int16_t mssval; - mssval = (opt[i+2] << 8) | opt[i+3]; + mssval = (op[i+2] << 8) | op[i+3]; return (mssval >= min && mssval <= max) ^ invert; } - if (opt[i] < 2) i++; - else i += opt[i+1]?:1; + if (op[i] < 2) i++; + else i += op[i+1]?:1; } +out: return invert; dropit: diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 07151a6c354d..6dda815c013f 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -68,34 +68,35 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len u8 nexthdr = *nexthdrp; while (ipv6_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr hdr; + struct ipv6_opt_hdr _hdr, *hp; int hdrlen; if (len < (int)sizeof(struct ipv6_opt_hdr)) return -1; if (nexthdr == NEXTHDR_NONE) return -1; - if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) BUG(); if (nexthdr == NEXTHDR_FRAGMENT) { - unsigned short frag_off; - if (skb_copy_bits(skb, - start+offsetof(struct frag_hdr, - frag_off), - &frag_off, - 
sizeof(frag_off))) { + unsigned short _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) return -1; - } - if (ntohs(frag_off) & ~0x7) + if (ntohs(*fp) & ~0x7) break; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hdr.hdrlen+2)<<2; + hdrlen = (hp->hdrlen+2)<<2; else - hdrlen = ipv6_optlen(&hdr); + hdrlen = ipv6_optlen(hp); - nexthdr = hdr.nexthdr; + nexthdr = hp->nexthdr; len -= hdrlen; start += hdrlen; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4faafff32e7b..10d780c17412 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -139,10 +139,12 @@ static int is_ineligible(struct sk_buff *skb) if (ptr < 0) return 0; if (nexthdr == IPPROTO_ICMPV6) { - u8 type; - if (skb_copy_bits(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), - &type, 1) - || !(type & ICMPV6_INFOMSG_MASK)) + u8 _type, *tp; + tp = skb_header_pointer(skb, + ptr+offsetof(struct icmp6hdr, icmp6_type), + sizeof(_type), &_type); + if (tp == NULL || + !(*tp & ICMPV6_INFOMSG_MASK)) return 1; } return 0; @@ -200,12 +202,13 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) { - u8 optval; + u8 _optval, *op; offset += skb->nh.raw - skb->data; - if (skb_copy_bits(skb, offset, &optval, 1)) + op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); + if (op == NULL) return 1; - return (optval&0xC0) == 0x80; + return (*op & 0xC0) == 0x80; } int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len) diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index 6c87fcaeca6a..1f0d51a341f3 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -929,7 +929,6 @@ static void rxrpc_call_receive_packet(struct rxrpc_call *call) { struct rxrpc_message *msg; struct list_head *_p; - uint32_t data32; _enter("%p", call); @@ -986,22 +985,21 @@ static void rxrpc_call_receive_packet(struct 
rxrpc_call *call) break; /* deal with abort packets */ - case RXRPC_PACKET_TYPE_ABORT: - data32 = 0; - if (skb_copy_bits(msg->pkt, msg->offset, - &data32, sizeof(data32)) < 0) { + case RXRPC_PACKET_TYPE_ABORT: { + uint32_t _dbuf, *dp; + + dp = skb_header_pointer(msg->pkt, msg->offset, + sizeof(_dbuf), &_dbuf); + if (dp == NULL) printk("Rx Received short ABORT packet\n"); - } - else { - data32 = ntohl(data32); - } - _proto("Rx Received Call ABORT { data=%d }", data32); + _proto("Rx Received Call ABORT { data=%d }", + (dp ? ntohl(*dp) : 0)); spin_lock(&call->lock); call->app_call_state = RXRPC_CSTATE_ERROR; call->app_err_state = RXRPC_ESTATE_PEER_ABORT; - call->app_abort_code = data32; + call->app_abort_code = (dp ? ntohl(*dp) : 0); call->app_errno = -ECONNABORTED; call->app_mark = RXRPC_APP_MARK_EOF; call->app_read_buf = NULL; @@ -1013,7 +1011,7 @@ static void rxrpc_call_receive_packet(struct rxrpc_call *call) spin_unlock(&call->lock); call->app_error_func(call); break; - + } default: /* deal with other packet types */ _proto("Rx Unsupported packet type %u (#%u)", @@ -1271,7 +1269,7 @@ static void rxrpc_call_receive_data_packet(struct rxrpc_call *call, static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call, struct rxrpc_message *msg) { - struct rxrpc_ackpacket ack; + struct rxrpc_ackpacket _ack, *ap; rxrpc_serial_t serial; rxrpc_seq_t seq; int ret; @@ -1279,33 +1277,34 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call, _enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq); /* extract the basic ACK record */ - if (skb_copy_bits(msg->pkt, msg->offset, &ack, sizeof(ack)) < 0) { + ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack); + if (ap == NULL) { printk("Rx Received short ACK packet\n"); return; } - msg->offset += sizeof(ack); + msg->offset += sizeof(_ack); - serial = ack.serial; - seq = ntohl(ack.firstPacket); + serial = ap->serial; + seq = ntohl(ap->firstPacket); _proto("Rx Received ACK %%%d { b=%hu 
m=%hu f=%u p=%u s=%u r=%s n=%u }", ntohl(msg->hdr.serial), - ntohs(ack.bufferSpace), - ntohs(ack.maxSkew), + ntohs(ap->bufferSpace), + ntohs(ap->maxSkew), seq, - ntohl(ack.previousPacket), + ntohl(ap->previousPacket), ntohl(serial), - rxrpc_acks[ack.reason], + rxrpc_acks[ap->reason], call->ackr.nAcks ); /* check the other side isn't ACK'ing a sequence number I haven't sent * yet */ - if (ack.nAcks > 0 && + if (ap->nAcks > 0 && (seq > call->snd_seq_count || - seq + ack.nAcks - 1 > call->snd_seq_count)) { + seq + ap->nAcks - 1 > call->snd_seq_count)) { printk("Received ACK (#%u-#%u) for unsent packet\n", - seq, seq + ack.nAcks - 1); + seq, seq + ap->nAcks - 1); rxrpc_call_abort(call, -EINVAL); _leave(""); return; @@ -1354,7 +1353,7 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call, } } - switch (ack.reason) { + switch (ap->reason) { /* deal with negative/positive acknowledgement of data * packets */ case RXRPC_ACK_REQUESTED: @@ -1366,14 +1365,14 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call, case RXRPC_ACK_OUT_OF_SEQUENCE: case RXRPC_ACK_EXCEEDS_WINDOW: call->snd_resend_cnt = 0; - ret = rxrpc_call_record_ACK(call, msg, seq, ack.nAcks); + ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks); if (ret < 0) rxrpc_call_abort(call, ret); break; /* respond to ping packets immediately */ case RXRPC_ACK_PING: - rxrpc_call_generate_ACK(call, &msg->hdr, &ack); + rxrpc_call_generate_ACK(call, &msg->hdr, ap); break; /* only record RTT on ping response packets */ @@ -1386,7 +1385,7 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call, rttmsg = NULL; spin_lock(&call->lock); if (call->snd_ping && - call->snd_ping->hdr.serial == ack.serial) { + call->snd_ping->hdr.serial == ap->serial) { rttmsg = call->snd_ping; call->snd_ping = NULL; } @@ -1402,7 +1401,7 @@ static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call, break; default: - printk("Unsupported ACK reason %u\n", ack.reason); + printk("Unsupported ACK reason 
%u\n", ap->reason); break; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a7ab07fe5cc0..de4bccc1c25d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -753,7 +753,7 @@ udp_data_ready(struct sock *sk, int len) struct rpc_rqst *rovr; struct sk_buff *skb; int err, repsize, copied; - u32 xid; + u32 _xid, *xp; read_lock(&sk->sk_callback_lock); dprintk("RPC: udp_data_ready...\n"); @@ -777,12 +777,14 @@ udp_data_ready(struct sock *sk, int len) } /* Copy the XID from the skb... */ - if (skb_copy_bits(skb, sizeof(struct udphdr), &xid, sizeof(xid)) < 0) + xp = skb_header_pointer(skb, sizeof(struct udphdr), + sizeof(_xid), &_xid); + if (xp == NULL) goto dropit; /* Look up and lock the request corresponding to the given XID */ spin_lock(&xprt->sock_lock); - rovr = xprt_lookup_rqst(xprt, xid); + rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; task = rovr->rq_task; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 52fa3cfdfd5b..587d63bd6861 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2823,48 +2823,50 @@ static void selinux_task_to_inode(struct task_struct *p, static int selinux_parse_skb_ipv4(struct sk_buff *skb, struct avc_audit_data *ad) { int offset, ihlen, ret; - struct iphdr iph; + struct iphdr _iph, *ih; offset = skb->nh.raw - skb->data; - ret = skb_copy_bits(skb, offset, &iph, sizeof(iph)); - if (ret) + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) goto out; - ihlen = iph.ihl * 4; - if (ihlen < sizeof(iph)) + ihlen = ih->ihl * 4; + if (ihlen < sizeof(_iph)) goto out; - ad->u.net.v4info.saddr = iph.saddr; - ad->u.net.v4info.daddr = iph.daddr; + ad->u.net.v4info.saddr = ih->saddr; + ad->u.net.v4info.daddr = ih->daddr; - switch (iph.protocol) { + switch (ih->protocol) { case IPPROTO_TCP: { - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (ntohs(iph.frag_off) & IP_OFFSET) + if (ntohs(ih->frag_off) & IP_OFFSET) break; offset += ihlen; - if 
(skb_copy_bits(skb, offset, &tcph, sizeof(tcph)) < 0) + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) break; - ad->u.net.sport = tcph.source; - ad->u.net.dport = tcph.dest; + ad->u.net.sport = th->source; + ad->u.net.dport = th->dest; break; } case IPPROTO_UDP: { - struct udphdr udph; + struct udphdr _udph, *uh; - if (ntohs(iph.frag_off) & IP_OFFSET) + if (ntohs(ih->frag_off) & IP_OFFSET) break; offset += ihlen; - if (skb_copy_bits(skb, offset, &udph, sizeof(udph)) < 0) - break; + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) + break; - ad->u.net.sport = udph.source; - ad->u.net.dport = udph.dest; + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; break; } @@ -2882,18 +2884,18 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb, struct avc_audit_data *ad { u8 nexthdr; int ret, offset; - struct ipv6hdr ipv6h; + struct ipv6hdr _ipv6h, *ip6; offset = skb->nh.raw - skb->data; - ret = skb_copy_bits(skb, offset, &ipv6h, sizeof(ipv6h)); - if (ret) + ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); + if (ip6 == NULL) goto out; - ipv6_addr_copy(&ad->u.net.v6info.saddr, &ipv6h.saddr); - ipv6_addr_copy(&ad->u.net.v6info.daddr, &ipv6h.daddr); + ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr); + ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr); - nexthdr = ipv6h.nexthdr; - offset += sizeof(ipv6h); + nexthdr = ip6->nexthdr; + offset += sizeof(_ipv6h); offset = ipv6_skip_exthdr(skb, offset, &nexthdr, skb->tail - skb->head - offset); if (offset < 0) @@ -2901,24 +2903,26 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb, struct avc_audit_data *ad switch (nexthdr) { case IPPROTO_TCP: { - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (skb_copy_bits(skb, offset, &tcph, sizeof(tcph)) < 0) + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) break; - ad->u.net.sport = tcph.source; - ad->u.net.dport = tcph.dest; + ad->u.net.sport = 
th->source; + ad->u.net.dport = th->dest; break; } case IPPROTO_UDP: { - struct udphdr udph; + struct udphdr _udph, *uh; - if (skb_copy_bits(skb, offset, &udph, sizeof(udph)) < 0) + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) break; - ad->u.net.sport = udph.source; - ad->u.net.dport = udph.dest; + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; break; } -- cgit v1.2.3 From a9497b16168faa07d05e325682fa2e3ef97c15b0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 03:56:52 -0700 Subject: [TCP]: When fetching srtt from metrics, do not forget to set rtt_seq. Otherwise RTT will end up being wrong for a long time, because tcp_rtt_estimator() requires that every time srtt is set, rtt_seq is set too. Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5e7f70f1c940..85643472b84d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -852,8 +852,10 @@ static void tcp_init_metrics(struct sock *sk) * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (dst_metric(dst, RTAX_RTT) > tp->srtt) + if (dst_metric(dst, RTAX_RTT) > tp->srtt) { tp->srtt = dst_metric(dst, RTAX_RTT); + tp->rtt_seq = tp->snd_nxt; + } if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) { tp->mdev = dst_metric(dst, RTAX_RTTVAR); tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); -- cgit v1.2.3 From 343320d2f080addb04e72675e93c6635b922b810 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 08:17:01 -0700 Subject: [IPV4/IPV6]: Fix direct user pointer deref in xfrm icmp changes. Signed-off-by: David S. 
Miller --- net/ipv4/raw.c | 8 ++++---- net/ipv6/raw.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6742e162a6f5..c17f8716ecdd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -326,8 +326,8 @@ error: static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) { struct iovec *iov; - u8 *type = NULL; - u8 *code = NULL; + u8 __user *type = NULL; + u8 __user *code = NULL; int probed = 0; int i; @@ -354,8 +354,8 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - fl->fl_icmp_type = *type; - fl->fl_icmp_code = *code; + get_user(fl->fl_icmp_type, type); + __get_user(fl->fl_icmp_code, code); probed = 1; } break; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eb6480aea025..91fbcfee87ac 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -559,8 +559,8 @@ error: static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) { struct iovec *iov; - u8 *type = NULL; - u8 *code = NULL; + u8 __user *type = NULL; + u8 __user *code = NULL; int probed = 0; int i; @@ -587,8 +587,8 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - fl->fl_icmp_type = *type; - fl->fl_icmp_code = *code; + get_user(fl->fl_icmp_type, type); + __get_user(fl->fl_icmp_code, code); probed = 1; } break; -- cgit v1.2.3 From c4a88af99a8ba104a4795326c68f75a456f6a389 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Aug 2004 17:12:01 -0700 Subject: [IPSEC]: Use xfrm4_rcv in xfrm4_tunnel This is a trivial patch to use xfrm4_rcv in xfrm4_tunnel. It doesn't need the extra argument provided by xfrm4_rcv_encap. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 9f04c5706aac..ce9dbb92b7a7 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -68,7 +68,7 @@ static int ipip_rcv(struct sk_buff *skb) if (handler && handler->handler(skb) == 0) return 0; - return xfrm4_rcv_encap(skb, 0); + return xfrm4_rcv(skb); } static void ipip_err(struct sk_buff *skb, u32 info) -- cgit v1.2.3 From 4471649367b4104d18d5f679f6cb1c4065dd23b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Aug 2004 17:13:27 -0700 Subject: [IPSEC]: Modularise xfrm_tunnel. This patch allows the user to build xfrm4_tunnel/xfrm6_tunnel as modules. This makes sense because they're only used by IPComp/IPIP/IP6Tunnel which are modules themselves. It also means that distros can cut down on their core kernel size when compiling with IPsec support. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 -- net/ipv4/Kconfig | 12 +++++++++++- net/ipv4/Makefile | 3 ++- net/ipv4/xfrm4_tunnel.c | 5 +++++ net/ipv6/Kconfig | 14 +++++++++++++- net/ipv6/Makefile | 3 ++- net/ipv6/xfrm6_policy.c | 2 -- net/ipv6/xfrm6_tunnel.c | 15 ++++++++++----- net/xfrm/xfrm_export.c | 2 -- 9 files changed, 43 insertions(+), 15 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a1924436eeb5..c0d653db64b2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -792,8 +792,6 @@ extern void xfrm4_state_init(void); extern void xfrm4_state_fini(void); extern void xfrm6_state_init(void); extern void xfrm6_state_fini(void); -extern void xfrm6_tunnel_init(void); -extern void xfrm6_tunnel_fini(void); extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *); extern struct xfrm_state *xfrm_state_alloc(void); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index de00c668c98c..b5a2588afca3 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig 
@@ -187,7 +187,7 @@ config IP_PNP_RARP config NET_IPIP tristate "IP: tunneling" depends on INET - select XFRM + select INET_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -351,6 +351,7 @@ config INET_ESP config INET_IPCOMP tristate "IP: IPComp transformation" select XFRM + select INET_TUNNEL select CRYPTO select CRYPTO_DEFLATE ---help--- @@ -359,5 +360,14 @@ config INET_IPCOMP If unsure, say Y. +config INET_TUNNEL + tristate "IP: tunnel transformation" + select XFRM + ---help--- + Support for generic IP tunnel transformation, which is required by + the IP tunneling module as well as tunnel mode IPComp. + + If unsure, say Y. + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 0fe409afa094..a7a7a35574d4 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -19,9 +19,10 @@ obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o +obj-$(CONFIG_INET_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ - xfrm4_tunnel.o xfrm4_output.o + xfrm4_output.o diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index ce9dbb92b7a7..a2f2e7a75709 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -43,6 +44,8 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler) return ret; } +EXPORT_SYMBOL(xfrm4_tunnel_register); + int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler) { int ret; @@ -60,6 +63,8 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler) return ret; } +EXPORT_SYMBOL(xfrm4_tunnel_deregister); + static int ipip_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler = ipip_handler; diff --git 
a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 53f8e348d816..fd87a5a192da 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -48,6 +48,7 @@ config INET6_IPCOMP tristate "IPv6: IPComp transformation" depends on IPV6 select XFRM + select INET6_TUNNEL select CRYPTO select CRYPTO_DEFLATE ---help--- @@ -56,10 +57,21 @@ config INET6_IPCOMP If unsure, say Y. +config INET6_TUNNEL + tristate "IPv6: tunnel transformation" + depends on IPV6 + select XFRM + ---help--- + Support for generic IPv6-in-IPv6 tunnel transformation, which is + required by the IPv6-in-IPv6 tunneling module as well as tunnel mode + IPComp. + + If unsure, say Y. + config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" depends on IPV6 - select XFRM + select INET6_TUNNEL ---help--- Support for IPv6-in-IPv6 tunnels described in RFC 2473. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index d9e309fe8490..b39e04940590 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -11,12 +11,13 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ ip6_flowlabel.o ipv6_syms.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ - xfrm6_tunnel.o xfrm6_output.o + xfrm6_output.o ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o +obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6c1cb74e6a6f..088e3cb6c65e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -287,12 +287,10 @@ void __init xfrm6_init(void) { xfrm6_policy_init(); xfrm6_state_init(); - xfrm6_tunnel_init(); } void __exit xfrm6_fini(void) { - xfrm6_tunnel_fini(); //xfrm6_input_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index a9736d2ea721..3b58accb6786 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ 
b/net/ipv6/xfrm6_tunnel.c @@ -501,31 +501,32 @@ static struct inet6_protocol xfrm6_tunnel_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -void __init xfrm6_tunnel_init(void) +static int __init xfrm6_tunnel_init(void) { X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__); if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) { X6TPRINTK1(KERN_ERR "xfrm6_tunnel init: can't add xfrm type\n"); - return; + return -EAGAIN; } if (inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0) { X6TPRINTK1(KERN_ERR "xfrm6_tunnel init(): can't add protocol\n"); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return; + return -EAGAIN; } if (xfrm6_tunnel_spi_init() < 0) { X6TPRINTK1(KERN_ERR "xfrm6_tunnel init: failed to initialize spi\n"); inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return; + return -EAGAIN; } + return 0; } -void __exit xfrm6_tunnel_fini(void) +static void __exit xfrm6_tunnel_fini(void) { X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__); @@ -537,3 +538,7 @@ void __exit xfrm6_tunnel_fini(void) X6TPRINTK1(KERN_ERR "xfrm6_tunnel close: can't remove xfrm type\n"); } + +module_init(xfrm6_tunnel_init); +module_exit(xfrm6_tunnel_fini); +MODULE_LICENSE("GPL"); diff --git a/net/xfrm/xfrm_export.c b/net/xfrm/xfrm_export.c index f72754953d29..9f335640d254 100644 --- a/net/xfrm/xfrm_export.c +++ b/net/xfrm/xfrm_export.c @@ -33,8 +33,6 @@ EXPORT_SYMBOL(secpath_dup); EXPORT_SYMBOL(xfrm_get_acqseq); EXPORT_SYMBOL(xfrm_parse_spi); EXPORT_SYMBOL(xfrm4_rcv); -EXPORT_SYMBOL(xfrm4_tunnel_register); -EXPORT_SYMBOL(xfrm4_tunnel_deregister); EXPORT_SYMBOL(xfrm_register_type); EXPORT_SYMBOL(xfrm_unregister_type); EXPORT_SYMBOL(xfrm_get_type); -- cgit v1.2.3 From 5b52e084d80500efb75fd09f6cac4025106153a5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Aug 2004 20:49:16 -0700 Subject: [NETFILTER]: Mark tcp_options skb arg as const. 
--- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 7fe8ae284e85..64c7538c4b18 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -386,7 +386,7 @@ static inline __u32 segment_seq_plus_len(__u32 seq, /* * Simplified tcp_parse_options routine from tcp_input.c */ -static void tcp_options(struct sk_buff *skb, +static void tcp_options(const struct sk_buff *skb, struct iphdr *iph, struct tcphdr *tcph, struct ip_ct_tcp_state *state) -- cgit v1.2.3 From 101c46b55bb75973b148acf8252f4b5c0de14641 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 19 Aug 2004 21:41:25 -0700 Subject: [VLAN]: __vlan_hwaccel_rx() needs to use dev_kfree_skb_any. Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 420767fcb3c9..529c401a9a86 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -152,7 +152,7 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { - kfree_skb(skb); + dev_kfree_skb_any(skb); /* Not NET_RX_DROP, this is not being dropped * due to congestion. -- cgit v1.2.3 From a7863093e34be8acc390a376a5bcf33db21943a9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 19 Aug 2004 23:54:23 -0700 Subject: [IPSEC]: Revert pskb change for x->type->output Since skb_checksum_help() has been moved to xfrm[46]_output(), we don't need the sk_buff ** argument in x->type->output anymore. This patch reverts it to a sk_buff *. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 2 +- net/ipv4/ah4.c | 10 +++++----- net/ipv4/esp4.c | 32 ++++++++++++++++---------------- net/ipv4/ipcomp.c | 22 +++++++++++----------- net/ipv4/xfrm4_output.c | 3 +-- net/ipv4/xfrm4_tunnel.c | 3 +-- net/ipv6/ah6.c | 18 +++++++++--------- net/ipv6/esp6.c | 34 +++++++++++++++++----------------- net/ipv6/ipcomp6.c | 26 +++++++++++++------------- net/ipv6/xfrm6_output.c | 3 +-- net/ipv6/xfrm6_tunnel.c | 3 +-- 11 files changed, 76 insertions(+), 80 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c0d653db64b2..72600a92c58a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -216,7 +216,7 @@ struct xfrm_type void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); int (*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); - int (*output)(struct sk_buff **pskb); + int (*output)(struct sk_buff *pskb); /* Estimate maximal size of result of transformation of a dgram */ u32 (*get_max_size)(struct xfrm_state *, int size); }; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index b345043f51dd..78cc13170e2c 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -53,10 +53,10 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) return 0; } -static int ah_output(struct sk_buff **pskb) +static int ah_output(struct sk_buff *skb) { int err; - struct dst_entry *dst = (*pskb)->dst; + struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; struct iphdr *iph, *top_iph; struct ip_auth_hdr *ah; @@ -66,7 +66,7 @@ static int ah_output(struct sk_buff **pskb) char buf[60]; } tmp_iph; - top_iph = (*pskb)->nh.iph; + top_iph = skb->nh.iph; iph = &tmp_iph.iph; iph->tos = top_iph->tos; @@ -85,7 +85,7 @@ static int ah_output(struct sk_buff **pskb) ah->nexthdr = top_iph->protocol; top_iph->tos = 0; - top_iph->tot_len = htons((*pskb)->len); + top_iph->tot_len = htons(skb->len); top_iph->frag_off = 0; top_iph->ttl = 0; 
top_iph->protocol = IPPROTO_AH; @@ -98,7 +98,7 @@ static int ah_output(struct sk_buff **pskb) ah->reserved = 0; ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); - ahp->icv(ahp, *pskb, ah->auth_data); + ahp->icv(ahp, skb, ah->auth_data); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index c8cd0c7de3ea..b06b8ec983c7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -17,10 +17,10 @@ struct esp_decap_data { __u8 proto; }; -int esp_output(struct sk_buff **pskb) +int esp_output(struct sk_buff *skb) { int err; - struct dst_entry *dst = (*pskb)->dst; + struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; struct iphdr *top_iph; struct ip_esp_hdr *esph; @@ -33,13 +33,13 @@ int esp_output(struct sk_buff **pskb) int nfrags; /* Strip IP+ESP header. */ - __skb_pull(*pskb, (*pskb)->h.raw - (*pskb)->data); + __skb_pull(skb, skb->h.raw - skb->data); /* Now skb is pure payload to encrypt */ err = -ENOMEM; /* Round to block size */ - clen = (*pskb)->len; + clen = skb->len; esp = x->data; alen = esp->auth.icv_trunc_len; @@ -49,22 +49,22 @@ int esp_output(struct sk_buff **pskb) if (esp->conf.padlen) clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); - if ((nfrags = skb_cow_data(*pskb, clen-(*pskb)->len+alen, &trailer)) < 0) + if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) goto error; /* Fill padding... 
*/ do { int i; - for (i=0; i<clen-(*pskb)->len - 2; i++) + for (i=0; i<clen-skb->len - 2; i++) *(u8*)(trailer->tail + i) = i+1; } while (0); - *(u8*)(trailer->tail + clen-(*pskb)->len - 2) = (clen - (*pskb)->len)-2; - pskb_put(*pskb, trailer, clen - (*pskb)->len); + *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2; + pskb_put(skb, trailer, clen - skb->len); - __skb_push(*pskb, (*pskb)->data - (*pskb)->nh.raw); - top_iph = (*pskb)->nh.iph; - esph = (struct ip_esp_hdr *)((*pskb)->nh.raw + top_iph->ihl*4); - top_iph->tot_len = htons((*pskb)->len + alen); + __skb_push(skb, skb->data - skb->nh.raw); + top_iph = skb->nh.iph; + esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4); + top_iph->tot_len = htons(skb->len + alen); *(u8*)(trailer->tail - 1) = top_iph->protocol; /* this is non-NULL only with UDP Encapsulation */ @@ -76,7 +76,7 @@ int esp_output(struct sk_buff **pskb) uh = (struct udphdr *)esph; uh->source = encap->encap_sport; uh->dest = encap->encap_dport; - uh->len = htons((*pskb)->len + alen - top_iph->ihl*4); + uh->len = htons(skb->len + alen - top_iph->ihl*4); uh->check = 0; switch (encap->encap_type) { @@ -109,7 +109,7 @@ int esp_output(struct sk_buff **pskb) if (!sg) goto error; } - skb_to_sgvec(*pskb, sg, esph->enc_data+esp->conf.ivlen-(*pskb)->data, clen); + skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); crypto_cipher_encrypt(tfm, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); @@ -121,9 +121,9 @@ int esp_output(struct sk_buff **pskb) } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, *pskb, (u8*)esph-(*pskb)->data, + esp->auth.icv(esp, skb, (u8*)esph-skb->data, sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(*pskb, trailer, alen); + pskb_put(skb, trailer, alen); } ip_send_check(top_iph); diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index b3885885abf0..095028111e64 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -120,20 +120,20 @@ out: return err; } -static int
ipcomp_output(struct sk_buff **pskb) +static int ipcomp_output(struct sk_buff *skb) { int err; - struct dst_entry *dst = (*pskb)->dst; + struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; struct iphdr *iph; struct ip_comp_hdr *ipch; struct ipcomp_data *ipcd = x->data; int hdr_len = 0; - iph = (*pskb)->nh.iph; - iph->tot_len = htons((*pskb)->len); + iph = skb->nh.iph; + iph->tot_len = htons(skb->len); hdr_len = iph->ihl * 4; - if (((*pskb)->len - hdr_len) < ipcd->threshold) { + if ((skb->len - hdr_len) < ipcd->threshold) { /* Don't bother compressing */ if (x->props.mode) { ip_send_check(iph); @@ -141,17 +141,17 @@ static int ipcomp_output(struct sk_buff **pskb) goto out_ok; } - if ((skb_is_nonlinear(*pskb) || skb_cloned(*pskb)) && - skb_linearize(*pskb, GFP_ATOMIC) != 0) { + if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && + skb_linearize(skb, GFP_ATOMIC) != 0) { err = -ENOMEM; goto error; } - err = ipcomp_compress(x, *pskb); + err = ipcomp_compress(x, skb); if (err) { if (err == -EMSGSIZE) { if (x->props.mode) { - iph = (*pskb)->nh.iph; + iph = skb->nh.iph; ip_send_check(iph); } goto out_ok; @@ -160,8 +160,8 @@ static int ipcomp_output(struct sk_buff **pskb) } /* Install ipcomp header, convert into ipcomp datagram. 
*/ - iph = (*pskb)->nh.iph; - iph->tot_len = htons((*pskb)->len); + iph = skb->nh.iph; + iph->tot_len = htons(skb->len); ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4); ipch->nexthdr = iph->protocol; ipch->flags = 0; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 191cec718e95..1bf740e71c64 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -119,8 +119,7 @@ int xfrm4_output(struct sk_buff **pskb) xfrm4_encap(skb); - err = x->type->output(pskb); - skb = *pskb; + err = x->type->output(skb); if (err) goto error; diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index a2f2e7a75709..dcc04644ccec 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -9,9 +9,8 @@ #include #include -static int ipip_output(struct sk_buff **pskb) +static int ipip_output(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct iphdr *iph; iph = skb->nh.iph; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 28bac499f839..4bb24b7afec2 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -154,11 +154,11 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) return 0; } -int ah6_output(struct sk_buff **pskb) +int ah6_output(struct sk_buff *skb) { int err; int extlen; - struct dst_entry *dst = (*pskb)->dst; + struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; struct ipv6hdr *top_iph; struct ip_auth_hdr *ah; @@ -170,11 +170,11 @@ int ah6_output(struct sk_buff **pskb) char hdrs[0]; } *tmp_ext; - top_iph = (struct ipv6hdr *)(*pskb)->data; - top_iph->payload_len = htons((*pskb)->len - sizeof(*top_iph)); + top_iph = (struct ipv6hdr *)skb->data; + top_iph->payload_len = htons(skb->len - sizeof(*top_iph)); - nexthdr = *(*pskb)->nh.raw; - *(*pskb)->nh.raw = IPPROTO_AH; + nexthdr = *skb->nh.raw; + *skb->nh.raw = IPPROTO_AH; /* When there are no extension headers, we only need to save the first * 8 bytes of the base IP header. 
@@ -182,7 +182,7 @@ int ah6_output(struct sk_buff **pskb) memcpy(tmp_base, top_iph, sizeof(tmp_base)); tmp_ext = NULL; - extlen = (*pskb)->h.raw - (unsigned char *)(top_iph + 1); + extlen = skb->h.raw - (unsigned char *)(top_iph + 1); if (extlen) { extlen += sizeof(*tmp_ext); tmp_ext = kmalloc(extlen, GFP_ATOMIC); @@ -198,7 +198,7 @@ int ah6_output(struct sk_buff **pskb) goto error_free_iph; } - ah = (struct ip_auth_hdr *)(*pskb)->h.raw; + ah = (struct ip_auth_hdr *)skb->h.raw; ah->nexthdr = nexthdr; top_iph->priority = 0; @@ -214,7 +214,7 @@ int ah6_output(struct sk_buff **pskb) ah->reserved = 0; ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); - ahp->icv(ahp, *pskb, ah->auth_data); + ahp->icv(ahp, skb, ah->auth_data); err = 0; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index eb94426df27e..e40e569f419d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -37,11 +37,11 @@ #include #include -int esp6_output(struct sk_buff **pskb) +int esp6_output(struct sk_buff *skb) { int err; int hdr_len; - struct dst_entry *dst = (*pskb)->dst; + struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; struct ipv6hdr *top_iph; struct ipv6_esp_hdr *esph; @@ -54,17 +54,17 @@ int esp6_output(struct sk_buff **pskb) int nfrags; esp = x->data; - hdr_len = (*pskb)->h.raw - (*pskb)->data + + hdr_len = skb->h.raw - skb->data + sizeof(*esph) + esp->conf.ivlen; /* Strip IP+ESP header. */ - __skb_pull(*pskb, hdr_len); + __skb_pull(skb, hdr_len); /* Now skb is pure payload to encrypt */ err = -ENOMEM; /* Round to block size */ - clen = (*pskb)->len; + clen = skb->len; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; @@ -73,24 +73,24 @@ int esp6_output(struct sk_buff **pskb) if (esp->conf.padlen) clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); - if ((nfrags = skb_cow_data(*pskb, clen-(*pskb)->len+alen, &trailer)) < 0) { + if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) { goto error; } /* Fill padding... 
*/ do { int i; - for (i=0; i<clen-(*pskb)->len - 2; i++) + for (i=0; i<clen-skb->len - 2; i++) *(u8*)(trailer->tail + i) = i+1; } while (0); - *(u8*)(trailer->tail + clen-(*pskb)->len - 2) = (clen - (*pskb)->len)-2; - pskb_put(*pskb, trailer, clen - (*pskb)->len); + *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2; + pskb_put(skb, trailer, clen - skb->len); - top_iph = (struct ipv6hdr *)__skb_push(*pskb, hdr_len); - esph = (struct ipv6_esp_hdr *)(*pskb)->h.raw; - top_iph->payload_len = htons((*pskb)->len + alen - sizeof(*top_iph)); - *(u8*)(trailer->tail - 1) = *(*pskb)->nh.raw; - *(*pskb)->nh.raw = IPPROTO_ESP; + top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len); + esph = (struct ipv6_esp_hdr *)skb->h.raw; + top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph)); + *(u8*)(trailer->tail - 1) = *skb->nh.raw; + *skb->nh.raw = IPPROTO_ESP; esph->spi = x->id.spi; esph->seq_no = htonl(++x->replay.oseq); @@ -106,7 +106,7 @@ int esp6_output(struct sk_buff **pskb) if (!sg) goto error; } - skb_to_sgvec(*pskb, sg, esph->enc_data+esp->conf.ivlen-(*pskb)->data, clen); + skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); crypto_cipher_encrypt(tfm, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); @@ -118,9 +118,9 @@ int esp6_output(struct sk_buff **pskb) } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, *pskb, (u8*)esph-(*pskb)->data, + esp->auth.icv(esp, skb, (u8*)esph-skb->data, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(*pskb, trailer, alen); + pskb_put(skb, trailer, alen); } err = 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index ee62dba1b3ca..8f5296e3f9d0 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -114,10 +114,10 @@ error_out: return err; } -static int ipcomp6_output(struct sk_buff **pskb) +static int ipcomp6_output(struct sk_buff *skb) { int err; - struct dst_entry *dst = (*pskb)->dst; + struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; struct ipv6hdr
*top_iph; int hdr_len; @@ -126,23 +126,23 @@ static int ipcomp6_output(struct sk_buff **pskb) int plen, dlen; u8 *start, *scratch = ipcd->scratch; - hdr_len = (*pskb)->h.raw - (*pskb)->data; + hdr_len = skb->h.raw - skb->data; /* check whether datagram len is larger than threshold */ - if (((*pskb)->len - hdr_len) < ipcd->threshold) { + if ((skb->len - hdr_len) < ipcd->threshold) { goto out_ok; } - if ((skb_is_nonlinear(*pskb) || skb_cloned(*pskb)) && - skb_linearize(*pskb, GFP_ATOMIC) != 0) { + if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && + skb_linearize(skb, GFP_ATOMIC) != 0) { err = -ENOMEM; goto error; } /* compression */ - plen = (*pskb)->len - hdr_len; + plen = skb->len - hdr_len; dlen = IPCOMP_SCRATCH_SIZE; - start = (*pskb)->h.raw; + start = skb->h.raw; err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen); if (err) { @@ -152,18 +152,18 @@ static int ipcomp6_output(struct sk_buff **pskb) goto out_ok; } memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - pskb_trim(*pskb, hdr_len + dlen + sizeof(struct ip_comp_hdr)); + pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr)); /* insert ipcomp header and replace datagram */ - top_iph = (struct ipv6hdr *)(*pskb)->data; + top_iph = (struct ipv6hdr *)skb->data; - top_iph->payload_len = htons((*pskb)->len - sizeof(struct ipv6hdr)); + top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipch = (struct ipv6_comp_hdr *)start; - ipch->nexthdr = *(*pskb)->nh.raw; + ipch->nexthdr = *skb->nh.raw; ipch->flags = 0; ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *(*pskb)->nh.raw = IPPROTO_COMP; + *skb->nh.raw = IPPROTO_COMP; out_ok: err = 0; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 712856f0f356..786de7d912bb 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -113,8 +113,7 @@ int xfrm6_output(struct sk_buff **pskb) xfrm6_encap(skb); - err = x->type->output(pskb); - skb = *pskb; + err = x->type->output(skb); if (err) goto error; diff --git 
a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 3b58accb6786..9616a63cc431 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -343,9 +343,8 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) EXPORT_SYMBOL(xfrm6_tunnel_free_spi); -static int xfrm6_tunnel_output(struct sk_buff **pskb) +static int xfrm6_tunnel_output(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct ipv6hdr *top_iph; top_iph = (struct ipv6hdr *)skb->data; -- cgit v1.2.3 From 0b9d4093354061a85f2e4d060f5129b3eed59d81 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Thu, 19 Aug 2004 23:55:23 -0700 Subject: [IPV6]: Use offsetof(). Signed-off-by: Hideaki YOSHIFUJI Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 169506ba0c40..ff6bd80f7b1b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -515,7 +515,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, int err = -ENOMEM; fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), - rt->rt6i_dst.plen, (u8*) &rt->rt6i_dst - (u8*) rt); + rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst)); if (fn == NULL) goto out; @@ -551,7 +551,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, sn = fib6_add_1(sfn, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, - (u8*) &rt->rt6i_src - (u8*) rt); + offsetof(struct rt6_info, rt6i_src)); if (sn == NULL) { /* If it is failed, discard just allocated @@ -572,7 +572,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, } else { sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, - (u8*) &rt->rt6i_src - (u8*) rt); + offsetof(struct rt6_info, rt6i_src)); if (sn == NULL) goto st_failure; @@ -681,14 +681,13 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr 
*daddr, struct in6_addr *saddr) { struct lookup_args args[2]; - struct rt6_info *rt = NULL; struct fib6_node *fn; - args[0].offset = (u8*) &rt->rt6i_dst - (u8*) rt; + args[0].offset = offsetof(struct rt6_info, rt6i_dst); args[0].addr = daddr; #ifdef CONFIG_IPV6_SUBTREES - args[1].offset = (u8*) &rt->rt6i_src - (u8*) rt; + args[1].offset = offsetof(struct rt6_info, rt6i_src); args[1].addr = saddr; #endif @@ -740,11 +739,10 @@ struct fib6_node * fib6_locate(struct fib6_node *root, struct in6_addr *daddr, int dst_len, struct in6_addr *saddr, int src_len) { - struct rt6_info *rt = NULL; struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, - (u8*) &rt->rt6i_dst - (u8*) rt); + offsetof(struct rt6_info, rt6i_dst)); #ifdef CONFIG_IPV6_SUBTREES if (src_len) { @@ -753,7 +751,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root, fn = fn->subtree; if (fn) fn = fib6_locate_1(fn, saddr, src_len, - (u8*) &rt->rt6i_src - (u8*) rt); + offsetof(struct rt6_info, rt6i_src)); } #endif -- cgit v1.2.3 From 38e36e2f3c805ac3c8e0ba61734c929bf834fba8 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Thu, 19 Aug 2004 23:56:10 -0700 Subject: [IPV6]: Improve readability in ip6_flowlabel.c Signed-off-by: Hideaki YOSHIFUJI Signed-off-by: David S. 
Miller --- net/ipv6/ip6_flowlabel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 2c5aab0894d2..15f341adc74c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -538,7 +538,8 @@ release: /* Do not check for fault */ if (!freq.flr_label) - copy_to_user(optval + ((u8*)&freq.flr_label - (u8*)&freq), &fl->label, sizeof(fl->label)); + copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, + &fl->label, sizeof(fl->label)); sfl1->fl = fl; sfl1->next = np->ipv6_fl_list; -- cgit v1.2.3 From ad520964407948a916ebd0ada2d8707b9108b555 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 21 Aug 2004 08:00:27 -0700 Subject: [XFRM]: Mark some functions/data static. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 -- net/ipv4/ah4.c | 4 ++-- net/ipv4/esp4.c | 12 ++++++------ net/ipv4/xfrm4_policy.c | 12 ++++++------ net/ipv6/ah6.c | 10 +++++----- net/ipv6/esp6.c | 14 +++++++------- net/ipv6/xfrm6_policy.c | 14 +++++++------- 7 files changed, 33 insertions(+), 35 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 72600a92c58a..756c2016e4a1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -848,8 +848,6 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig #endif void xfrm_policy_init(void); -void xfrm4_policy_init(void); -void xfrm6_policy_init(void); struct xfrm_policy *xfrm_policy_alloc(int gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 78cc13170e2c..970fe58b4880 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -116,7 +116,7 @@ error: return err; } -int ah_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) +static int ah_input(struct xfrm_state *x, struct 
xfrm_decap_state *decap, struct sk_buff *skb) { int ah_hlen; struct iphdr *iph; @@ -184,7 +184,7 @@ out: return -EINVAL; } -void ah4_err(struct sk_buff *skb, u32 info) +static void ah4_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr*)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b06b8ec983c7..27d8f808bad4 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -17,7 +17,7 @@ struct esp_decap_data { __u8 proto; }; -int esp_output(struct sk_buff *skb) +static int esp_output(struct sk_buff *skb) { int err; struct dst_entry *dst = skb->dst; @@ -139,7 +139,7 @@ error: * expensive, so we only support truncated data, which is the recommended * and common case. */ -int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) +static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) { struct iphdr *iph; struct ip_esp_hdr *esph; @@ -246,7 +246,7 @@ out: return -EINVAL; } -int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) +static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) { if (x->encap) { @@ -320,7 +320,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) return mtu + x->props.header_len + esp->auth.icv_trunc_len; } -void esp4_err(struct sk_buff *skb, u32 info) +static void esp4_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr*)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); @@ -338,7 +338,7 @@ void esp4_err(struct sk_buff *skb, u32 info) xfrm_state_put(x); } -void esp_destroy(struct xfrm_state *x) +static void esp_destroy(struct xfrm_state *x) { struct esp_data *esp = x->data; @@ -364,7 +364,7 @@ void esp_destroy(struct xfrm_state *x) kfree(esp); } -int esp_init_state(struct xfrm_state *x, void *args) +static int esp_init_state(struct 
xfrm_state *x, void *args) { struct esp_data *esp = NULL; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index edd813dade15..3ce69883bcc4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -12,8 +12,8 @@ #include #include -extern struct dst_ops xfrm4_dst_ops; -extern struct xfrm_policy_afinfo xfrm4_policy_afinfo; +static struct dst_ops xfrm4_dst_ops; +static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct xfrm_type_map xfrm4_type_map = { .lock = RW_LOCK_UNLOCKED }; @@ -243,7 +243,7 @@ static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } -struct dst_ops xfrm4_dst_ops = { +static struct dst_ops xfrm4_dst_ops = { .family = AF_INET, .protocol = __constant_htons(ETH_P_IP), .gc = xfrm4_garbage_collect, @@ -252,7 +252,7 @@ struct dst_ops xfrm4_dst_ops = { .entry_size = sizeof(struct xfrm_dst), }; -struct xfrm_policy_afinfo xfrm4_policy_afinfo = { +static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, .lock = RW_LOCK_UNLOCKED, .type_map = &xfrm4_type_map, @@ -263,12 +263,12 @@ struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .decode_session = _decode_session4, }; -void __init xfrm4_policy_init(void) +static void __init xfrm4_policy_init(void) { xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); } -void __exit xfrm4_policy_fini(void) +static void __exit xfrm4_policy_fini(void) { xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 4bb24b7afec2..32ecedb04abd 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -154,7 +154,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) return 0; } -int ah6_output(struct sk_buff *skb) +static int ah6_output(struct sk_buff *skb) { int err; int extlen; @@ -229,7 +229,7 @@ error: return err; } -int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) +static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state 
*decap, struct sk_buff *skb) { /* * Before process AH @@ -319,8 +319,8 @@ out: return -EINVAL; } -void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) +static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __u32 info) { struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); @@ -448,7 +448,7 @@ static struct inet6_protocol ah6_protocol = { .flags = INET6_PROTO_NOPOLICY, }; -int __init ah6_init(void) +static int __init ah6_init(void) { if (xfrm_register_type(&ah6_type, AF_INET6) < 0) { printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n"); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e40e569f419d..77b4ba6f8016 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -37,7 +37,7 @@ #include #include -int esp6_output(struct sk_buff *skb) +static int esp6_output(struct sk_buff *skb) { int err; int hdr_len; @@ -129,7 +129,7 @@ error: return err; } -int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) +static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) { struct ipv6hdr *iph; struct ipv6_esp_hdr *esph; @@ -252,8 +252,8 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) return mtu + x->props.header_len + esp->auth.icv_full_len; } -void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) +static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __u32 info) { struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset); @@ -272,7 +272,7 @@ void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, xfrm_state_put(x); } -void esp6_destroy(struct xfrm_state *x) +static void esp6_destroy(struct xfrm_state *x) { struct esp_data *esp = x->data; @@ -298,7 +298,7 
@@ void esp6_destroy(struct xfrm_state *x) kfree(esp); } -int esp6_init_state(struct xfrm_state *x, void *args) +static int esp6_init_state(struct xfrm_state *x, void *args) { struct esp_data *esp = NULL; @@ -402,7 +402,7 @@ static struct inet6_protocol esp6_protocol = { .flags = INET6_PROTO_NOPOLICY, }; -int __init esp6_init(void) +static int __init esp6_init(void) { if (xfrm_register_type(&esp6_type, AF_INET6) < 0) { printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n"); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 088e3cb6c65e..a0715e2f05d7 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -17,12 +17,12 @@ #include #include -extern struct dst_ops xfrm6_dst_ops; -extern struct xfrm_policy_afinfo xfrm6_policy_afinfo; +static struct dst_ops xfrm6_dst_ops; +static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED }; -int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) +static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) { int err = 0; *dst = (struct xfrm_dst*)ip6_route_output(NULL, fl); @@ -253,7 +253,7 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) return; } -struct dst_ops xfrm6_dst_ops = { +static struct dst_ops xfrm6_dst_ops = { .family = AF_INET6, .protocol = __constant_htons(ETH_P_IPV6), .gc = xfrm6_garbage_collect, @@ -262,7 +262,7 @@ struct dst_ops xfrm6_dst_ops = { .entry_size = sizeof(struct xfrm_dst), }; -struct xfrm_policy_afinfo xfrm6_policy_afinfo = { +static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .lock = RW_LOCK_UNLOCKED, .type_map = &xfrm6_type_map, @@ -273,12 +273,12 @@ struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .decode_session = _decode_session6, }; -void __init xfrm6_policy_init(void) +static void __init xfrm6_policy_init(void) { xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); } -void __exit xfrm6_policy_fini(void) +static void __exit 
xfrm6_policy_fini(void) { xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); } -- cgit v1.2.3 From fb456da7071eda376f8aa6b5bd8c968c8772d538 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 21 Aug 2004 08:08:54 -0700 Subject: [VLAN]: Missing Kconfig help From: Francois Romieu Help message (2.4.x port + module name) for the configuration of vlan support. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/Kconfig b/net/Kconfig index 37ee31e7d2a9..cae135013a82 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -324,6 +324,19 @@ config BRIDGE config VLAN_8021Q tristate "802.1Q VLAN Support" + ---help--- + Select this and you will be able to create 802.1Q VLAN interfaces + on your ethernet interfaces. 802.1Q VLAN supports almost + everything a regular ethernet interface does, including + firewalling, bridging, and of course IP traffic. You will need + the 'vconfig' tool from the VLAN project in order to effectively + use VLANs. See the VLAN web page for more information: + + + To compile this code as a module, choose M here: the module + will be called 8021q. + + If unsure, say N. config DECNET tristate "DECnet Support" -- cgit v1.2.3 From 06f4b0a3e30851c88dbd3c2994b4832422c8e636 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 22 Aug 2004 07:32:31 -0700 Subject: [PKT_SCHED]: Fix class leak in CBQ scheduler. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/sched/sch_cbq.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 01dfcb1ab832..192ad0a9b904 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1746,15 +1746,18 @@ static void cbq_destroy_filters(struct cbq_class *cl) } } -static void cbq_destroy_class(struct cbq_class *cl) +static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) { + struct cbq_sched_data *q = qdisc_priv(sch); + cbq_destroy_filters(cl); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); #ifdef CONFIG_NET_ESTIMATOR qdisc_kill_estimator(&cl->stats); #endif - kfree(cl); + if (cl != &q->link) + kfree(cl); } static void @@ -1777,8 +1780,7 @@ cbq_destroy(struct Qdisc* sch) for (cl = q->classes[h]; cl; cl = next) { next = cl->next; - if (cl != &q->link) - cbq_destroy_class(cl); + cbq_destroy_class(sch, cl); } } @@ -1799,7 +1801,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) spin_unlock_bh(&sch->dev->queue_lock); #endif - cbq_destroy_class(cl); + cbq_destroy_class(sch, cl); } } @@ -2035,7 +2037,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) sch_tree_unlock(sch); if (--cl->refcnt == 0) - cbq_destroy_class(cl); + cbq_destroy_class(sch, cl); return 0; } -- cgit v1.2.3 From 455aef820d814acb9d5602c6779a3d88727ae1be Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 22 Aug 2004 07:34:34 -0700 Subject: [PKT_SCHED]: Missing dev_put in error path Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/sched/sch_api.c | 4 +++- net/sched/sch_generic.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 21cdd83d0f9d..ebb9935ab4ca 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -441,7 +441,7 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) handle = qdisc_alloc_handle(dev); err = -ENOMEM; if (handle == 0) - goto err_out2; + goto err_out3; } if (handle == TC_H_INGRESS) @@ -464,6 +464,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) #endif return sch; } +err_out3: + dev_put(dev); err_out2: module_put(ops->owner); err_out: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 82117f9ba7d1..2d7e859d0cd8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -415,6 +415,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) if (!ops->init || ops->init(sch, NULL) == 0) return sch; + dev_put(dev); kfree(p); return NULL; } -- cgit v1.2.3 From c8a38b29f7a63611c4f7eac8e3f1c576edabcba3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 Aug 2004 07:51:31 -0700 Subject: [SUNGEM]: Fix locking in gem_interrupt(). Do not release lock unless actually taken. Noticed by Eric Lemoine. Signed-off-by: David S. 
Miller --- drivers/net/sungem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index e48ec3421328..72138612d2fb 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -808,16 +808,16 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id, struct pt_regs *regs) if (gem_status & GREG_STAT_ABNORMAL) { if (gem_abnormal_irq(dev, gp, gem_status)) - goto out; + goto out_unlock; } if (gem_status & (GREG_STAT_TXALL | GREG_STAT_TXINTME)) gem_tx(dev, gp, gem_status); if (gem_status & GREG_STAT_RXDONE) gem_rx(gp); -out: +out_unlock: spin_unlock(&gp->lock); - +out: return IRQ_HANDLED; } -- cgit v1.2.3 From f04b34e0ab28cb8cc945bd3614a4d62b691e3f6e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 Aug 2004 08:59:00 -0700 Subject: [PKT_SCHED]: Fix unused label warning in ingress_init(). Signed-off-by: David S. Miller --- net/sched/sch_ingress.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 93ed728f3a02..13b5c3414794 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -283,7 +283,7 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt) #ifndef CONFIG_NET_CLS_ACT #ifndef CONFIG_NETFILTER printk("You MUST compile classifier actions into the kernel\n"); - goto error; + return -EINVAL; #else printk("Ingress scheduler: Classifier actions prefered over netfilter\n"); #endif @@ -294,7 +294,7 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt) if (!nf_registered) { if (nf_register_hook(&ing_ops) < 0) { printk("ingress qdisc registration error \n"); - goto error; + return -EINVAL; } nf_registered++; } @@ -304,8 +304,6 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt) DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); p->q = &noop_qdisc; return 0; -error: - return -EINVAL; } -- cgit v1.2.3