From 5a7c200070b3f24fbf79f45fb742b6adbd6085cc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 13:47:33 -0800 Subject: Move IP-specific identity information out of struct sock. Fix -EFAULT handling in TCP direct user copy handling. Use struct initializers in IPV6 ndisc code. --- drivers/net/pppoe.c | 6 +- include/linux/if_ec.h | 1 + include/linux/if_pppox.h | 1 + include/linux/ip.h | 11 ++- include/net/ip.h | 3 +- include/net/sock.h | 31 +++------ include/net/tcp.h | 41 +++++------ include/net/udp.h | 3 +- net/econet/af_econet.c | 2 +- net/ipv4/af_inet.c | 50 +++++++------- net/ipv4/ip_input.c | 2 +- net/ipv4/ip_output.c | 13 ++-- net/ipv4/ip_sockglue.c | 8 +-- net/ipv4/netfilter/ip_conntrack_core.c | 7 +- net/ipv4/raw.c | 33 +++++---- net/ipv4/tcp.c | 17 ++--- net/ipv4/tcp_diag.c | 49 +++++++++----- net/ipv4/tcp_input.c | 44 +++++------- net/ipv4/tcp_ipv4.c | 120 +++++++++++++++++++-------------- net/ipv4/tcp_minisocks.c | 19 +++--- net/ipv4/tcp_output.c | 7 +- net/ipv4/tcp_timer.c | 8 ++- net/ipv4/udp.c | 91 ++++++++++++++----------- net/ipv6/af_inet6.c | 30 ++++----- net/ipv6/ipv6_sockglue.c | 4 +- net/ipv6/ndisc.c | 95 +++++++++++++------------- net/ipv6/raw.c | 22 +++--- net/ipv6/tcp_ipv6.c | 82 ++++++++++++---------- net/ipv6/udp.c | 61 +++++++++-------- net/packet/af_packet.c | 22 +++--- net/sunrpc/svcsock.c | 3 +- 31 files changed, 482 insertions(+), 404 deletions(-) diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ffb77a36fa7f..3c6bfaed71cf 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -635,7 +635,7 @@ int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, sk->state = PPPOX_CONNECTED; } - sk->num = sp->sa_addr.pppoe.sid; + po->num = sp->sa_addr.pppoe.sid; end: release_sock(sk); @@ -788,7 +788,7 @@ int pppoe_sendmsg(struct socket *sock, struct msghdr *m, hdr.ver = 1; hdr.type = 1; hdr.code = 0; - hdr.sid = sk->num; + hdr.sid = po->num; lock_sock(sk); @@ -862,7 +862,7 @@ int __pppoe_xmit(struct 
sock *sk, struct sk_buff *skb) hdr.ver = 1; hdr.type = 1; hdr.code = 0; - hdr.sid = sk->num; + hdr.sid = po->num; hdr.length = htons(skb->len); if (!dev) diff --git a/include/linux/if_ec.h b/include/linux/if_ec.h index 8e2e4a1fe424..b5ee8425325f 100644 --- a/include/linux/if_ec.h +++ b/include/linux/if_ec.h @@ -53,6 +53,7 @@ struct econet_opt unsigned char port; unsigned char station; unsigned char net; + unsigned short num; }; #define ec_sk(__sk) ((struct econet_opt *)(__sk)->protinfo) diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 64b16b277e3c..9d2206b1d06f 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -127,6 +127,7 @@ struct pppox_opt { union { struct pppoe_opt pppoe; } proto; + unsigned short num; }; #define pppoe_dev proto.pppoe.dev #define pppoe_pa proto.pppoe.pa diff --git a/include/linux/ip.h b/include/linux/ip.h index 97e388ade261..3ba8e804b0d8 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -116,17 +116,24 @@ struct ip_options { #define optlength(opt) (sizeof(struct ip_options) + opt->optlen) struct inet_opt { + /* Socket demultiplex comparisons on incoming packets. */ + __u32 daddr; /* Foreign IPv4 addr */ + __u32 rcv_saddr; /* Bound local IPv4 addr */ + __u16 dport; /* Destination port */ + __u16 num; /* Local port */ + __u32 saddr; /* Sending source */ int ttl; /* TTL setting */ int tos; /* TOS */ unsigned cmsg_flags; struct ip_options *opt; + __u16 sport; /* Source port */ unsigned char hdrincl; /* Include headers ? 
*/ __u8 mc_ttl; /* Multicasting TTL */ __u8 mc_loop; /* Loopback */ + __u8 pmtudisc; + __u16 id; /* ID counter for DF pkts */ unsigned recverr : 1, freebind : 1; - __u16 id; /* ID counter for DF pkts */ - __u8 pmtudisc; int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ diff --git a/include/net/ip.h b/include/net/ip.h index 705fed205357..e2da92393da8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -197,7 +197,8 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str * does not change, they drop every other packet in * a TCP stream using header compression. */ - iph->id = (sk && sk->daddr) ? htons(inet_sk(sk)->id++) : 0; + iph->id = (sk && inet_sk(sk)->daddr) ? + htons(inet_sk(sk)->id++) : 0; } else __ip_select_ident(iph, dst); } diff --git a/include/net/sock.h b/include/net/sock.h index 898193d61fc3..4c5d7c257d75 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -83,28 +83,22 @@ do { spin_lock_init(&((__sk)->lock.slock)); \ } while(0); struct sock { - /* Socket demultiplex comparisons on incoming packets. */ - __u32 daddr; /* Foreign IPv4 addr */ - __u32 rcv_saddr; /* Bound local IPv4 addr */ - __u16 dport; /* Destination port */ - unsigned short num; /* Local port */ - int bound_dev_if; /* Bound device index if != 0 */ - + /* Begin of struct sock/struct tcp_tw_bucket shared layout */ + volatile unsigned char state, /* Connection state */ + zapped; /* ax25 & ipx means !linked */ + unsigned char reuse; /* SO_REUSEADDR setting */ + unsigned char shutdown; + int bound_dev_if; /* Bound device index if != 0 */ /* Main hash linkage for various protocol lookup tables. 
*/ struct sock *next; struct sock **pprev; struct sock *bind_next; struct sock **bind_pprev; - - volatile unsigned char state, /* Connection state */ - zapped; /* In ax25 & ipx means not linked */ - __u16 sport; /* Source port */ - - unsigned short family; /* Address family */ - unsigned char reuse; /* SO_REUSEADDR setting */ - unsigned char shutdown; atomic_t refcnt; /* Reference count */ - + unsigned short family; /* Address family */ + /* End of struct sock/struct tcp_tw_bucket shared layout */ + unsigned char use_write_queue; + unsigned char userlocks; socket_lock_t lock; /* Synchronizer... */ int rcvbuf; /* Size of receive buffer in bytes */ @@ -118,7 +112,6 @@ struct sock { atomic_t omem_alloc; /* "o" is "option" or "other" */ int wmem_queued; /* Persistent queue size */ int forward_alloc; /* Space allocated forward. */ - __u32 saddr; /* Sending source */ unsigned int allocation; /* Allocation mode */ int sndbuf; /* Size of send buffer in bytes */ struct sock *prev; @@ -137,9 +130,7 @@ struct sock { bsdism; unsigned char debug; unsigned char rcvtstamp; - unsigned char use_write_queue; - unsigned char userlocks; - /* Hole of 3 bytes. Try to pack. */ + /* Hole of 1 byte. Try to pack. */ int route_caps; int proc; unsigned long lingertime; diff --git a/include/net/tcp.h b/include/net/tcp.h index bccbac72b43e..c45fc3e12845 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -53,7 +53,7 @@ struct tcp_ehash_bucket { * 2) If all sockets have sk->reuse set, and none of them are in * TCP_LISTEN state, the port may be shared. * Failing that, goto test 3. - * 3) If all sockets are bound to a specific sk->rcv_saddr local + * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local * address, and none of them are the same, the port may be * shared. * Failing this, the port cannot be shared. 
@@ -162,23 +162,26 @@ struct tcp_tw_bucket { * XXX Yes I know this is gross, but I'd have to edit every single * XXX networking file if I created a "struct sock_header". -DaveM */ - __u32 daddr; - __u32 rcv_saddr; - __u16 dport; - unsigned short num; + volatile unsigned char state, /* Connection state */ + substate; /* "zapped" -> "substate" */ + unsigned char reuse; /* SO_REUSEADDR setting */ + unsigned char rcv_wscale; /* also TW bucket specific */ int bound_dev_if; + /* Main hash linkage for various protocol lookup tables. */ struct sock *next; struct sock **pprev; struct sock *bind_next; struct sock **bind_pprev; - unsigned char state, - substate; /* "zapped" is replaced with "substate" */ - __u16 sport; - unsigned short family; - unsigned char reuse, - rcv_wscale; /* It is also TW bucket specific */ atomic_t refcnt; - + unsigned short family; + /* End of struct sock/struct tcp_tw_bucket shared layout */ + __u16 sport; + /* Socket demultiplex comparisons on incoming packets. */ + /* these five are in inet_opt */ + __u32 daddr; + __u32 rcv_saddr; + __u16 dport; + __u16 num; /* And these are ours. 
*/ int hashent; int timeout; @@ -236,20 +239,20 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); #endif /* __BIG_ENDIAN */ #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&((__sk)->daddr)))== (__cookie)) && \ - ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->daddr == (__saddr)) && \ - ((__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + ((inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif)))) #endif /* 64-bit arch */ #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ ((__sk)->family == AF_INET6) && \ !ipv6_addr_cmp(&inet6_sk(__sk)->daddr, (__saddr)) && \ !ipv6_addr_cmp(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ @@ -263,7 +266,7 @@ static __inline__ int tcp_lhashfn(unsigned short num) static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) { - return tcp_lhashfn(sk->num); + return tcp_lhashfn(inet_sk(sk)->num); } #define MAX_TCP_HEADER (128 + MAX_HEADER) diff --git a/include/net/udp.h b/include/net/udp.h index c7049c5b4bf8..15f7626f7dcd 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -23,6 +23,7 @@ #define _UDP_H #include +#include #include #define UDP_HTABLE_SIZE 128 @@ -41,7 +42,7 @@ static inline int udp_lport_inuse(u16 num) struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)]; 
for(; sk != NULL; sk = sk->next) { - if(sk->num == num) + if (inet_sk(sk)->num == num) return 1; } return 0; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index e9fa912af200..8aeaff1bb471 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -554,7 +554,7 @@ static int econet_create(struct socket *sock, int protocol) memset(eo, 0, sizeof(*eo)); sk->zapped=0; sk->family = PF_ECONET; - sk->num = protocol; + eo->num = protocol; sklist_insert_socket(&econet_sklist, sk); return(0); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93080c9a1e0f..8b70a21036be 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -270,14 +270,15 @@ int inet_getsockopt(struct socket *sock, int level, int optname, static int inet_autobind(struct sock *sk) { + struct inet_opt *inet = inet_sk(sk); /* We may need to bind the socket. */ lock_sock(sk); - if (sk->num == 0) { + if (!inet->num) { if (sk->prot->get_port(sk, 0) != 0) { release_sock(sk); return -EAGAIN; } - sk->sport = htons(sk->num); + inet->sport = htons(inet->num); } release_sock(sk); return 0; @@ -397,7 +398,7 @@ static int inet_create(struct socket *sock, int protocol) inet = inet_sk(sk); if (SOCK_RAW == sock->type) { - sk->num = protocol; + inet->num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } @@ -430,13 +431,13 @@ static int inet_create(struct socket *sock, int protocol) atomic_inc(&inet_sock_nr); #endif - if (sk->num) { + if (inet->num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically * shares. */ - sk->sport = htons(sk->num); + inet->sport = htons(inet->num); /* Add to protocol hash chains. */ sk->prot->hash(sk); @@ -551,28 +552,27 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check these errors (active socket, double bind). 
*/ err = -EINVAL; - if ((sk->state != TCP_CLOSE) || - (sk->num != 0)) + if (sk->state != TCP_CLOSE || inet->num) goto out; - sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - sk->saddr = 0; /* Use device */ + inet->saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ if (sk->prot->get_port(sk, snum) != 0) { - sk->saddr = sk->rcv_saddr = 0; + inet->saddr = inet->rcv_saddr = 0; err = -EADDRINUSE; goto out; } - if (sk->rcv_saddr) + if (inet->rcv_saddr) sk->userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->userlocks |= SOCK_BINDPORT_LOCK; - sk->sport = htons(sk->num); - sk->daddr = 0; - sk->dport = 0; + inet->sport = htons(inet->num); + inet->daddr = 0; + inet->dport = 0; sk_dst_reset(sk); err = 0; out: @@ -588,7 +588,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, if (uaddr->sa_family == AF_UNSPEC) return sk->prot->disconnect(sk, flags); - if (sk->num==0 && inet_autobind(sk) != 0) + if (!inet_sk(sk)->num && inet_autobind(sk)) return -EAGAIN; return sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len); } @@ -627,6 +627,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, int addr_len, int flags) { struct sock *sk=sock->sk; + struct inet_opt *inet = inet_sk(sk); int err; long timeo; @@ -655,10 +656,10 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, goto out; err = -EAGAIN; - if (sk->num == 0) { + if (!inet->num) { if (sk->prot->get_port(sk, 0) != 0) goto out; - sk->sport = htons(sk->num); + inet->sport = htons(inet->num); } err = sk->prot->connect(sk, uaddr, addr_len); @@ -748,21 +749,22 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sock *sk = sock->sk; + struct inet_opt *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; sin->sin_family = AF_INET; if (peer) { - if 
(!sk->dport) + if (!inet->dport) return -ENOTCONN; if (((1<state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1) return -ENOTCONN; - sin->sin_port = sk->dport; - sin->sin_addr.s_addr = sk->daddr; + sin->sin_port = inet->dport; + sin->sin_addr.s_addr = inet->daddr; } else { - __u32 addr = sk->rcv_saddr; + __u32 addr = inet->rcv_saddr; if (!addr) - addr = sk->saddr; - sin->sin_port = sk->sport; + addr = inet->saddr; + sin->sin_port = inet->sport; sin->sin_addr.s_addr = addr; } *uaddr_len = sizeof(*sin); @@ -792,7 +794,7 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, struct sock *sk = sock->sk; /* We may need to bind the socket. */ - if (sk->num==0 && inet_autobind(sk) != 0) + if (!inet_sk(sk)->num && inet_autobind(sk)) return -EAGAIN; return sk->prot->sendmsg(sk, msg, size); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 6cafdab00560..3f93680cc5fe 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -166,7 +166,7 @@ int ip_call_ra_chain(struct sk_buff *skb) /* If socket is bound to an interface, only report * the packet if it came from that interface. */ - if (sk && sk->num == protocol + if (sk && inet_sk(sk)->num == protocol && ((sk->bound_dev_if == 0) || (sk->bound_dev_if == skb->dev->ifindex))) { if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c6a27ffabb9e..b5d4485d08f2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -360,7 +360,7 @@ int ip_queue_xmit(struct sk_buff *skb) u32 daddr; /* Use correct destination address if we have options. */ - daddr = sk->daddr; + daddr = inet->daddr; if(opt && opt->srr) daddr = opt->faddr; @@ -368,7 +368,7 @@ int ip_queue_xmit(struct sk_buff *skb) * keep trying until route appears or the connection times itself * out. 
*/ - if (ip_route_output(&rt, daddr, sk->saddr, + if (ip_route_output(&rt, daddr, inet->saddr, RT_CONN_FLAGS(sk), sk->bound_dev_if)) goto no_route; @@ -395,7 +395,7 @@ packet_routed: if(opt && opt->optlen) { iph->ihl += opt->optlen >> 2; - ip_options_build(skb, opt, sk->daddr, rt, 0); + ip_options_build(skb, opt, inet->daddr, rt, 0); } return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, @@ -471,7 +471,7 @@ static int ip_build_xmit_slow(struct sock *sk, } if (length + fragheaderlen > 0xFFFF) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu); + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); return -EMSGSIZE; } @@ -503,7 +503,7 @@ static int ip_build_xmit_slow(struct sock *sk, */ if (offset > 0 && inet->pmtudisc == IP_PMTUDISC_DO) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu); + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) @@ -659,7 +659,8 @@ int ip_build_xmit(struct sock *sk, return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags); } else { if (length > rt->u.dst.dev->mtu) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu); + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, + rt->u.dst.dev->mtu); return -EMSGSIZE; } } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 135b3235f840..ffd85dc5870b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -193,7 +193,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s { struct ip_ra_chain *ra, *new_ra, **rap; - if (sk->type != SOCK_RAW || sk->num == IPPROTO_RAW) + if (sk->type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW) return -EINVAL; new_ra = on ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -435,7 +435,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->family == PF_INET || (!((1<state)&(TCPF_LISTEN|TCPF_CLOSE)) - && sk->daddr != LOOPBACK4_IPV6)) { + && inet->daddr != LOOPBACK4_IPV6)) { #endif if (opt) tp->ext_header_len = opt->optlen; @@ -771,8 +771,8 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; - info.ipi_addr.s_addr = sk->rcv_saddr; - info.ipi_spec_dst.s_addr = sk->rcv_saddr; + info.ipi_addr.s_addr = inet->rcv_saddr; + info.ipi_spec_dst.s_addr = inet->rcv_saddr; info.ipi_ifindex = inet->mc_index; put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 97f23d5c4a0c..41c893ed78bd 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -969,9 +969,12 @@ ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), static int getorigdst(struct sock *sk, int optval, void *user, int *len) { + struct inet_opt *inet = inet_sk(sk); struct ip_conntrack_tuple_hash *h; - struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport } }, - { sk->daddr, { sk->dport }, + struct ip_conntrack_tuple tuple = { { inet->rcv_saddr, + { inet->sport } }, + { inet->daddr, + { inet->dport }, IPPROTO_TCP } }; /* We only do TCP at the moment: is there a better way? 
*/ diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 49c1c6db38f1..1c798bdd9414 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -70,7 +70,8 @@ rwlock_t raw_v4_lock = RW_LOCK_UNLOCKED; static void raw_v4_hash(struct sock *sk) { - struct sock **skp = &raw_v4_htable[sk->num & (RAWV4_HTABLE_SIZE - 1)]; + struct sock **skp = &raw_v4_htable[inet_sk(sk)->num & + (RAWV4_HTABLE_SIZE - 1)]; write_lock_bh(&raw_v4_lock); if ((sk->next = *skp) != NULL) @@ -103,9 +104,11 @@ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, struct sock *s = sk; for (s = sk; s; s = s->next) { - if (s->num == num && - !(s->daddr && s->daddr != raddr) && - !(s->rcv_saddr && s->rcv_saddr != laddr) && + struct inet_opt *inet = inet_sk(s); + + if (inet->num == num && + !(inet->daddr && inet->daddr != raddr) && + !(inet->rcv_saddr && inet->rcv_saddr != laddr) && !(s->bound_dev_if && s->bound_dev_if != dif)) break; /* gotcha */ } @@ -364,10 +367,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len) err = -EINVAL; if (sk->state != TCP_ESTABLISHED) goto out; - daddr = sk->daddr; + daddr = inet->daddr; } - ipc.addr = sk->saddr; + ipc.addr = inet->saddr; ipc.opt = NULL; ipc.oif = sk->bound_dev_if; @@ -458,6 +461,7 @@ static void raw_close(struct sock *sk, long timeout) /* This gets rid of all the nasties in af_inet. 
-DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + struct inet_opt *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; int ret = -EINVAL; int chk_addr_ret; @@ -469,9 +473,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; - sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - sk->saddr = 0; /* Use device */ + inet->saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; out: return ret; @@ -534,7 +538,7 @@ out: return err ? : copied; static int raw_init(struct sock *sk) { struct raw_opt *tp = raw4_sk(sk); - if (sk->num == IPPROTO_ICMP) + if (inet_sk(sk)->num == IPPROTO_ICMP) memset(&tp->filter, 0, sizeof(tp->filter)); return 0; } @@ -574,7 +578,7 @@ static int raw_setsockopt(struct sock *sk, int level, int optname, return ip_setsockopt(sk, level, optname, optval, optlen); if (optname == ICMP_FILTER) { - if (sk->num != IPPROTO_ICMP) + if (inet_sk(sk)->num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_seticmpfilter(sk, optval, optlen); @@ -589,7 +593,7 @@ static int raw_getsockopt(struct sock *sk, int level, int optname, return ip_getsockopt(sk, level, optname, optval, optlen); if (optname == ICMP_FILTER) { - if (sk->num != IPPROTO_ICMP) + if (inet_sk(sk)->num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_geticmpfilter(sk, optval, optlen); @@ -627,10 +631,11 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) static void get_raw_sock(struct sock *sp, char *tmpbuf, int i) { - unsigned int dest = sp->daddr, - src = sp->rcv_saddr; + struct inet_opt *inet = inet_sk(sp); + unsigned int dest = inet->daddr, + src = inet->rcv_saddr; __u16 destp = 0, - srcp = sp->num; + srcp = inet->num; sprintf(tmpbuf, 
"%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d8366237ee8..286c18ca2ee4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -524,6 +524,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) int tcp_listen_start(struct sock *sk) { + struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_listen_opt *lopt; @@ -552,8 +553,8 @@ int tcp_listen_start(struct sock *sk) * after validation is complete. */ sk->state = TCP_LISTEN; - if (sk->prot->get_port(sk, sk->num) == 0) { - sk->sport = htons(sk->num); + if (!sk->prot->get_port(sk, inet->num)) { + inet->sport = htons(inet->num); sk_dst_reset(sk); sk->prot->hash(sk); @@ -1786,8 +1787,8 @@ void tcp_destroy_sock(struct sock *sk) /* It cannot be in hash table! */ BUG_TRAP(sk->pprev==NULL); - /* If it has not 0 sk->num, it must be bound */ - BUG_TRAP(!sk->num || sk->prev!=NULL); + /* If it has not 0 inet_sk(sk)->num, it must be bound */ + BUG_TRAP(!inet_sk(sk)->num || sk->prev); #ifdef TCP_DEBUG if (sk->zapped) { @@ -1988,6 +1989,7 @@ extern __inline__ int tcp_need_reset(int state) int tcp_disconnect(struct sock *sk, int flags) { + struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); int old_state; int err = 0; @@ -2015,11 +2017,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_writequeue_purge(sk); __skb_queue_purge(&tp->out_of_order_queue); - sk->dport = 0; + inet->dport = 0; if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) { - sk->rcv_saddr = 0; - sk->saddr = 0; + inet->rcv_saddr = inet->saddr = 0; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -2049,7 +2050,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_sack_reset(tp); __sk_dst_reset(sk); - BUG_TRAP(!sk->num || sk->prev); + BUG_TRAP(!inet->num || sk->prev); sk->error_report(sk); return err; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 
e84d336e8fac..a2248dc5b948 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -44,6 +44,7 @@ static struct sock *tcpnl; static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, int ext, u32 pid, u32 seq) { + struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; @@ -64,10 +65,6 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_timer = 0; r->tcpdiag_retrans = 0; - r->id.tcpdiag_sport = sk->sport; - r->id.tcpdiag_dport = sk->dport; - r->id.tcpdiag_src[0] = sk->rcv_saddr; - r->id.tcpdiag_dst[0] = sk->daddr; r->id.tcpdiag_if = sk->bound_dev_if; *((struct sock **)&r->id.tcpdiag_cookie) = sk; @@ -77,6 +74,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (tmo < 0) tmo = 0; + r->id.tcpdiag_sport = tw->sport; + r->id.tcpdiag_dport = tw->dport; + r->id.tcpdiag_src[0] = tw->rcv_saddr; + r->id.tcpdiag_dst[0] = tw->daddr; r->tcpdiag_state = tw->substate; r->tcpdiag_timer = 3; r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ; @@ -94,6 +95,11 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, return skb->len; } + r->id.tcpdiag_sport = inet->sport; + r->id.tcpdiag_dport = inet->dport; + r->id.tcpdiag_src[0] = inet->rcv_saddr; + r->id.tcpdiag_dst[0] = inet->daddr; + #ifdef CONFIG_IPV6 if (r->tcpdiag_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -291,6 +297,7 @@ int tcpdiag_bc_run(char *bc, int len, struct sock *sk) { while (len > 0) { int yes = 1; + struct inet_opt *inet = inet_sk(sk); struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc; switch (op->code) { @@ -300,16 +307,16 @@ int tcpdiag_bc_run(char *bc, int len, struct sock *sk) yes = 0; break; case TCPDIAG_BC_S_GE: - yes = (sk->num >= op[1].no); + yes = inet->num >= op[1].no; break; case TCPDIAG_BC_S_LE: - yes = (sk->num <= op[1].no); + yes = inet->num <= op[1].no; break; case TCPDIAG_BC_D_GE: - yes = (ntohs(sk->dport) >= op[1].no); + yes = ntohs(inet->dport) >= op[1].no; break; case 
TCPDIAG_BC_D_LE: - yes = (ntohs(sk->dport) <= op[1].no); + yes = ntohs(inet->dport) <= op[1].no; break; case TCPDIAG_BC_AUTO: yes = !(sk->userlocks&SOCK_BINDPORT_LOCK); @@ -321,7 +328,8 @@ int tcpdiag_bc_run(char *bc, int len, struct sock *sk) u32 *addr; if (cond->port != -1 && - cond->port != (op->code == TCPDIAG_BC_S_COND ? sk->num : ntohs(sk->dport))) { + cond->port != (op->code == TCPDIAG_BC_S_COND ? + inet->num : ntohs(inet->dport))) { yes = 0; break; } @@ -341,9 +349,9 @@ int tcpdiag_bc_run(char *bc, int len, struct sock *sk) #endif { if (op->code == TCPDIAG_BC_S_COND) - addr = &sk->rcv_saddr; + addr = &inet->rcv_saddr; else - addr = &sk->daddr; + addr = &inet->daddr; } if (bitstring_match(addr, cond->addr, cond->prefix_len)) @@ -453,12 +461,14 @@ int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) for (sk = tcp_listening_hash[i], num = 0; sk != NULL; sk = sk->next, num++) { + struct inet_opt *inet = inet_sk(sk); if (num < s_num) continue; if (!(r->tcpdiag_states&TCPF_LISTEN) || r->id.tcpdiag_dport) continue; - if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport) + if (r->id.tcpdiag_sport != inet->sport && + r->id.tcpdiag_sport) continue; if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk)) continue; @@ -491,13 +501,16 @@ skip_listen_ht: for (sk = head->chain, num = 0; sk != NULL; sk = sk->next, num++) { + struct inet_opt *inet = inet_sk(sk); + if (num < s_num) continue; if (!(r->tcpdiag_states&(1<state))) continue; - if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport) + if (r->id.tcpdiag_sport != inet->sport && + r->id.tcpdiag_sport) continue; - if (r->id.tcpdiag_dport != sk->dport && r->id.tcpdiag_dport) + if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport) continue; if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk)) continue; @@ -513,13 +526,17 @@ skip_listen_ht: for (sk = tcp_ehash[i+tcp_ehash_size].chain; sk != NULL; sk = sk->next, num++) { + struct inet_opt *inet = inet_sk(sk); + if (num < 
s_num) continue; if (!(r->tcpdiag_states&(1<zapped))) continue; - if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport) + if (r->id.tcpdiag_sport != inet->sport && + r->id.tcpdiag_sport) continue; - if (r->id.tcpdiag_dport != sk->dport && r->id.tcpdiag_dport) + if (r->id.tcpdiag_dport != inet->dport && + r->id.tcpdiag_dport) continue; if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk)) continue; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d8643a594a7c..a397d84e0044 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1329,9 +1329,10 @@ static __inline__ int tcp_packet_delayed(struct tcp_opt *tp) #if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg) { + struct inet_opt *inet = inet_sk(sk); printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, - NIPQUAD(sk->daddr), ntohs(sk->dport), + NIPQUAD(inet->daddr), ntohs(inet->dport), tp->snd_cwnd, tp->left_out, tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -2570,15 +2571,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __set_current_state(TASK_RUNNING); local_bh_enable(); - if (skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, - chunk)) { - sk->err = EFAULT; - sk->error_report(sk); + if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { + tp->ucopy.len -= chunk; + tp->copied_seq += chunk; + eaten = (chunk == skb->len && !th->fin); } local_bh_disable(); - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len && !th->fin); } if (eaten <= 0) { @@ -3178,17 +3176,8 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) tp->ucopy.iov); if (!err) { -update: - tp->ucopy.len -= chunk; + tp->ucopy.len -= chunk; tp->copied_seq += chunk; - local_bh_disable(); - return 0; - } - - if (err == -EFAULT) { - sk->err = EFAULT; - sk->error_report(sk); - goto update; } local_bh_disable(); @@ -3327,19 +3316,16 @@ int tcp_rcv_established(struct 
sock *sk, struct sk_buff *skb, tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len && sk->lock.users) { - eaten = 1; - - NET_INC_STATS_BH(TCPHPHitsToUser); - __set_current_state(TASK_RUNNING); - if (tcp_copy_to_iovec(sk, skb, tcp_header_len)) - goto csum_error; - - __skb_pull(skb,tcp_header_len); - - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - } else { + if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { + __skb_pull(skb, tcp_header_len); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + NET_INC_STATS_BH(TCPHPHitsToUser); + eaten = 1; + } + } + if (!eaten) { if (tcp_checksum_complete_user(sk, skb)) goto csum_error; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 41648df322c6..b7049e4294af 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -109,10 +109,11 @@ static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, static __inline__ int tcp_sk_hashfn(struct sock *sk) { - __u32 laddr = sk->rcv_saddr; - __u16 lport = sk->num; - __u32 faddr = sk->daddr; - __u16 fport = sk->dport; + struct inet_opt *inet = inet_sk(sk); + __u32 laddr = inet->rcv_saddr; + __u16 lport = inet->num; + __u32 faddr = inet->daddr; + __u16 fport = inet->dport; return tcp_hashfn(laddr, lport, faddr, fport); } @@ -141,7 +142,8 @@ struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, /* Caller must disable local BH processing. 
*/ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) { - struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(child->num)]; + struct tcp_bind_hashbucket *head = + &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; struct tcp_bind_bucket *tb; spin_lock(&head->lock); @@ -163,7 +165,7 @@ __inline__ void tcp_inherit_port(struct sock *sk, struct sock *child) static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum) { - sk->num = snum; + inet_sk(sk)->num = snum; if ((sk->bind_next = tb->owners) != NULL) tb->owners->bind_pprev = &sk->bind_next; tb->owners = sk; @@ -173,6 +175,7 @@ static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, un static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) { + struct inet_opt *inet = inet_sk(sk); struct sock *sk2 = tb->owners; int sk_reuse = sk->reuse; @@ -182,9 +185,10 @@ static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) if (!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) { - if (!sk2->rcv_saddr || - !sk->rcv_saddr || - (sk2->rcv_saddr == sk->rcv_saddr)) + struct inet_opt *inet2 = inet_sk(sk2); + if (!inet2->rcv_saddr || + !inet->rcv_saddr || + (inet2->rcv_saddr == inet->rcv_saddr)) break; } } @@ -281,7 +285,8 @@ fail: */ __inline__ void __tcp_put_port(struct sock *sk) { - struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(sk->num)]; + struct inet_opt *inet = inet_sk(sk); + struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; struct tcp_bind_bucket *tb; spin_lock(&head->lock); @@ -290,7 +295,7 @@ __inline__ void __tcp_put_port(struct sock *sk) sk->bind_next->bind_pprev = sk->bind_pprev; *(sk->bind_pprev) = sk->bind_next; sk->prev = NULL; - sk->num = 0; + inet->num = 0; if (tb->owners == NULL) { if (tb->next) tb->next->pprev = tb->pprev; @@ -409,8 +414,10 @@ static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, unsigne hiscore=0; for(; sk; 
sk = sk->next) { - if(sk->num == hnum) { - __u32 rcv_saddr = sk->rcv_saddr; + struct inet_opt *inet = inet_sk(sk); + + if(inet->num == hnum) { + __u32 rcv_saddr = inet->rcv_saddr; score = 1; if(rcv_saddr) { @@ -442,9 +449,11 @@ __inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, i read_lock(&tcp_lhash_lock); sk = tcp_listening_hash[tcp_lhashfn(hnum)]; if (sk) { - if (sk->num == hnum && + struct inet_opt *inet = inet_sk(sk); + + if (inet->num == hnum && sk->next == NULL && - (!sk->rcv_saddr || sk->rcv_saddr == daddr) && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && !sk->bound_dev_if) goto sherry_cache; sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif); @@ -531,12 +540,13 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) static int tcp_v4_check_established(struct sock *sk) { - u32 daddr = sk->rcv_saddr; - u32 saddr = sk->daddr; + struct inet_opt *inet = inet_sk(sk); + u32 daddr = inet->rcv_saddr; + u32 saddr = inet->daddr; int dif = sk->bound_dev_if; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num); - int hash = tcp_hashfn(daddr, sk->num, saddr, sk->dport); + __u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num); + int hash = tcp_hashfn(daddr, inet->num, saddr, inet->dport); struct tcp_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2, **skp; struct tcp_tw_bucket *tw; @@ -625,7 +635,7 @@ not_unique: int tcp_v4_hash_connecting(struct sock *sk) { - unsigned short snum = sk->num; + unsigned short snum = inet_sk(sk)->num; struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)]; struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev; @@ -667,7 +677,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, sk->saddr, + tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->bound_dev_if); if (tmp < 0) return tmp; @@ -689,11 
+699,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (buff == NULL) goto failure; - if (!sk->saddr) - sk->saddr = rt->rt_src; - sk->rcv_saddr = sk->saddr; + if (!inet->saddr) + inet->saddr = rt->rt_src; + inet->rcv_saddr = inet->saddr; - if (tp->ts_recent_stamp && sk->daddr != daddr) { + if (tp->ts_recent_stamp && inet->daddr != daddr) { /* Reset inherited state */ tp->ts_recent = 0; tp->ts_recent_stamp = 0; @@ -716,12 +726,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } } - sk->dport = usin->sin_port; - sk->daddr = daddr; + inet->dport = usin->sin_port; + inet->daddr = daddr; if (!tp->write_seq) - tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, - sk->sport, + tp->write_seq = secure_tcp_sequence_number(inet->saddr, + inet->daddr, + inet->sport, usin->sin_port); tp->ext_header_len = 0; @@ -738,7 +749,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) failure: __sk_dst_reset(sk); sk->route_caps = 0; - sk->dport = 0; + inet->dport = 0; return err; } @@ -1018,11 +1029,13 @@ out: void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb) { + struct inet_opt *inet = inet_sk(sk); + if (skb->ip_summed == CHECKSUM_HW) { - th->check = ~tcp_v4_check(th, len, sk->saddr, sk->daddr, 0); + th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); skb->csum = offsetof(struct tcphdr, check); } else { - th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr, + th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr, csum_partial((char *)th, th->doff<<2, skb->csum)); } } @@ -1448,10 +1461,10 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->route_caps = dst->dev->features; newtp = tcp_sk(newsk); - newsk->daddr = req->af.v4_req.rmt_addr; - newsk->saddr = req->af.v4_req.loc_addr; - newsk->rcv_saddr = req->af.v4_req.loc_addr; newinet = inet_sk(newsk); + newinet->daddr = req->af.v4_req.rmt_addr; + 
newinet->rcv_saddr = req->af.v4_req.loc_addr; + newinet->saddr = req->af.v4_req.loc_addr; newinet->opt = req->af.v4_req.opt; req->af.v4_req.opt = NULL; newinet->mc_index = tcp_v4_iif(skb); @@ -1736,9 +1749,9 @@ static int tcp_v4_reselect_saddr(struct sock *sk) struct inet_opt *inet = inet_sk(sk); int err; struct rtable *rt; - __u32 old_saddr = sk->saddr; + __u32 old_saddr = inet->saddr; __u32 new_saddr; - __u32 daddr = sk->daddr; + __u32 daddr = inet->daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; @@ -1759,14 +1772,14 @@ static int tcp_v4_reselect_saddr(struct sock *sk) return 0; if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr " - "from %d.%d.%d.%d to %d.%d.%d.%d\n", + printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->" + "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", NIPQUAD(old_saddr), NIPQUAD(new_saddr)); } - sk->saddr = new_saddr; - sk->rcv_saddr = new_saddr; + inet->saddr = new_saddr; + inet->rcv_saddr = new_saddr; /* XXX The only one ugly spot where we need to * XXX really change the sockets identity after @@ -1791,11 +1804,11 @@ int tcp_v4_rebuild_header(struct sock *sk) return 0; /* Reroute. */ - daddr = sk->daddr; + daddr = inet->daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; - err = ip_route_output(&rt, daddr, sk->saddr, + err = ip_route_output(&rt, daddr, inet->saddr, RT_CONN_FLAGS(sk), sk->bound_dev_if); if (!err) { __sk_dst_set(sk, &rt->u.dst); @@ -1818,10 +1831,11 @@ int tcp_v4_rebuild_header(struct sock *sk) static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; + struct inet_opt *inet = inet_sk(sk); sin->sin_family = AF_INET; - sin->sin_addr.s_addr = sk->daddr; - sin->sin_port = sk->dport; + sin->sin_addr.s_addr = inet->daddr; + sin->sin_port = inet->dport; } /* VJ's idea. 
Save last timestamp seen from this destination @@ -1832,13 +1846,14 @@ static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) int tcp_v4_remember_stamp(struct sock *sk) { + struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct rtable *rt = (struct rtable*)__sk_dst_get(sk); struct inet_peer *peer = NULL; int release_it = 0; - if (rt == NULL || rt->rt_dst != sk->daddr) { - peer = inet_getpeer(sk->daddr, 1); + if (rt == NULL || rt->rt_dst != inet->daddr) { + peer = inet_getpeer(inet->daddr, 1); release_it = 1; } else { if (rt->peer == NULL) @@ -1979,7 +1994,7 @@ static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, " %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p", i, req->af.v4_req.loc_addr, - ntohs(sk->sport), + ntohs(inet_sk(sk)->sport), req->af.v4_req.rmt_addr, ntohs(req->rmt_port), TCP_SYN_RECV, @@ -2002,11 +2017,12 @@ static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i) int timer_active; unsigned long timer_expires; struct tcp_opt *tp = tcp_sk(sp); + struct inet_opt *inet = inet_sk(sp); - dest = sp->daddr; - src = sp->rcv_saddr; - destp = ntohs(sp->dport); - srcp = ntohs(sp->sport); + dest = inet->daddr; + src = inet->rcv_saddr; + destp = ntohs(inet->dport); + srcp = ntohs(inet->sport); if (tp->pending == TCP_TIME_RETRANS) { timer_active = 1; timer_expires = tp->timeout; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 13a8c5872437..16ddbb124bd2 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -327,10 +327,10 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) write_unlock(&ehead->lock); /* Step 3: Put TW into bind hash. Original socket stays there too. - Note, that any socket with sk->num!=0 MUST be bound in binding - cache, even if it is closed. + Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in + binding cache, even if it is closed. 
*/ - bhead = &tcp_bhash[tcp_bhashfn(sk->num)]; + bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; spin_lock(&bhead->lock); tw->tb = (struct tcp_bind_bucket *)sk->prev; BUG_TRAP(sk->prev!=NULL); @@ -357,17 +357,18 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); if(tw != NULL) { + struct inet_opt *inet = inet_sk(sk); int rto = (tp->rto<<2) - (tp->rto>>1); /* Give us an identity. */ - tw->daddr = sk->daddr; - tw->rcv_saddr = sk->rcv_saddr; + tw->daddr = inet->daddr; + tw->rcv_saddr = inet->rcv_saddr; tw->bound_dev_if= sk->bound_dev_if; - tw->num = sk->num; + tw->num = inet->num; tw->state = TCP_TIME_WAIT; tw->substate = state; - tw->sport = sk->sport; - tw->dport = sk->dport; + tw->sport = inet->sport; + tw->dport = inet->dport; tw->family = sk->family; tw->reuse = sk->reuse; tw->rcv_wscale = tp->rcv_wscale; @@ -660,7 +661,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newsk->prev = NULL; /* Clone the TCP header template */ - newsk->dport = req->rmt_port; + inet_sk(newsk)->dport = req->rmt_port; sock_lock_init(newsk); bh_lock_sock(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7035069014ee..71b406306d24 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -188,6 +188,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if(skb != NULL) { + struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); int tcp_header_size = tp->tcp_header_len; @@ -227,8 +228,8 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) skb_set_owner_w(skb, sk); /* Build TCP header and checksum it. 
*/ - th->source = sk->sport; - th->dest = sk->dport; + th->source = inet->sport; + th->dest = inet->dport; th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); @@ -1120,7 +1121,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = sk->sport; + th->source = inet_sk(sk)->sport; th->dest = req->rmt_port; TCP_SKB_CB(skb)->seq = req->snt_isn; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7bffbbfd7b11..3901d11cac15 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -334,10 +334,12 @@ static void tcp_retransmit_timer(struct sock *sk) * we cannot allow such beasts to hang infinitely. */ #ifdef TCP_DEBUG - if (net_ratelimit()) + if (net_ratelimit()) { + struct inet_opt *inet = inet_sk(sk); printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. 
Repaired.\n", - NIPQUAD(sk->daddr), htons(sk->dport), sk->num, - tp->snd_una, tp->snd_nxt); + NIPQUAD(inet->daddr), htons(inet->dport), + inet->num, tp->snd_una, tp->snd_nxt); + } #endif if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { tcp_write_err(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f43ec7d29215..7cb2d0cee4ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -108,6 +108,8 @@ int udp_port_rover; static int udp_v4_get_port(struct sock *sk, unsigned short snum) { + struct inet_opt *inet = inet_sk(sk); + write_lock_bh(&udp_hash_lock); if (snum == 0) { int best_size_so_far, best, result, i; @@ -118,11 +120,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct sock *sk; + struct sock *sk2; int size; - sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (!sk) { + sk2 = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (!sk2) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -133,7 +135,7 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) do { if (++size >= best_size_so_far) goto next; - } while ((sk = sk->next) != NULL); + } while ((sk2 = sk2->next) != NULL); best_size_so_far = size; best = result; next:; @@ -157,17 +159,19 @@ gotit: for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) { - if (sk2->num == snum && + struct inet_opt *inet2 = inet_sk(sk2); + + if (inet2->num == snum && sk2 != sk && sk2->bound_dev_if == sk->bound_dev_if && - (!sk2->rcv_saddr || - !sk->rcv_saddr || - sk2->rcv_saddr == sk->rcv_saddr) && + (!inet2->rcv_saddr || + !inet->rcv_saddr || + inet2->rcv_saddr == inet->rcv_saddr) && (!sk2->reuse || !sk->reuse)) goto fail; } } - sk->num = snum; + inet->num = snum; if (sk->pprev == NULL) { struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; if ((sk->next = *skp) != NULL) @@ 
-198,7 +202,7 @@ static void udp_v4_unhash(struct sock *sk) sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; - sk->num = 0; + inet_sk(sk)->num = 0; sock_prot_dec_use(sk->prot); __sock_put(sk); } @@ -215,20 +219,22 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i int badness = -1; for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { - if(sk->num == hnum) { + struct inet_opt *inet = inet_sk(sk); + + if (inet->num == hnum) { int score = 0; - if(sk->rcv_saddr) { - if(sk->rcv_saddr != daddr) + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) continue; score++; } - if(sk->daddr) { - if(sk->daddr != saddr) + if (inet->daddr) { + if (inet->daddr != saddr) continue; score++; } - if(sk->dport) { - if(sk->dport != sport) + if (inet->dport) { + if (inet->dport != sport) continue; score++; } @@ -269,10 +275,12 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, struct sock *s = sk; unsigned short hnum = ntohs(loc_port); for(; s; s = s->next) { - if ((s->num != hnum) || - (s->daddr && s->daddr!=rmt_addr) || - (s->dport != rmt_port && s->dport != 0) || - (s->rcv_saddr && s->rcv_saddr != loc_addr) || + struct inet_opt *inet = inet_sk(s); + + if (inet->num != hnum || + (inet->daddr && inet->daddr != rmt_addr) || + (inet->dport != rmt_port && inet->dport) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || (s->bound_dev_if && s->bound_dev_if != dif)) continue; break; @@ -469,15 +477,15 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len) } else { if (sk->state != TCP_ESTABLISHED) return -ENOTCONN; - ufh.daddr = sk->daddr; - ufh.uh.dest = sk->dport; + ufh.daddr = inet->daddr; + ufh.uh.dest = inet->dport; /* Open fast path for connected socket. Route will not be used, if at least one option is set. 
*/ connected = 1; } - ipc.addr = sk->saddr; - ufh.uh.source = sk->sport; + ipc.addr = inet->saddr; + ufh.uh.source = inet->sport; ipc.opt = NULL; ipc.oif = sk->bound_dev_if; @@ -728,7 +736,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); - err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr, + err = ip_route_connect(&rt, usin->sin_addr.s_addr, inet->saddr, RT_CONN_FLAGS(sk), sk->bound_dev_if); if (err) return err; @@ -736,12 +744,12 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ip_rt_put(rt); return -EACCES; } - if(!sk->saddr) - sk->saddr = rt->rt_src; /* Update source address */ - if(!sk->rcv_saddr) - sk->rcv_saddr = rt->rt_src; - sk->daddr = rt->rt_dst; - sk->dport = usin->sin_port; + if (!inet->saddr) + inet->saddr = rt->rt_src; /* Update source address */ + if (!inet->rcv_saddr) + inet->rcv_saddr = rt->rt_src; + inet->daddr = rt->rt_dst; + inet->dport = usin->sin_port; sk->state = TCP_ESTABLISHED; inet->id = jiffies; @@ -751,17 +759,17 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) int udp_disconnect(struct sock *sk, int flags) { + struct inet_opt *inet = inet_sk(sk); /* * 1003.1g - break association. 
*/ sk->state = TCP_CLOSE; - sk->daddr = 0; - sk->dport = 0; + inet->daddr = 0; + inet->dport = 0; sk->bound_dev_if = 0; if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) { - sk->rcv_saddr = 0; - sk->saddr = 0; + inet->rcv_saddr = inet->saddr = 0; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -773,7 +781,7 @@ int udp_disconnect(struct sock *sk, int flags) } if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) { sk->prot->unhash(sk); - sk->sport = 0; + inet->sport = 0; } sk_dst_reset(sk); return 0; @@ -962,13 +970,14 @@ csum_error: static void get_udp_sock(struct sock *sp, char *tmpbuf, int i) { + struct inet_opt *inet = inet_sk(sp); unsigned int dest, src; __u16 destp, srcp; - dest = sp->daddr; - src = sp->rcv_saddr; - destp = ntohs(sp->dport); - srcp = ntohs(sp->sport); + dest = inet->daddr; + src = inet->rcv_saddr; + destp = ntohs(inet->dport); + srcp = ntohs(inet->sport); sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", i, src, srcp, dest, destp, sp->state, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0cc92d8ac246..628bf2fb97c8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -200,7 +200,7 @@ static int inet6_create(struct socket *sock, int protocol) inet = inet_sk(sk); if (SOCK_RAW == sock->type) { - sk->num = protocol; + inet->num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } @@ -241,12 +241,12 @@ static int inet6_create(struct socket *sock, int protocol) #endif MOD_INC_USE_COUNT; - if (sk->num) { + if (inet->num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically shares. 
*/ - sk->sport = ntohs(sk->num); + inet->sport = ntohs(inet->num); sk->prot->hash(sk); } if (sk->prot->init) { @@ -278,6 +278,7 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); __u32 v4addr = 0; unsigned short snum; @@ -318,8 +319,7 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sk); /* Check these errors (active socket, double bind). */ - if ((sk->state != TCP_CLOSE) || - (sk->num != 0)) { + if (sk->state != TCP_CLOSE || inet->num) { release_sock(sk); return -EINVAL; } @@ -340,8 +340,8 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } } - sk->rcv_saddr = v4addr; - sk->saddr = v4addr; + inet->rcv_saddr = v4addr; + inet->saddr = v4addr; ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); @@ -350,8 +350,7 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Make sure we are allowed to bind here. 
*/ if (sk->prot->get_port(sk, snum) != 0) { - sk->rcv_saddr = 0; - sk->saddr = 0; + inet->rcv_saddr = inet->saddr = 0; memset(&np->rcv_saddr, 0, sizeof(struct in6_addr)); memset(&np->saddr, 0, sizeof(struct in6_addr)); @@ -363,9 +362,9 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sk->userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->userlocks |= SOCK_BINDPORT_LOCK; - sk->sport = ntohs(sk->num); - sk->dport = 0; - sk->daddr = 0; + inet->sport = ntohs(inet->num); + inet->dport = 0; + inet->daddr = 0; release_sock(sk); return 0; @@ -421,17 +420,18 @@ static int inet6_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; if (peer) { - if (!sk->dport) + if (!inet->dport) return -ENOTCONN; if (((1<<sk->state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1) return -ENOTCONN; - sin->sin6_port = sk->dport; + sin->sin6_port = inet->dport; memcpy(&sin->sin6_addr, &np->daddr, sizeof(struct in6_addr)); if (np->sndflow) sin->sin6_flowinfo = np->flow_label; @@ -443,7 +443,7 @@ static int inet6_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(&sin->sin6_addr, &np->rcv_saddr, sizeof(struct in6_addr)); - sin->sin6_port = sk->sport; + sin->sin6_port = inet->sport; } if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = sk->bound_dev_if; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 286395420550..808481d98488 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -79,7 +79,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) struct ip6_ra_chain *ra, *new_ra, **rap; /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ - if (sk->type != SOCK_RAW || sk->num != IPPROTO_RAW) + if (sk->type != SOCK_RAW || 
inet_sk(sk)->num != IPPROTO_RAW) return -EINVAL; new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -283,7 +283,7 @@ update: if (opt) { struct tcp_opt *tp = tcp_sk(sk); if (!((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE)) - && sk->daddr != LOOPBACK4_IPV6) { + && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { tp->ext_header_len = opt->opt_flen + opt->opt_nflen; tcp_sync_mss(sk, tp->pmtu_cookie); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 399eb59ad2ea..2231feb00c2f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -84,58 +84,63 @@ static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); -static struct neigh_ops ndisc_generic_ops = -{ - AF_INET6, - NULL, - ndisc_solicit, - ndisc_error_report, - neigh_resolve_output, - neigh_connected_output, - dev_queue_xmit, - dev_queue_xmit +static struct neigh_ops ndisc_generic_ops = { + family: AF_INET6, + solicit: ndisc_solicit, + error_report: ndisc_error_report, + output: neigh_resolve_output, + connected_output: neigh_connected_output, + hh_output: dev_queue_xmit, + queue_xmit: dev_queue_xmit, }; -static struct neigh_ops ndisc_hh_ops = -{ - AF_INET6, - NULL, - ndisc_solicit, - ndisc_error_report, - neigh_resolve_output, - neigh_resolve_output, - dev_queue_xmit, - dev_queue_xmit +static struct neigh_ops ndisc_hh_ops = { + family: AF_INET6, + solicit: ndisc_solicit, + error_report: ndisc_error_report, + output: neigh_resolve_output, + connected_output: neigh_resolve_output, + hh_output: dev_queue_xmit, + queue_xmit: dev_queue_xmit, }; -static struct neigh_ops ndisc_direct_ops = -{ - AF_INET6, - NULL, - NULL, - NULL, - dev_queue_xmit, - dev_queue_xmit, - dev_queue_xmit, - dev_queue_xmit +static struct neigh_ops ndisc_direct_ops = { + family: AF_INET6, + output: dev_queue_xmit, + connected_output: dev_queue_xmit, + hh_output: dev_queue_xmit, + queue_xmit: dev_queue_xmit, }; -struct neigh_table nd_tbl = -{ - NULL, - 
AF_INET6, - sizeof(struct neighbour) + sizeof(struct in6_addr), - sizeof(struct in6_addr), - ndisc_hash, - ndisc_constructor, - pndisc_constructor, - pndisc_destructor, - pndisc_redo, - "ndisc_cache", - { NULL, NULL, &nd_tbl, 0, NULL, NULL, - 30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 64, 0 }, - 30*HZ, 128, 512, 1024, +struct neigh_table nd_tbl = { + family: AF_INET6, + entry_size: sizeof(struct neighbour) + sizeof(struct in6_addr), + key_len: sizeof(struct in6_addr), + hash: ndisc_hash, + constructor: ndisc_constructor, + pconstructor: pndisc_constructor, + pdestructor: pndisc_destructor, + proxy_redo: pndisc_redo, + id: "ndisc_cache", + parms: { + tbl: &nd_tbl, + base_reachable_time: 30 * HZ, + retrans_time: 1 * HZ, + gc_staletime: 60 * HZ, + reachable_time: 30 * HZ, + delay_probe_time: 5 * HZ, + queue_len: 3, + ucast_probes: 3, + mcast_probes: 3, + anycast_delay: 1 * HZ, + proxy_delay: (8 * HZ) / 10, + proxy_qlen: 64, + }, + gc_interval: 30 * HZ, + gc_thresh1: 128, + gc_thresh2: 512, + gc_thresh3: 1024, }; #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e5745fcd371e..b2255c005693 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,7 +50,8 @@ rwlock_t raw_v6_lock = RW_LOCK_UNLOCKED; static void raw_v6_hash(struct sock *sk) { - struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)]; + struct sock **skp = &raw_v6_htable[inet_sk(sk)->num & + (RAWV6_HTABLE_SIZE - 1)]; write_lock_bh(&raw_v6_lock); if ((sk->next = *skp) != NULL) @@ -85,7 +86,7 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, int addr_type = ipv6_addr_type(loc_addr); for(s = sk; s; s = s->next) { - if(s->num == num) { + if (inet_sk(s)->num == num) { struct ipv6_pinfo *np = inet6_sk(s); if (!ipv6_addr_any(&np->daddr) && @@ -186,6 +187,7 @@ out: /* This cleans up af_inet6 a bit. 
-DaveM */ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; __u32 v4addr = 0; @@ -233,8 +235,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } } - sk->rcv_saddr = v4addr; - sk->saddr = v4addr; + inet->rcv_saddr = inet->saddr = v4addr; ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); if (!(addr_type & IPV6_ADDR_MULTICAST)) ipv6_addr_copy(&np->saddr, &addr->sin6_addr); @@ -439,6 +440,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len) { struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; @@ -478,7 +480,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len) proto = ntohs(sin6->sin6_port); if (!proto) - proto = sk->num; + proto = inet->num; if (proto > 255) return(-EINVAL); @@ -507,7 +509,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len) if (sk->state != TCP_ESTABLISHED) return(-EINVAL); - proto = sk->num; + proto = inet->num; daddr = &np->daddr; fl.fl6_flowlabel = np->flow_label; } @@ -635,7 +637,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, break; case SOL_ICMPV6: - if (sk->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); @@ -678,7 +680,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, break; case SOL_ICMPV6: - if (sk->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); @@ -741,7 +743,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) 
static void rawv6_close(struct sock *sk, long timeout) { - if (sk->num == IPPROTO_RAW) + if (inet_sk(sk)->num == IPPROTO_RAW) ip6_ra_control(sk, -1, NULL); inet_sock_release(sk); @@ -764,7 +766,7 @@ static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i) dest = &np->daddr; src = &np->rcv_saddr; destp = 0; - srcp = sp->num; + srcp = inet_sk(sp)->num; sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 867cea08a196..7ff24c734dae 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,11 +76,12 @@ static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) { + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *laddr = &np->rcv_saddr; struct in6_addr *faddr = &np->daddr; - __u16 lport = sk->num; - __u16 fport = sk->dport; + __u16 lport = inet->num; + __u16 fport = inet->dport; return tcp_v6_hashfn(laddr, lport, faddr, fport); } @@ -153,14 +154,15 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) !sk2->reuse || sk2->state == TCP_LISTEN) { /* NOTE: IPv6 tw bucket have different format */ - if (!sk2->rcv_saddr || + if (!inet_sk(sk2)->rcv_saddr || addr_type == IPV6_ADDR_ANY || !ipv6_addr_cmp(&np->rcv_saddr, sk2->state != TCP_TIME_WAIT ? 
&np2->rcv_saddr : &((struct tcp_tw_bucket*)sk)->v6_rcv_saddr) || (addr_type==IPV6_ADDR_MAPPED && sk2->family==AF_INET && - sk->rcv_saddr==sk2->rcv_saddr)) + inet_sk(sk)->rcv_saddr == + inet_sk(sk2)->rcv_saddr)) break; } } @@ -185,7 +187,7 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) tb->fastreuse = 0; success: - sk->num = snum; + inet_sk(sk)->num = snum; if (sk->prev == NULL) { if ((sk->bind_next = tb->owners) != NULL) tb->owners->bind_pprev = &sk->bind_next; @@ -255,7 +257,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor read_lock(&tcp_lhash_lock); sk = tcp_listening_hash[tcp_lhashfn(hnum)]; for(; sk; sk = sk->next) { - if((sk->num == hnum) && (sk->family == PF_INET6)) { + if (inet_sk(sk)->num == hnum && sk->family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); score = 1; @@ -313,9 +315,11 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u } /* Must check for a TIME_WAIT'er before going to listener hash. */ for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) { - if(*((__u32 *)&(sk->dport)) == ports && + /* FIXME: acme: check this... 
*/ + struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + + if(*((__u32 *)&(tw->dport)) == ports && sk->family == PF_INET6) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; if(!ipv6_addr_cmp(&tw->v6_daddr, saddr) && !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) && (!sk->bound_dev_if || sk->bound_dev_if == dif)) @@ -424,12 +428,13 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) static int tcp_v6_check_established(struct sock *sk) { + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr = &np->rcv_saddr; struct in6_addr *saddr = &np->daddr; int dif = sk->bound_dev_if; - u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num); - int hash = tcp_v6_hashfn(daddr, sk->num, saddr, sk->dport); + u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num); + int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); struct tcp_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2, **skp; struct tcp_tw_bucket *tw; @@ -439,7 +444,7 @@ static int tcp_v6_check_established(struct sock *sk) for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp)!=NULL; skp = &sk2->next) { tw = (struct tcp_tw_bucket*)sk2; - if(*((__u32 *)&(sk2->dport)) == ports && + if(*((__u32 *)&(tw->dport)) == ports && sk2->family == PF_INET6 && !ipv6_addr_cmp(&tw->v6_daddr, saddr) && !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) && @@ -496,7 +501,7 @@ not_unique: static int tcp_v6_hash_connecting(struct sock *sk) { - unsigned short snum = sk->num; + unsigned short snum = inet_sk(sk)->num; struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)]; struct tcp_bind_bucket *tb = head->chain; @@ -522,6 +527,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct in6_addr *saddr = NULL; @@ -618,9 +624,9 @@ static int 
tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } else { ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), - sk->saddr); + inet->saddr); ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), - sk->rcv_saddr); + inet->rcv_saddr); } return err; @@ -634,7 +640,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl6_src = saddr; fl.oif = sk->bound_dev_if; fl.uli_u.ports.dport = usin->sin6_port; - fl.uli_u.ports.sport = sk->sport; + fl.uli_u.ports.sport = inet->sport; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -662,7 +668,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, /* set the source address */ ipv6_addr_copy(&np->rcv_saddr, saddr); ipv6_addr_copy(&np->saddr, saddr); - sk->rcv_saddr= LOOPBACK4_IPV6; + inet->rcv_saddr = LOOPBACK4_IPV6; tp->ext_header_len = 0; if (np->opt) @@ -675,7 +681,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (buff == NULL) goto failure; - sk->dport = usin->sin6_port; + inet->dport = usin->sin6_port; /* * Init variables @@ -684,7 +690,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!tp->write_seq) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, - sk->sport, sk->dport); + inet->sport, + inet->dport); err = tcp_connect(sk, buff); if (err == 0) @@ -692,7 +699,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, failure: __sk_dst_reset(sk); - sk->dport = 0; + inet->dport = 0; sk->route_caps = 0; return err; } @@ -750,6 +757,7 @@ void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { + struct inet_opt *inet = inet_sk(sk); struct flowi fl; /* BUGGG_FUTURE: Again, it is not clear how @@ -760,8 +768,8 @@ void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.nl_u.ip6_u.daddr = &np->daddr; fl.nl_u.ip6_u.saddr = &np->saddr; fl.oif = 
sk->bound_dev_if; - fl.uli_u.ports.dport = sk->dport; - fl.uli_u.ports.sport = sk->sport; + fl.uli_u.ports.dport = inet->dport; + fl.uli_u.ports.sport = inet->sport; dst = ip6_route_output(sk, &fl); } else @@ -850,7 +858,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req, fl.fl6_flowlabel = 0; fl.oif = req->af.v6_req.iif; fl.uli_u.ports.dport = req->rmt_port; - fl.uli_u.ports.sport = sk->sport; + fl.uli_u.ports.sport = inet_sk(sk)->sport; if (dst == NULL) { opt = np->opt; @@ -1245,14 +1253,15 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newsk == NULL) return NULL; + newinet = inet_sk(newsk); newnp = inet6_sk(newsk); newtp = tcp_sk(newsk); ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), - newsk->daddr); + newinet->daddr); ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), - newsk->saddr); + newinet->saddr); ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); @@ -1303,7 +1312,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.fl6_flowlabel = 0; fl.oif = sk->bound_dev_if; fl.uli_u.ports.dport = req->rmt_port; - fl.uli_u.ports.sport = sk->sport; + fl.uli_u.ports.sport = inet_sk(sk)->sport; dst = ip6_route_output(sk, &fl); } @@ -1376,9 +1385,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = dst->advmss; tcp_initialize_rcv_mss(newsk); - newsk->daddr = LOOPBACK4_IPV6; - newsk->saddr = LOOPBACK4_IPV6; - newsk->rcv_saddr = LOOPBACK4_IPV6; + newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; __tcp_v6_hash(newsk); tcp_inherit_port(sk, newsk); @@ -1680,6 +1687,7 @@ static int tcp_v6_rebuild_header(struct sock *sk) dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { + struct inet_opt *inet = inet_sk(sk); struct flowi fl; fl.proto = IPPROTO_TCP; @@ -1687,8 +1695,8 @@ static int tcp_v6_rebuild_header(struct sock *sk) fl.nl_u.ip6_u.saddr = &np->saddr; fl.fl6_flowlabel = np->flow_label; fl.oif 
= sk->bound_dev_if; - fl.uli_u.ports.dport = sk->dport; - fl.uli_u.ports.sport = sk->sport; + fl.uli_u.ports.dport = inet->dport; + fl.uli_u.ports.sport = inet->sport; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; @@ -1714,6 +1722,7 @@ static int tcp_v6_rebuild_header(struct sock *sk) static int tcp_v6_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; struct dst_entry *dst; @@ -1724,8 +1733,8 @@ static int tcp_v6_xmit(struct sk_buff *skb) fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); fl.oif = sk->bound_dev_if; - fl.uli_u.ports.sport = sk->sport; - fl.uli_u.ports.dport = sk->dport; + fl.uli_u.ports.sport = inet->sport; + fl.uli_u.ports.dport = inet->dport; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; @@ -1761,7 +1770,7 @@ static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_family = AF_INET6; memcpy(&sin6->sin6_addr, &np->daddr, sizeof(struct in6_addr)); - sin6->sin6_port = sk->dport; + sin6->sin6_port = inet_sk(sk)->dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; @@ -1903,7 +1912,7 @@ static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], - ntohs(sk->sport), + ntohs(inet_sk(sk)->sport), dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], ntohs(req->rmt_port), @@ -1924,13 +1933,14 @@ static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i) __u16 destp, srcp; int timer_active; unsigned long timer_expires; + struct inet_opt *inet = inet_sk(sp); struct tcp_opt *tp = tcp_sk(sp); struct ipv6_pinfo *np = inet6_sk(sp); dest = &np->daddr; src = &np->rcv_saddr; - destp = ntohs(sp->dport); - srcp = ntohs(sp->sport); + destp = 
ntohs(inet->dport); + srcp = ntohs(inet->sport); if (tp->pending == TCP_TIME_RETRANS) { timer_active = 1; timer_expires = tp->timeout; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c28551fd6078..a8e89f16dc2c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -65,11 +65,11 @@ static int udp_v6_get_port(struct sock *sk, unsigned short snum) best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct sock *sk; + struct sock *sk2; int size; - sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (!sk) { + sk2 = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (!sk2) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -80,7 +80,7 @@ static int udp_v6_get_port(struct sock *sk, unsigned short snum) do { if (++size >= best_size_so_far) goto next; - } while ((sk = sk->next) != NULL); + } while ((sk2 = sk2->next) != NULL); best_size_so_far = size; best = result; next:; @@ -104,23 +104,24 @@ gotit: for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) { + struct inet_opt *inet2 = inet_sk(sk2); struct ipv6_pinfo *np2 = inet6_sk(sk2); - if (sk2->num == snum && + if (inet2->num == snum && sk2 != sk && sk2->bound_dev_if == sk->bound_dev_if && - (!sk2->rcv_saddr || + (!inet2->rcv_saddr || addr_type == IPV6_ADDR_ANY || !ipv6_addr_cmp(&np->rcv_saddr, &np2->rcv_saddr) || (addr_type == IPV6_ADDR_MAPPED && sk2->family == AF_INET && - sk->rcv_saddr == sk2->rcv_saddr)) && + inet_sk(sk)->rcv_saddr == inet2->rcv_saddr)) && (!sk2->reuse || !sk->reuse)) goto fail; } } - sk->num = snum; + inet_sk(sk)->num = snum; if (sk->pprev == NULL) { struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; if ((sk->next = *skp) != NULL) @@ -151,7 +152,7 @@ static void udp_v6_unhash(struct sock *sk) sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; - sk->num = 0; + inet_sk(sk)->num = 0; sock_prot_dec_use(sk->prot); 
__sock_put(sk); } @@ -167,12 +168,13 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, read_lock(&udp_hash_lock); for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { - if((sk->num == hnum) && - (sk->family == PF_INET6)) { + struct inet_opt *inet = inet_sk(sk); + + if (inet->num == hnum && sk->family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); int score = 0; - if(sk->dport) { - if(sk->dport != sport) + if (inet->dport) { + if (inet->dport != sport) continue; score++; } @@ -213,6 +215,7 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr; struct in6_addr saddr; @@ -268,16 +271,16 @@ ipv4_connected: if (err < 0) return err; - ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), sk->daddr); + ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr); if (ipv6_addr_any(&np->saddr)) { ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff), - sk->saddr); + inet->saddr); } if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff), - sk->rcv_saddr); + inet->rcv_saddr); } return 0; } @@ -300,7 +303,7 @@ ipv4_connected: ipv6_addr_copy(&np->daddr, daddr); np->flow_label = fl.fl6_flowlabel; - sk->dport = usin->sin6_port; + inet->dport = usin->sin6_port; /* * Check for a route to destination an obtain the @@ -311,8 +314,8 @@ ipv4_connected: fl.fl6_dst = &np->daddr; fl.fl6_src = &saddr; fl.oif = sk->bound_dev_if; - fl.uli_u.ports.dport = sk->dport; - fl.uli_u.ports.sport = sk->sport; + fl.uli_u.ports.dport = inet->dport; + fl.uli_u.ports.sport = inet->sport; if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { @@ -344,7 +347,7 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &saddr); - 
sk->rcv_saddr = LOOPBACK4_IPV6; + inet->rcv_saddr = LOOPBACK4_IPV6; } sk->state = TCP_ESTABLISHED; } @@ -528,10 +531,12 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, struct sock *s = sk; unsigned short num = ntohs(loc_port); for(; s; s = s->next) { - if(s->num == num) { + struct inet_opt *inet = inet_sk(s); + + if (inet->num == num) { struct ipv6_pinfo *np = inet6_sk(s); - if(s->dport) { - if(s->dport != rmt_port) + if (inet->dport) { + if (inet->dport != rmt_port) continue; } if (!ipv6_addr_any(&np->daddr) && @@ -757,6 +762,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) { struct ipv6_txoptions opt_space; struct udpv6fakehdr udh; + struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; struct ipv6_txoptions *opt = NULL; @@ -818,7 +824,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) if (sk->state != TCP_ESTABLISHED) return -ENOTCONN; - udh.uh.dest = sk->dport; + udh.uh.dest = inet->dport; daddr = &np->daddr; fl.fl6_flowlabel = np->flow_label; } @@ -867,7 +873,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) if (opt && opt->srcrt) udh.daddr = daddr; - udh.uh.source = sk->sport; + udh.uh.source = inet->sport; udh.uh.len = len < 0x10000 ? 
htons(len) : 0; udh.uh.check = 0; udh.iov = msg->msg_iov; @@ -905,14 +911,15 @@ static struct inet6_protocol udpv6_protocol = { static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i) { + struct inet_opt *inet = inet_sk(sp); struct ipv6_pinfo *np = inet6_sk(sp); struct in6_addr *dest, *src; __u16 destp, srcp; dest = &np->daddr; src = &np->rcv_saddr; - destp = ntohs(sp->dport); - srcp = ntohs(sp->sport); + destp = ntohs(inet->dport); + srcp = ntohs(inet->sport); sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 76c50e67866e..84e1262d6660 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -180,6 +180,7 @@ struct packet_opt spinlock_t bind_lock; char running; /* prot_hook is attached*/ int ifindex; /* bound device */ + unsigned short num; struct tpacket_stats stats; #ifdef CONFIG_PACKET_MULTICAST struct packet_mclist *mclist; @@ -678,8 +679,10 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len, */ if (saddr == NULL) { - ifindex = pkt_sk(sk)->ifindex; - proto = sk->num; + struct packet_opt *po = pkt_sk(sk); + + ifindex = po->ifindex; + proto = po->num; addr = NULL; } else { err = -EINVAL; @@ -839,7 +842,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol) po->running = 0; } - sk->num = protocol; + po->num = protocol; po->prot_hook.type = protocol; po->prot_hook.dev = dev; @@ -894,7 +897,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add dev = dev_get_by_name(name); if (dev) { - err = packet_do_bind(sk, dev, sk->num); + err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); } return err; @@ -924,7 +927,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (dev == NULL) goto out; } - err = packet_do_bind(sk, dev, sll->sll_protocol ? 
: sk->num); + err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); if (dev) dev_put(dev); @@ -972,7 +975,7 @@ static int packet_create(struct socket *sock, int protocol) goto out_free; memset(po, 0, sizeof(*po)); sk->family = PF_PACKET; - sk->num = protocol; + po->num = protocol; sk->destruct = packet_sock_destruct; atomic_inc(&packet_socks_nr); @@ -1131,7 +1134,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; - sll->sll_protocol = sk->num; + sll->sll_protocol = po->num; dev = dev_get_by_index(po->ifindex); if (dev) { sll->sll_hatype = dev->type; @@ -1410,7 +1413,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void break; case NETDEV_UP: spin_lock(&po->bind_lock); - if (dev->ifindex == po->ifindex && sk->num && po->running==0) { + if (dev->ifindex == po->ifindex && po->num && + !po->running) { dev_add_pack(&po->prot_hook); sock_hold(sk); po->running = 1; @@ -1861,7 +1865,7 @@ static int packet_read_proc(char *buffer, char **start, off_t offset, s, atomic_read(&s->refcnt), s->type, - ntohs(s->num), + ntohs(po->num), po->ifindex, po->running, atomic_read(&s->rmem_alloc), diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 919a095f6953..31200f4283c6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1161,7 +1161,8 @@ if (svsk->sk_sk == NULL) /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->protocol, ntohs(inet->sport)); + *errp = svc_register(serv, inet->protocol, + ntohs(inet_sk(inet)->sport)); if (*errp < 0) { inet->user_data = NULL; -- cgit v1.2.3 From fc3b43ea7743b991dea4f2acb7ebe5709864fd6d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 13:50:45 -0800 Subject: Remove bogus tw->tb NULL check in tcp_timewait_kill. 
This is what made the following bug harder to find: Put new timewait buckets into the bind hash _FIRST_ before they appear into the established hash to kill some races with socket creation/lookup. --- net/ipv4/tcp_minisocks.c | 53 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 16ddbb124bd2..318bceafa44e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -75,17 +75,16 @@ void tcp_timewait_kill(struct tcp_tw_bucket *tw) /* Disassociate with bind bucket. */ bhead = &tcp_bhash[tcp_bhashfn(tw->num)]; spin_lock(&bhead->lock); - if ((tb = tw->tb) != NULL) { - if(tw->bind_next) - tw->bind_next->bind_pprev = tw->bind_pprev; - *(tw->bind_pprev) = tw->bind_next; - tw->tb = NULL; - if (tb->owners == NULL) { - if (tb->next) - tb->next->pprev = tb->pprev; - *(tb->pprev) = tb->next; - kmem_cache_free(tcp_bucket_cachep, tb); - } + tb = tw->tb; + if(tw->bind_next) + tw->bind_next->bind_pprev = tw->bind_pprev; + *(tw->bind_pprev) = tw->bind_next; + tw->tb = NULL; + if (tb->owners == NULL) { + if (tb->next) + tb->next->pprev = tb->pprev; + *(tb->pprev) = tb->next; + kmem_cache_free(tcp_bucket_cachep, tb); } spin_unlock(&bhead->lock); @@ -304,9 +303,23 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) struct tcp_bind_hashbucket *bhead; struct sock **head, *sktw; + /* Step 1: Put TW into bind hash. Original socket stays there too. + Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in + binding cache, even if it is closed. 
+ */ + bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; + spin_lock(&bhead->lock); + tw->tb = (struct tcp_bind_bucket *)sk->prev; + BUG_TRAP(sk->prev!=NULL); + if ((tw->bind_next = tw->tb->owners) != NULL) + tw->tb->owners->bind_pprev = &tw->bind_next; + tw->tb->owners = (struct sock*)tw; + tw->bind_pprev = &tw->tb->owners; + spin_unlock(&bhead->lock); + write_lock(&ehead->lock); - /* Step 1: Remove SK from established hash. */ + /* Step 2: Remove SK from established hash. */ if (sk->pprev) { if(sk->next) sk->next->pprev = sk->pprev; @@ -315,7 +328,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) sock_prot_dec_use(sk->prot); } - /* Step 2: Hash TW into TIMEWAIT half of established hash table. */ + /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ head = &(ehead + tcp_ehash_size)->chain; sktw = (struct sock *)tw; if((sktw->next = *head) != NULL) @@ -325,20 +338,6 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) atomic_inc(&tw->refcnt); write_unlock(&ehead->lock); - - /* Step 3: Put TW into bind hash. Original socket stays there too. - Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in - binding cache, even if it is closed. - */ - bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; - spin_lock(&bhead->lock); - tw->tb = (struct tcp_bind_bucket *)sk->prev; - BUG_TRAP(sk->prev!=NULL); - if ((tw->bind_next = tw->tb->owners) != NULL) - tw->tb->owners->bind_pprev = &tw->bind_next; - tw->tb->owners = (struct sock*)tw; - tw->bind_pprev = &tw->tb->owners; - spin_unlock(&bhead->lock); } /* -- cgit v1.2.3 From f5ead92b654b4f646bc06d02384ee8bb3e823af8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 13:54:19 -0800 Subject: Avoid using read/modify/write cycles to set frag_off field of IPv4 header in hot paths. Use __constant_htons as appropriate. 
--- net/ipv4/ip_output.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b5d4485d08f2..3d4c5c7ea391 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -135,9 +135,10 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; - iph->frag_off = 0; if (ip_dont_fragment(sk, &rt->u.dst)) - iph->frag_off |= htons(IP_DF); + iph->frag_off = __constant_htons(IP_DF); + else + iph->frag_off = 0; iph->ttl = inet->ttl; iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; @@ -308,9 +309,6 @@ static inline int ip_queue_xmit2(struct sk_buff *skb) if (skb->len > rt->u.dst.pmtu) goto fragment; - if (ip_dont_fragment(sk, &rt->u.dst)) - iph->frag_off |= __constant_htons(IP_DF); - ip_select_ident(iph, &rt->u.dst, sk); /* Add an IP checksum. */ @@ -324,7 +322,6 @@ fragment: /* Reject packet ONLY if TCP might fragment * it itself, if were careful enough. */ - iph->frag_off |= __constant_htons(IP_DF); NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big to self\n")); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, @@ -385,7 +382,10 @@ packet_routed: iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); iph->tot_len = htons(skb->len); - iph->frag_off = 0; + if (ip_dont_fragment(sk, &rt->u.dst)) + iph->frag_off = __constant_htons(IP_DF); + else + iph->frag_off = 0; iph->ttl = inet->ttl; iph->protocol = sk->protocol; iph->saddr = rt->rt_src; @@ -452,7 +452,7 @@ static int ip_build_xmit_slow(struct sock *sk, mtu = rt->u.dst.pmtu; if (ip_dont_fragment(sk, &rt->u.dst)) - df = htons(IP_DF); + df = __constant_htons(IP_DF); length -= sizeof(struct iphdr); @@ -573,7 +573,7 @@ static int ip_build_xmit_slow(struct sock *sk, /* * Any further fragments will have MF set. 
*/ - mf = htons(IP_MF); + mf = __constant_htons(IP_MF); } if (rt->rt_type == RTN_MULTICAST) iph->ttl = inet->mc_ttl; @@ -672,7 +672,7 @@ int ip_build_xmit(struct sock *sk, */ df = 0; if (ip_dont_fragment(sk, &rt->u.dst)) - df = htons(IP_DF); + df = __constant_htons(IP_DF); /* * Fast path for unfragmented frames without options. @@ -776,7 +776,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) */ offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; - not_last_frag = iph->frag_off & htons(IP_MF); + not_last_frag = iph->frag_off & __constant_htons(IP_MF); /* * Keep copying data until we run out. @@ -861,7 +861,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) * last fragment then keep MF on each bit */ if (left > 0 || not_last_frag) - iph->frag_off |= htons(IP_MF); + iph->frag_off |= __constant_htons(IP_MF); ptr += len; offset += len; -- cgit v1.2.3 From b786a0802d70e7b3963bd49b95be503e368cc9ef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 13:56:00 -0800 Subject: Do not report inode numbers as negative in networking procfs nodes --- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/unix/af_unix.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1c798bdd9414..107267695d83 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -638,7 +638,7 @@ static void get_raw_sock(struct sock *sp, char *tmpbuf, int i) srcp = inet->num; sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", i, src, srcp, dest, destp, sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), 0, 0L, 0, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7cb2d0cee4ee..b0b616b52a12 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -979,7 +979,7 @@ static void get_udp_sock(struct sock *sp, char *tmpbuf, int i) destp = 
ntohs(inet->dport); srcp = ntohs(inet->sport); sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", i, src, srcp, dest, destp, sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), 0, 0L, 0, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b2255c005693..7748ced7e14d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -769,7 +769,7 @@ static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i) srcp = inet_sk(sp)->num; sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a8e89f16dc2c..a895089358aa 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -922,7 +922,7 @@ static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i) srcp = ntohs(inet->sport); sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 53f3e78cd2cf..b1a27f803d3d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1767,7 +1767,7 @@ static int unix_read_proc(char *buffer, char **start, off_t offset, struct unix_sock *u = unix_sk(s); unix_state_rlock(s); - len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld", + len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5lu", s, atomic_read(&s->refcnt), 0, -- cgit v1.2.3 From 0c2ff483e49250eacb8ad6ba5306970d84afc2b8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 13:58:24 -0800 Subject: Export ip_nat_protocol_{register,unregister} and ip_nat_used_tuple.
Minor cleanups to conntrack/irc modules/configuration. From Harald Welte and the netfilter team. --- include/linux/netfilter_ipv4/ip_conntrack.h | 5 ----- net/ipv4/netfilter/Config.in | 4 +--- net/ipv4/netfilter/Makefile | 6 ++---- net/ipv4/netfilter/ip_nat_standalone.c | 3 +++ 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index f0e8b9927eab..107cce0c67a3 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -82,10 +82,7 @@ struct ip_conntrack_expect #endif #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> - -#if defined(CONFIG_IP_NF_IRC) || defined(CONFIG_IP_NF_IRC_MODULE) #include <linux/netfilter_ipv4/ip_conntrack_irc.h> -#endif struct ip_conntrack { @@ -125,9 +122,7 @@ struct ip_conntrack union { struct ip_ct_ftp ct_ftp_info; -#if defined(CONFIG_IP_NF_IRC) || defined(CONFIG_IP_NF_IRC_MODULE) struct ip_ct_irc ct_irc_info; -#endif } help; #ifdef CONFIG_IP_NF_NAT_NEEDED diff --git a/net/ipv4/netfilter/Config.in b/net/ipv4/netfilter/Config.in index ec3b4e0de22f..47c703c34ddc 100644 --- a/net/ipv4/netfilter/Config.in +++ b/net/ipv4/netfilter/Config.in @@ -75,9 +75,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then dep_tristate ' MARK target support' CONFIG_IP_NF_TARGET_MARK $CONFIG_IP_NF_MANGLE fi dep_tristate ' LOG target support' CONFIG_IP_NF_TARGET_LOG $CONFIG_IP_NF_IPTABLES - if [ "$CONFIG_NETLINK" != "n" ]; then - dep_tristate ' ULOG target support' CONFIG_IP_NF_TARGET_ULOG $CONFIG_NETLINK $CONFIG_IP_NF_IPTABLES - fi + dep_tristate ' ULOG target support' CONFIG_IP_NF_TARGET_ULOG $CONFIG_IP_NF_IPTABLES dep_tristate ' TCPMSS target support' CONFIG_IP_NF_TARGET_TCPMSS $CONFIG_IP_NF_IPTABLES fi diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index a3d4e39fcac0..6e5a8a1cc0b7 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -31,15 +31,13 @@ ipchains-objs := $(ip_nf_compat-objs) ipchains_core.o # connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o -# IRC support -obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o -obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o - # connection tracking helpers obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o +obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o # NAT helpers obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o +obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 6e774c653626..b36339d2bc3f 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -308,6 +308,8 @@ module_init(init); module_exit(fini); EXPORT_SYMBOL(ip_nat_setup_info); +EXPORT_SYMBOL(ip_nat_protocol_register); +EXPORT_SYMBOL(ip_nat_protocol_unregister); EXPORT_SYMBOL(ip_nat_helper_register); EXPORT_SYMBOL(ip_nat_helper_unregister); EXPORT_SYMBOL(ip_nat_expect_register); @@ -316,4 +318,5 @@ EXPORT_SYMBOL(ip_nat_cheat_check); EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); EXPORT_SYMBOL(ip_nat_seq_adjust); EXPORT_SYMBOL(ip_nat_delete_sack); +EXPORT_SYMBOL(ip_nat_used_tuple); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c6892f5db683f8f8a234cd1244fcfaffa6701a6e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 13:59:57 -0800 Subject: Never set IP_DF for ICMP packets. 
--- net/ipv4/netfilter/ipt_REJECT.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 04caebcdb4b0..eaec9408ac13 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -234,11 +234,8 @@ static void send_unreach(struct sk_buff *skb_in, int code) iph->tos=tos; iph->tot_len = htons(length); - /* This abbreviates icmp->send->ip_build_xmit->ip_dont_fragment */ - if (!ipv4_config.no_pmtu_disc - && !(rt->u.dst.mxlock&(1<<RTAX_MTU))) - iph->frag_off = htons(IP_DF); - else iph->frag_off = 0; + /* PMTU discovery never applies to ICMP packets. */ + iph->frag_off = 0; iph->ttl = MAXTTL; ip_select_ident(iph, &rt->u.dst, NULL); -- cgit v1.2.3 From 44303b296c993f2ac099065538becb854a843e52 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 14:00:51 -0800 Subject: RED packet scheduler bug fixes from Jamal Hadi Salim. --- net/sched/sch_gred.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 6778b3697653..dfd521b4bec6 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -7,7 +7,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Authors: J Hadi Salim (hadi@nortelnetworks.com) 1998,1999 + * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 * * 991129: - Bug fix with grio mode * - a better sing.
AvgQ mode with Grio(WRED) @@ -436,7 +436,7 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) if (table->tab[table->def] == NULL) { table->tab[table->def]= kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); - if (NULL == table->tab[ctl->DP]) + if (NULL == table->tab[table->def]) return -ENOMEM; memset(table->tab[table->def], 0, @@ -498,7 +498,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) { unsigned long qave; struct rtattr *rta; - struct tc_gred_qopt *opt; + struct tc_gred_qopt *opt = NULL ; struct tc_gred_qopt *dst; struct gred_sched *table = (struct gred_sched *)sch->data; struct gred_sched_data *q; @@ -520,7 +520,6 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (!table->initd) { DPRINTK("NO GRED Queues setup!\n"); - return -1; } for (i=0;irta_len = skb->tail - b; + kfree(opt); return skb->len; rtattr_failure: + if (opt) + kfree(opt); DPRINTK("gred_dump: FAILURE!!!!\n"); /* also free the opt struct here */ -- cgit v1.2.3 From 0e28f14f80bbd210de2f73a0330564c109cbe45a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 14:10:37 -0800 Subject: Make sock_writeable (used mostly by datagram protocols) more reasonable. Kill all references to SOCK_MIN_WRITE_SPACE and kill its definition. Replace with appropriate sock_writeable calls. --- include/net/sock.h | 5 +---- net/irda/af_irda.c | 6 +++--- net/sunrpc/xprt.c | 8 ++------ 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 4c5d7c257d75..d50815f55ff3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -750,16 +750,13 @@ static inline void sk_wake_async(struct sock *sk, int how, int band) #define SOCK_MIN_SNDBUF 2048 #define SOCK_MIN_RCVBUF 256 -/* Must be less or equal SOCK_MIN_SNDBUF */ -#define SOCK_MIN_WRITE_SPACE SOCK_MIN_SNDBUF /* * Default write policy as shown to user space via poll/select/SIGIO - * Kernel internally doesn't use the MIN_WRITE_SPACE threshold. 
*/ static inline int sock_writeable(struct sock *sk) { - return sock_wspace(sk) >= SOCK_MIN_WRITE_SPACE; + return atomic_read(&sk->wmem_alloc) < (sk->sndbuf / 2); } static inline int gfp_any(void) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 7ac7c5249732..2110b9b3a558 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1700,7 +1700,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, if (sk->state == TCP_ESTABLISHED) { if ((self->tx_flow == FLOW_START) && - (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= SOCK_MIN_WRITE_SPACE)) + sock_writeable(sk)) { mask |= POLLOUT | POLLWRNORM | POLLWRBAND; } @@ -1708,13 +1708,13 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, break; case SOCK_SEQPACKET: if ((self->tx_flow == FLOW_START) && - (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= SOCK_MIN_WRITE_SPACE)) + sock_writeable(sk)) { mask |= POLLOUT | POLLWRNORM | POLLWRBAND; } break; case SOCK_DGRAM: - if (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= SOCK_MIN_WRITE_SPACE) + if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; break; default: diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3c429ef6ca93..0751536f7828 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -67,9 +67,6 @@ #include -/* Following value should be > 32k + RPC overhead */ -#define XPRT_MIN_WRITE_SPACE (35000 + SOCK_MIN_WRITE_SPACE) - extern spinlock_t rpc_queue_lock; /* @@ -1099,9 +1096,8 @@ udp_write_space(struct sock *sk) if (xprt->shutdown) return; - - /* Wait until we have enough socket memory */ - if (sock_wspace(sk) < min_t(int, sk->sndbuf,XPRT_MIN_WRITE_SPACE)) + /* Wait until we have enough socket memory. */ + if (sock_writeable(sk)) return; if (!xprt_test_and_set_wspace(xprt)) { -- cgit v1.2.3 From c41cbcb782bb220696c1c49b2544cac7db596fcd Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sun, 10 Mar 2002 14:13:40 -0800 Subject: sock_register inet6_family_ops before we do init calls which might try to create ipv6 sockets. --- net/ipv6/af_inet6.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 628bf2fb97c8..4cceee46c88f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -675,6 +675,11 @@ static int __init inet6_init(void) */ inet6_register_protosw(&rawv6_protosw); + /* Register the family here so that the init calls below will + * be able to create sockets. (?? is this dangerous ??) + */ + (void) sock_register(&inet6_family_ops); + /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -719,9 +724,6 @@ static int __init inet6_init(void) udpv6_init(); tcpv6_init(); - /* Now the userspace is allowed to create INET6 sockets. */ - (void) sock_register(&inet6_family_ops); - return 0; #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 04c1e5a1457257b3325e0510cebedebb9b054a19 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Mar 2002 14:17:34 -0800 Subject: Major revamp of VLAN layer: 1) Add hw acceleration hooks for device drivers. 2) Move private declarations out of public includes. 3) Mark file local functions and data as static. 4) Use a small hash table for VLAN group lookups. 5) Correct all the locking and device ref counting. 6) No longer mark it as CONFIG_EXPERIMENTAL. 
--- include/linux/if_vlan.h | 155 +++++++--------- include/linux/netdevice.h | 12 ++ net/8021q/vlan.c | 440 ++++++++++++++++++++++++++++------------------ net/8021q/vlan.h | 48 ++++- net/8021q/vlan_dev.c | 316 ++++++++++++++++++++------------- net/8021q/vlanproc.c | 66 +++---- net/Config.in | 4 +- 7 files changed, 604 insertions(+), 437 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index d1d8425b181c..99d7f1663f30 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -52,70 +52,16 @@ struct vlan_hdr { unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; -/* Find a VLAN device by the MAC address of it's Ethernet device, and - * it's VLAN ID. The default configuration is to have VLAN's scope - * to be box-wide, so the MAC will be ignored. The mac will only be - * looked at if we are configured to have a seperate set of VLANs per - * each MAC addressable interface. Note that this latter option does - * NOT follow the spec for VLANs, but may be useful for doing very - * large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs. 
- */ -struct net_device *find_802_1Q_vlan_dev(struct net_device* real_dev, - unsigned short VID); /* vlan.c */ +#define VLAN_VID_MASK 0xfff /* found in af_inet.c */ extern int (*vlan_ioctl_hook)(unsigned long arg); -/* found in vlan_dev.c */ -struct net_device_stats* vlan_dev_get_stats(struct net_device* dev); -int vlan_dev_rebuild_header(struct sk_buff *skb); -int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype); -int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len); -int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); -int vlan_dev_change_mtu(struct net_device *dev, int new_mtu); -int vlan_dev_set_mac_address(struct net_device *dev, void* addr); -int vlan_dev_open(struct net_device* dev); -int vlan_dev_stop(struct net_device* dev); -int vlan_dev_init(struct net_device* dev); -void vlan_dev_destruct(struct net_device* dev); -void vlan_dev_copy_and_sum(struct sk_buff *dest, unsigned char *src, - int length, int base); -int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val); - -/* VLAN multicast stuff */ -/* Delete all of the MC list entries from this vlan device. Also deals - * with the underlying device... - */ -void vlan_flush_mc_list(struct net_device* dev); -/* copy the mc_list into the vlan_info structure. */ -void vlan_copy_mc_list(struct dev_mc_list* mc_list, struct vlan_dev_info* vlan_info); -/** dmi is a single entry into a dev_mc_list, a single node. mc_list is - * an entire list, and we'll iterate through it. - */ -int vlan_should_add_mc(struct dev_mc_list *dmi, struct dev_mc_list *mc_list); -/** Taken from Gleb + Lennert's VLAN code, and modified... 
*/ -void vlan_dev_set_multicast_list(struct net_device *vlan_dev); - -int vlan_collection_add_vlan(struct vlan_collection* vc, unsigned short vlan_id, - unsigned short flags); -int vlan_collection_remove_vlan(struct vlan_collection* vc, - struct net_device* vlan_dev); -int vlan_collection_remove_vlan_id(struct vlan_collection* vc, unsigned short vlan_id); - -/* found in vlan.c */ -/* Our listing of VLAN group(s) */ -extern struct vlan_group* p802_1Q_vlan_list; - #define VLAN_NAME "vlan" /* if this changes, algorithm will have to be reworked because this * depends on completely exhausting the VLAN identifier space. Thus - * it gives constant time look-up, but it many cases it wastes memory. + * it gives constant time look-up, but in many cases it wastes memory. */ #define VLAN_GROUP_ARRAY_LEN 4096 @@ -170,56 +116,73 @@ struct vlan_dev_info { /* inline functions */ -/* Used in vlan_skb_recv */ -static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +static inline struct net_device_stats *vlan_dev_get_stats(struct net_device *dev) { - if (VLAN_DEV_INFO(skb->dev)->flags & 1) { - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb) { - /* Lifted from Gleb's VLAN code... 
*/ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac.raw += VLAN_HLEN; - } - } - - return skb; + return &(VLAN_DEV_INFO(dev)->dev_stats); } -static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev, - struct sk_buff* skb) +static inline __u32 vlan_get_ingress_priority(struct net_device *dev, + unsigned short vlan_tag) { - struct vlan_priority_tci_mapping *mp = - VLAN_DEV_INFO(dev)->egress_priority_map[(skb->priority & 0xF)]; - - while (mp) { - if (mp->priority == skb->priority) { - return mp->vlan_qos; /* This should already be shifted to mask - * correctly with the VLAN's TCI - */ - } - mp = mp->next; - } - return 0; -} + struct vlan_dev_info *vip = VLAN_DEV_INFO(dev); -static inline int vlan_dmi_equals(struct dev_mc_list *dmi1, - struct dev_mc_list *dmi2) -{ - return ((dmi1->dmi_addrlen == dmi2->dmi_addrlen) && - (memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0)); + return vip->ingress_priority_map[(vlan_tag >> 13) & 0x7]; } -static inline void vlan_destroy_mc_list(struct dev_mc_list *mc_list) +/* VLAN tx hw acceleration helpers. */ +struct vlan_skb_tx_cookie { + u32 magic; + u32 vlan_tag; +}; + +#define VLAN_TX_COOKIE_MAGIC 0x564c414e /* "VLAN" in ascii. */ +#define VLAN_TX_SKB_CB(__skb) ((struct vlan_skb_tx_cookie *)&((__skb)->cb[0])) +#define vlan_tx_tag_present(__skb) \ + (VLAN_TX_SKB_CB(__skb)->magic == VLAN_TX_COOKIE_MAGIC) +#define vlan_tx_tag_get(__skb) (VLAN_TX_SKB_CB(__skb)->vlan_tag) + +/* VLAN rx hw acceleration helper. This acts like netif_rx(). */ +static inline int vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, + unsigned short vlan_tag) { - struct dev_mc_list *dmi = mc_list; - struct dev_mc_list *next; + struct net_device_stats *stats; - while(dmi) { - next = dmi->next; - kfree(dmi); - dmi = next; + skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; + if (skb->dev == NULL) { + kfree_skb(skb); + + /* Not NET_RX_DROP, this is not being dropped + * due to congestion. 
+ */ + return 0; } + + skb->dev->last_rx = jiffies; + + stats = vlan_dev_get_stats(skb->dev); + stats->rx_packets++; + stats->rx_bytes += skb->len; + + skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tag); + switch (skb->pkt_type) { + case PACKET_BROADCAST: + break; + + case PACKET_MULTICAST: + stats->multicast++; + break; + + case PACKET_OTHERHOST: + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the underlying + * device, and still route correctly. + */ + if (!memcmp(skb->mac.ethernet->h_dest, skb->dev->dev_addr, ETH_ALEN)) + skb->pkt_type = PACKET_HOST; + break; + }; + + return netif_rx(skb); } #endif /* __KERNEL__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8096e640c416..178f6a5a0fe6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,6 +40,7 @@ #endif struct divert_blk; +struct vlan_group; #define HAVE_ALLOC_NETDEV /* feature macro: alloc_xxxdev functions are available. */ @@ -357,6 +358,10 @@ struct net_device #define NETIF_F_DYNALLOC 16 /* Self-dectructable device. */ #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ +#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ +#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ /* Called after device is detached from network. 
*/ void (*uninit)(struct net_device *dev); @@ -398,6 +403,13 @@ struct net_device #define HAVE_TX_TIMEOUT void (*tx_timeout) (struct net_device *dev); + void (*vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); + int (*hard_header_parse)(struct sk_buff *skb, unsigned char *haddr); int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f6be310ede54..0e10441071ed 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -8,7 +8,9 @@ * * Fixes: * Fix for packet capture - Nick Eggleston ; - * + * Add HW acceleration hooks - David S. Miller ; + * Correct all the locking - David S. Miller ; + * Use hash table for VLAN groups - David S. Miller * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -37,12 +39,15 @@ /* Global VLAN variables */ /* Our listing of VLAN group(s) */ -struct vlan_group *p802_1Q_vlan_list; +struct vlan_group *vlan_group_hash[VLAN_GRP_HASH_SIZE]; +spinlock_t vlan_group_lock = SPIN_LOCK_UNLOCKED; +#define vlan_grp_hashfn(IDX) ((((IDX) >> VLAN_GRP_HASH_SHIFT) ^ (IDX)) & VLAN_GRP_HASH_MASK) static char vlan_fullname[] = "802.1Q VLAN Support"; static unsigned int vlan_version = 1; -static unsigned int vlan_release = 6; -static char vlan_copyright[] = " Ben Greear "; +static unsigned int vlan_release = 7; +static char vlan_copyright[] = "Ben Greear "; +static char vlan_buggyright[] = "David S. Miller "; static int vlan_device_event(struct notifier_block *, unsigned long, void *); @@ -55,9 +60,6 @@ struct notifier_block vlan_notifier_block = { /* Determines interface naming scheme. */ unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD; -/* Counter for how many NON-VLAN protos we've received on a VLAN. 
*/ -unsigned long vlan_bad_proto_recvd = 0; - /* DO reorder the header by default */ unsigned short vlan_default_dev_flags = 1; @@ -83,6 +85,8 @@ static int __init vlan_proto_init(void) printk(VLAN_INF "%s v%u.%u %s\n", vlan_fullname, vlan_version, vlan_release, vlan_copyright); + printk(VLAN_INF "All bugs added by %s\n", + vlan_buggyright); /* proc file system initialization */ err = vlan_proc_init(); @@ -100,71 +104,83 @@ static int __init vlan_proto_init(void) vlan_ioctl_hook = vlan_ioctl_handler; - printk(VLAN_INF "%s Initialization complete.\n", VLAN_NAME); return 0; } -/* - * Cleanup of groups before exit - */ - -static void vlan_group_cleanup(void) -{ - struct vlan_group *grp = NULL; - struct vlan_group *nextgroup; - - for (grp = p802_1Q_vlan_list; (grp != NULL);) { - nextgroup = grp->next; - kfree(grp); - grp = nextgroup; - } - p802_1Q_vlan_list = NULL; -} - /* * Module 'remove' entry point. * o delete /proc/net/router directory and static entries. */ static void __exit vlan_cleanup_module(void) { + int i; + + /* This table must be empty if there are no module + * references left. + */ + for (i = 0; i < VLAN_GRP_HASH_SIZE; i++) { + if (vlan_group_hash[i] != NULL) + BUG(); + } + /* Un-register us from receiving netdevice events */ unregister_netdevice_notifier(&vlan_notifier_block); dev_remove_pack(&vlan_packet_type); vlan_proc_cleanup(); - vlan_group_cleanup(); vlan_ioctl_hook = NULL; } module_init(vlan_proto_init); module_exit(vlan_cleanup_module); -/** Will search linearly for now, based on device index. Could - * hash, or directly link, this some day. --Ben - * TODO: Potential performance issue here. Linear search where N is - * the number of 'real' devices used by VLANs. - */ -struct vlan_group* vlan_find_group(int real_dev_ifindex) +/* Must be invoked with vlan_group_lock held. 
*/ +static struct vlan_group *__vlan_find_group(int real_dev_ifindex) { - struct vlan_group *grp = NULL; + struct vlan_group *grp; - br_read_lock_bh(BR_NETPROTO_LOCK); - for (grp = p802_1Q_vlan_list; - ((grp != NULL) && (grp->real_dev_ifindex != real_dev_ifindex)); + for (grp = vlan_group_hash[vlan_grp_hashfn(real_dev_ifindex)]; + grp != NULL; grp = grp->next) { - /* nothing */ ; + if (grp->real_dev_ifindex == real_dev_ifindex) + break; } - br_read_unlock_bh(BR_NETPROTO_LOCK); return grp; } -/* Find the protocol handler. Assumes VID < 0xFFF. +/* Must hold vlan_group_lock. */ +static void __grp_hash(struct vlan_group *grp) +{ + struct vlan_group **head; + + head = &vlan_group_hash[vlan_grp_hashfn(grp->real_dev_ifindex)]; + grp->next = *head; + *head = grp; +} + +/* Must hold vlan_group_lock. */ +static void __grp_unhash(struct vlan_group *grp) +{ + struct vlan_group *next, **pprev; + + pprev = &vlan_group_hash[vlan_grp_hashfn(grp->real_dev_ifindex)]; + next = *pprev; + while (next != grp) { + pprev = &next->next; + next = *pprev; + } + *pprev = grp->next; +} + +/* Find the protocol handler. Assumes VID < VLAN_VID_MASK. + * + * Must be invoked with vlan_group_lock held. */ -struct net_device *find_802_1Q_vlan_dev(struct net_device *real_dev, - unsigned short VID) +struct net_device *__find_vlan_dev(struct net_device *real_dev, + unsigned short VID) { - struct vlan_group *grp = vlan_find_group(real_dev->ifindex); + struct vlan_group *grp = __vlan_find_group(real_dev->ifindex); if (grp) return grp->vlan_devices[VID]; @@ -172,109 +188,143 @@ struct net_device *find_802_1Q_vlan_dev(struct net_device *real_dev, return NULL; } -/** This method will explicitly do a dev_put on the device if do_dev_put - * is TRUE. This gets around a difficulty with reference counting, and - * the unregister-by-name (below). If do_locks is true, it will grab - * a lock before un-registering. If do_locks is false, it is assumed that - * the lock has already been grabbed externally... 
--Ben +/* This returns 0 if everything went fine. + * It will return 1 if the group was killed as a result. + * A negative return indicates failure. + * + * The RTNL lock must be held. */ -int unregister_802_1Q_vlan_dev(int real_dev_ifindex, unsigned short vlan_id, - int do_dev_put, int do_locks) +static int unregister_vlan_dev(struct net_device *real_dev, + unsigned short vlan_id) { struct net_device *dev = NULL; + int real_dev_ifindex = real_dev->ifindex; struct vlan_group *grp; + int i, ret; #ifdef VLAN_DEBUG printk(VLAN_DBG __FUNCTION__ ": VID: %i\n", vlan_id); #endif /* sanity check */ - if ((vlan_id >= 0xFFF) || (vlan_id <= 0)) + if ((vlan_id >= VLAN_VID_MASK) || (vlan_id <= 0)) return -EINVAL; - grp = vlan_find_group(real_dev_ifindex); + spin_lock_bh(&vlan_group_lock); + grp = __vlan_find_group(real_dev_ifindex); + spin_unlock_bh(&vlan_group_lock); + + ret = 0; + if (grp) { dev = grp->vlan_devices[vlan_id]; if (dev) { /* Remove proc entry */ vlan_proc_rem_dev(dev); - /* Take it out of our own structures */ - grp->vlan_devices[vlan_id] = NULL; + /* Take it out of our own structures, but be sure to + * interlock with HW accelerating devices or SW vlan + * input packet processing. + */ + if (real_dev->features & + (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER)) { + real_dev->vlan_rx_kill_vid(real_dev, vlan_id); + } else { + br_write_lock(BR_NETPROTO_LOCK); + grp->vlan_devices[vlan_id] = NULL; + br_write_unlock(BR_NETPROTO_LOCK); + } - /* Take it out of the global list of devices. - * NOTE: This deletes dev, don't access it again!! + /* Caller unregisters (and if necessary, puts) + * VLAN device, but we get rid of the reference to + * real_dev here. */ + dev_put(real_dev); - if (do_dev_put) - dev_put(dev); + /* If the group is now empty, kill off the + * group. + */ + for (i = 0; i < VLAN_VID_MASK; i++) + if (grp->vlan_devices[i]) + break; - /* TODO: Please review this code. 
*/ - if (do_locks) { - rtnl_lock(); - unregister_netdevice(dev); - rtnl_unlock(); - } else { - unregister_netdevice(dev); + if (i == VLAN_VID_MASK) { + if (real_dev->features & NETIF_F_HW_VLAN_RX) + real_dev->vlan_rx_register(real_dev, NULL); + + spin_lock_bh(&vlan_group_lock); + __grp_unhash(grp); + spin_unlock_bh(&vlan_group_lock); + + ret = 1; } MOD_DEC_USE_COUNT; } } - - return 0; + + return ret; } -int unregister_802_1Q_vlan_device(const char *vlan_IF_name) +static int unregister_vlan_device(const char *vlan_IF_name) { struct net_device *dev = NULL; + int ret; -#ifdef VLAN_DEBUG - printk(VLAN_DBG __FUNCTION__ ": unregister VLAN by name, name -:%s:-\n", - vlan_IF_name); -#endif dev = dev_get_by_name(vlan_IF_name); + ret = -EINVAL; if (dev) { if (dev->priv_flags & IFF_802_1Q_VLAN) { - return unregister_802_1Q_vlan_dev( - VLAN_DEV_INFO(dev)->real_dev->ifindex, - (unsigned short)(VLAN_DEV_INFO(dev)->vlan_id), - 1 /* do dev_put */, 1 /* do locking */); + rtnl_lock(); + + ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, + VLAN_DEV_INFO(dev)->vlan_id); + + dev_put(dev); + unregister_netdevice(dev); + + rtnl_unlock(); + + if (ret == 1) + ret = 0; } else { printk(VLAN_ERR __FUNCTION__ ": ERROR: Tried to remove a non-vlan device " "with VLAN code, name: %s priv_flags: %hX\n", dev->name, dev->priv_flags); dev_put(dev); - return -EPERM; + ret = -EPERM; } } else { #ifdef VLAN_DEBUG printk(VLAN_DBG __FUNCTION__ ": WARNING: Could not find dev.\n"); #endif - return -EINVAL; + ret = -EINVAL; } + + return ret; } /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns the device that was created, or NULL if there was * an error of some kind. 
*/ -struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, +static struct net_device *register_vlan_device(const char *eth_IF_name, unsigned short VLAN_ID) { struct vlan_group *grp; struct net_device *new_dev; struct net_device *real_dev; /* the ethernet device */ int malloc_size = 0; + int r; #ifdef VLAN_DEBUG printk(VLAN_DBG __FUNCTION__ ": if_name -:%s:- vid: %i\n", eth_IF_name, VLAN_ID); #endif - if (VLAN_ID >= 0xfff) + if (VLAN_ID >= VLAN_VID_MASK) goto out_ret_null; /* find the device relating to eth_IF_name. */ @@ -282,14 +332,47 @@ struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, if (!real_dev) goto out_ret_null; - /* TODO: Make sure this device can really handle having a VLAN attached - * to it... + if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { + printk(VLAN_DBG __FUNCTION__ ": VLANs not supported on %s.\n", + real_dev->name); + goto out_put_dev; + } + + if ((real_dev->features & NETIF_F_HW_VLAN_RX) && + (real_dev->vlan_rx_register == NULL || + real_dev->vlan_rx_kill_vid == NULL)) { + printk(VLAN_DBG __FUNCTION__ ": Device %s has buggy VLAN hw accel.\n", + real_dev->name); + goto out_put_dev; + } + + if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && + (real_dev->vlan_rx_add_vid == NULL || + real_dev->vlan_rx_kill_vid == NULL)) { + printk(VLAN_DBG __FUNCTION__ ": Device %s has buggy VLAN hw accel.\n", + real_dev->name); + goto out_put_dev; + } + + /* From this point on, all the data structures must remain + * consistent. + */ + rtnl_lock(); + + /* The real device must be up and operating in order to + * assosciate a VLAN device with it. */ - if (find_802_1Q_vlan_dev(real_dev, VLAN_ID)) { + if (!(real_dev->flags & IFF_UP)) + goto out_unlock; + + spin_lock_bh(&vlan_group_lock); + r = (__find_vlan_dev(real_dev, VLAN_ID) != NULL); + spin_unlock_bh(&vlan_group_lock); + + if (r) { /* was already registered. 
*/ printk(VLAN_DBG __FUNCTION__ ": ALREADY had VLAN registered\n"); - dev_put(real_dev); - return NULL; + goto out_unlock; } malloc_size = (sizeof(struct net_device)); @@ -298,15 +381,14 @@ struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, new_dev, malloc_size); if (new_dev == NULL) - goto out_put_dev; + goto out_unlock; memset(new_dev, 0, malloc_size); - /* set us up to not use a Qdisc, as the underlying Hardware device + /* Set us up to have no queue, as the underlying Hardware device * can do all the queueing we could want. */ - /* new_dev->qdisc_sleeping = &noqueue_qdisc; Not needed it seems. */ - new_dev->tx_queue_len = 0; /* This should effectively give us no queue. */ + new_dev->tx_queue_len = 0; /* Gotta set up the fields for the device. */ #ifdef VLAN_DEBUG @@ -368,8 +450,11 @@ struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, /* TODO: maybe just assign it to be ETHERNET? */ new_dev->type = real_dev->type; - /* Regular ethernet + 4 bytes (18 total). */ - new_dev->hard_header_len = VLAN_HLEN + real_dev->hard_header_len; + new_dev->hard_header_len = real_dev->hard_header_len; + if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) { + /* Regular ethernet + 4 bytes (18 total). 
*/ + new_dev->hard_header_len += VLAN_HLEN; + } new_dev->priv = kmalloc(sizeof(struct vlan_dev_info), GFP_KERNEL); @@ -377,10 +462,8 @@ struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, new_dev->priv, sizeof(struct vlan_dev_info)); - if (new_dev->priv == NULL) { - kfree(new_dev); - goto out_put_dev; - } + if (new_dev->priv == NULL) + goto out_free_newdev; memset(new_dev->priv, 0, sizeof(struct vlan_dev_info)); @@ -390,15 +473,21 @@ struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; - new_dev->hard_header = vlan_dev_hard_header; - new_dev->hard_start_xmit = vlan_dev_hard_start_xmit; - new_dev->rebuild_header = vlan_dev_rebuild_header; + if (real_dev->features & NETIF_F_HW_VLAN_TX) { + new_dev->hard_header = real_dev->hard_header; + new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; + new_dev->rebuild_header = real_dev->rebuild_header; + } else { + new_dev->hard_header = vlan_dev_hard_header; + new_dev->hard_start_xmit = vlan_dev_hard_start_xmit; + new_dev->rebuild_header = vlan_dev_rebuild_header; + } new_dev->hard_header_parse = real_dev->hard_header_parse; new_dev->set_mac_address = vlan_dev_set_mac_address; new_dev->set_multicast_list = vlan_dev_set_multicast_list; - VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through 0xFFF */ + VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ VLAN_DEV_INFO(new_dev)->real_dev = real_dev; VLAN_DEV_INFO(new_dev)->dent = NULL; VLAN_DEV_INFO(new_dev)->flags = vlan_default_dev_flags; @@ -411,37 +500,39 @@ struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, /* So, got the sucker initialized, now lets place * it into our local structure. 
*/ - grp = vlan_find_group(real_dev->ifindex); + spin_lock_bh(&vlan_group_lock); + grp = __vlan_find_group(real_dev->ifindex); + spin_unlock_bh(&vlan_group_lock); + + /* Note, we are running under the RTNL semaphore + * so it cannot "appear" on us. + */ if (!grp) { /* need to add a new group */ grp = kmalloc(sizeof(struct vlan_group), GFP_KERNEL); - VLAN_MEM_DBG("grp malloc, addr: %p size: %i\n", - grp, sizeof(struct vlan_group)); - if (!grp) { - kfree(new_dev->priv); - VLAN_FMEM_DBG("new_dev->priv free, addr: %p\n", - new_dev->priv); - kfree(new_dev); - VLAN_FMEM_DBG("new_dev free, addr: %p\n", new_dev); - - goto out_put_dev; - } + if (!grp) + goto out_free_newdev_priv; - printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); + /* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */ memset(grp, 0, sizeof(struct vlan_group)); grp->real_dev_ifindex = real_dev->ifindex; - br_write_lock_bh(BR_NETPROTO_LOCK); - grp->next = p802_1Q_vlan_list; - p802_1Q_vlan_list = grp; - br_write_unlock_bh(BR_NETPROTO_LOCK); + spin_lock_bh(&vlan_group_lock); + __grp_hash(grp); + spin_unlock_bh(&vlan_group_lock); + + if (real_dev->features & NETIF_F_HW_VLAN_RX) + real_dev->vlan_rx_register(real_dev, grp); } grp->vlan_devices[VLAN_ID] = new_dev; + vlan_proc_add_dev(new_dev); /* create it's proc entry */ - /* TODO: Please check this: RTNL --Ben */ - rtnl_lock(); + if (real_dev->features & NETIF_F_HW_VLAN_FILTER) + real_dev->vlan_rx_add_vid(real_dev, VLAN_ID); + register_netdevice(new_dev); + rtnl_unlock(); /* NOTE: We have a reference to the real device, @@ -453,6 +544,15 @@ struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, #endif return new_dev; +out_free_newdev_priv: + kfree(new_dev->priv); + +out_free_newdev: + kfree(new_dev); + +out_unlock: + rtnl_unlock(); + out_put_dev: dev_put(real_dev); @@ -464,78 +564,78 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, { struct net_device *dev = (struct net_device *)(ptr); 
struct vlan_group *grp = NULL; - int i = 0; + int i, flgs; struct net_device *vlandev = NULL; + spin_lock_bh(&vlan_group_lock); + grp = __vlan_find_group(dev->ifindex); + spin_unlock_bh(&vlan_group_lock); + + if (!grp) + goto out; + + /* It is OK that we do not hold the group lock right now, + * as we run under the RTNL lock. + */ + switch (event) { case NETDEV_CHANGEADDR: - /* Ignore for now */ - break; - case NETDEV_GOING_DOWN: /* Ignore for now */ break; case NETDEV_DOWN: - /* TODO: Please review this code. */ - /* put all related VLANs in the down state too. */ - for (grp = p802_1Q_vlan_list; grp != NULL; grp = grp->next) { - int flgs = 0; - - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vlandev = grp->vlan_devices[i]; - if (!vlandev || - (VLAN_DEV_INFO(vlandev)->real_dev != dev) || - (!(vlandev->flags & IFF_UP))) - continue; - - flgs = vlandev->flags; - flgs &= ~IFF_UP; - dev_change_flags(vlandev, flgs); - } + /* Put all VLANs for this dev in the down state too. */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = grp->vlan_devices[i]; + if (!vlandev) + continue; + + flgs = vlandev->flags; + if (!(flgs & IFF_UP)) + continue; + + dev_change_flags(vlandev, flgs & ~IFF_UP); } break; case NETDEV_UP: - /* TODO: Please review this code. */ - /* put all related VLANs in the down state too. */ - for (grp = p802_1Q_vlan_list; grp != NULL; grp = grp->next) { - int flgs; - - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vlandev = grp->vlan_devices[i]; - if (!vlandev || - (VLAN_DEV_INFO(vlandev)->real_dev != dev) || - (vlandev->flags & IFF_UP)) - continue; + /* Put all VLANs for this dev in the up state too. 
*/ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = grp->vlan_devices[i]; + if (!vlandev) + continue; - flgs = vlandev->flags; - flgs |= IFF_UP; - dev_change_flags(vlandev, flgs); - } + flgs = vlandev->flags; + if (flgs & IFF_UP) + continue; + + dev_change_flags(vlandev, flgs | IFF_UP); } break; case NETDEV_UNREGISTER: - /* TODO: Please review this code. */ - /* delete all related VLANs. */ - for (grp = p802_1Q_vlan_list; grp != NULL; grp = grp->next) { - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vlandev = grp->vlan_devices[i]; - if (!vlandev || - (VLAN_DEV_INFO(vlandev)->real_dev != dev)) - continue; - - unregister_802_1Q_vlan_dev( - VLAN_DEV_INFO(vlandev)->real_dev->ifindex, - VLAN_DEV_INFO(vlandev)->vlan_id, - 0, 0); - vlandev = NULL; - } + /* Delete all VLANs for this dev. */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + int ret; + + vlandev = grp->vlan_devices[i]; + if (!vlandev) + continue; + + ret = unregister_vlan_dev(dev, + VLAN_DEV_INFO(vlandev)->vlan_id); + + unregister_netdev(vlandev); + + /* Group was destroyed? */ + if (ret == 1) + break; } break; }; +out: return NOTIFY_DONE; } @@ -612,7 +712,7 @@ int vlan_ioctl_handler(unsigned long arg) * talk to: args.dev1 We also have the * VLAN ID: args.u.VID */ - if (register_802_1Q_vlan_device(args.device1, args.u.VID)) { + if (register_vlan_device(args.device1, args.u.VID)) { err = 0; } else { err = -EINVAL; @@ -623,7 +723,7 @@ int vlan_ioctl_handler(unsigned long arg) /* Here, the args.dev1 is the actual VLAN we want * to get rid of. */ - err = unregister_802_1Q_vlan_device(args.device1); + err = unregister_vlan_device(args.device1); break; default: @@ -636,4 +736,4 @@ int vlan_ioctl_handler(unsigned long arg) return err; } - +MODULE_LICENSE("GPL"); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index fa260f2ce457..d490adb2db72 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -30,14 +30,48 @@ I'll bet they might prove useful again... 
--Ben extern unsigned short vlan_name_type; -/* Counter for how many NON-VLAN protos we've received on a VLAN. */ -extern unsigned long vlan_bad_proto_recvd; - int vlan_ioctl_handler(unsigned long arg); -/* Add some headers for the public VLAN methods. */ -int unregister_802_1Q_vlan_device(const char* vlan_IF_name); -struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name, - unsigned short VID); +#define VLAN_GRP_HASH_SHIFT 5 +#define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT) +#define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1) +extern struct vlan_group *vlan_group_hash[VLAN_GRP_HASH_SIZE]; +extern spinlock_t vlan_group_lock; + +/* Find a VLAN device by the MAC address of it's Ethernet device, and + * it's VLAN ID. The default configuration is to have VLAN's scope + * to be box-wide, so the MAC will be ignored. The mac will only be + * looked at if we are configured to have a seperate set of VLANs per + * each MAC addressable interface. Note that this latter option does + * NOT follow the spec for VLANs, but may be useful for doing very + * large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs. + * + * Must be invoked with vlan_group_lock held and that lock MUST NOT + * be dropped until a reference is obtained on the returned device. + * You may drop the lock earlier if you are running under the RTNL + * semaphore, however. 
+ */ +struct net_device *__find_vlan_dev(struct net_device* real_dev, + unsigned short VID); /* vlan.c */ + +/* found in vlan_dev.c */ +int vlan_dev_rebuild_header(struct sk_buff *skb); +int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type* ptype); +int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, + unsigned len); +int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); +int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); +int vlan_dev_change_mtu(struct net_device *dev, int new_mtu); +int vlan_dev_set_mac_address(struct net_device *dev, void* addr); +int vlan_dev_open(struct net_device* dev); +int vlan_dev_stop(struct net_device* dev); +int vlan_dev_init(struct net_device* dev); +void vlan_dev_destruct(struct net_device* dev); +int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); +int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); +int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val); +void vlan_dev_set_multicast_list(struct net_device *vlan_dev); #endif /* !(__BEN_VLAN_802_1Q_INC__) */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 7da75dbadd93..26bf70e16772 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -38,12 +38,6 @@ #include #include -struct net_device_stats *vlan_dev_get_stats(struct net_device *dev) -{ - return &(((struct vlan_dev_info *)(dev->priv))->dev_stats); -} - - /* * Rebuild the Ethernet MAC header. This is called after an ARP * (or in future other address resolution) has completed on this @@ -78,6 +72,21 @@ int vlan_dev_rebuild_header(struct sk_buff *skb) return 0; } +static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +{ + if (VLAN_DEV_INFO(skb->dev)->flags & 1) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb) { + /* Lifted from Gleb's VLAN code... 
*/ + memmove(skb->data - ETH_HLEN, + skb->data - VLAN_ETH_HLEN, 12); + skb->mac.raw += VLAN_HLEN; + } + } + + return skb; +} + /* * Determine the packet's protocol ID. The rule here is that we * assume 802.3 if the type field is short enough to be a length. @@ -113,7 +122,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); - vid = (vlan_TCI & 0xFFF); + vid = (vlan_TCI & VLAN_VID_MASK); #ifdef VLAN_DEBUG printk(VLAN_DBG __FUNCTION__ ": skb: %p vlan_id: %hx\n", @@ -124,11 +133,18 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, * and then go on as usual. */ - /* we have 12 bits of vlan ID. */ - /* If it's NULL, we will tag it to be junked below */ - skb->dev = find_802_1Q_vlan_dev(dev, vid); + /* We have 12 bits of vlan ID. + * + * We must not drop the vlan_group_lock until we hold a + * reference to the device (netif_rx does that) or we + * fail. + */ + spin_lock_bh(&vlan_group_lock); + skb->dev = __find_vlan_dev(dev, vid); if (!skb->dev) { + spin_unlock_bh(&vlan_group_lock); + #ifdef VLAN_DEBUG printk(VLAN_DBG __FUNCTION__ ": ERROR: No net_device for VID: %i on dev: %s [%i]\n", (unsigned int)(vid), dev->name, dev->ifindex); @@ -137,6 +153,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, return -1; } + skb->dev->last_rx = jiffies; + /* Bump the rx counters for the VLAN device. 
*/ stats = vlan_dev_get_stats(skb->dev); stats->rx_packets++; @@ -149,6 +167,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, */ if (dev != VLAN_DEV_INFO(skb->dev)->real_dev) { + spin_unlock_bh(&vlan_group_lock); + #ifdef VLAN_DEBUG printk(VLAN_DBG __FUNCTION__ ": dropping skb: %p because came in on wrong device, dev: %s real_dev: %s, skb_dev: %s\n", skb, dev->name, VLAN_DEV_INFO(skb->dev)->real_dev->name, skb->dev->name); @@ -161,7 +181,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, /* * Deal with ingress priority mapping. */ - skb->priority = VLAN_DEV_INFO(skb->dev)->ingress_priority_map[(ntohs(vhdr->h_vlan_TCI) >> 13) & 0x7]; + skb->priority = vlan_get_ingress_priority(skb->dev, ntohs(vhdr->h_vlan_TCI)); #ifdef VLAN_DEBUG printk(VLAN_DBG __FUNCTION__ ": priority: %lu for TCI: %hu (hbo)\n", @@ -174,9 +194,12 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, switch (skb->pkt_type) { case PACKET_BROADCAST: /* Yeah, stats collect these together.. */ // stats->broadcast ++; // no such counter :-( + break; + case PACKET_MULTICAST: stats->multicast++; break; + case PACKET_OTHERHOST: /* Our lower layer thinks this is not local, let's make sure. * This allows the VLAN to have a different MAC than the underlying @@ -215,6 +238,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, /* TODO: Add a more specific counter here. */ stats->rx_errors++; } + spin_unlock_bh(&vlan_group_lock); return 0; } @@ -243,6 +267,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, /* TODO: Add a more specific counter here. */ stats->rx_errors++; } + spin_unlock_bh(&vlan_group_lock); return 0; } @@ -265,6 +290,24 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, /* TODO: Add a more specific counter here. 
*/ stats->rx_errors++; } + spin_unlock_bh(&vlan_group_lock); + return 0; +} + +static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev, + struct sk_buff* skb) +{ + struct vlan_priority_tci_mapping *mp = + VLAN_DEV_INFO(dev)->egress_priority_map[(skb->priority & 0xF)]; + + while (mp) { + if (mp->priority == skb->priority) { + return mp->vlan_qos; /* This should already be shifted to mask + * correctly with the VLAN's TCI + */ + } + mp = mp->next; + } return 0; } @@ -396,8 +439,9 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) { + unsigned short veth_TCI; + /* This is not a VLAN frame...but we can fix that! */ - unsigned short veth_TCI = 0; VLAN_DEV_INFO(dev)->cnt_encap_on_xmit++; #ifdef VLAN_DEBUG @@ -453,65 +497,44 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); #endif - dev_queue_xmit(skb); stats->tx_packets++; /* for statics only */ stats->tx_bytes += skb->len; - return 0; -} -int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) -{ - /* TODO: gotta make sure the underlying layer can handle it, - * maybe an IFF_VLAN_CAPABLE flag for devices? 
- */ - if (VLAN_DEV_INFO(dev)->real_dev->mtu < new_mtu) - return -ERANGE; - - dev->mtu = new_mtu; + dev_queue_xmit(skb); - return new_mtu; + return 0; } -int vlan_dev_open(struct net_device *dev) +int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { - if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_UP)) - return -ENETDOWN; + struct net_device_stats *stats = vlan_dev_get_stats(dev); + struct vlan_skb_tx_cookie *cookie; - return 0; -} + stats->tx_packets++; + stats->tx_bytes += skb->len; -int vlan_dev_stop(struct net_device *dev) -{ - vlan_flush_mc_list(dev); - return 0; -} + skb->dev = VLAN_DEV_INFO(dev)->real_dev; + cookie = VLAN_TX_SKB_CB(skb); + cookie->magic = VLAN_TX_COOKIE_MAGIC; + cookie->vlan_tag = (VLAN_DEV_INFO(dev)->vlan_id | + vlan_dev_get_egress_qos_mask(dev, skb)); + + dev_queue_xmit(skb); -int vlan_dev_init(struct net_device *dev) -{ - /* TODO: figure this out, maybe do nothing?? */ return 0; } -void vlan_dev_destruct(struct net_device *dev) +int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) { - if (dev) { - vlan_flush_mc_list(dev); - if (dev->priv) { - dev_put(VLAN_DEV_INFO(dev)->real_dev); - if (VLAN_DEV_INFO(dev)->dent) { - printk(KERN_ERR __FUNCTION__ ": dent is NOT NULL!\n"); - - /* If we ever get here, there is a serious bug - * that must be fixed. - */ - } + /* TODO: gotta make sure the underlying layer can handle it, + * maybe an IFF_VLAN_CAPABLE flag for devices? 
+ */ + if (VLAN_DEV_INFO(dev)->real_dev->mtu < new_mtu) + return -ERANGE; - kfree(dev->priv); + dev->mtu = new_mtu; - VLAN_FMEM_DBG("dev->priv free, addr: %p\n", dev->priv); - dev->priv = NULL; - } - } + return new_mtu; } int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) @@ -642,6 +665,124 @@ int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p) return 0; } +static inline int vlan_dmi_equals(struct dev_mc_list *dmi1, + struct dev_mc_list *dmi2) +{ + return ((dmi1->dmi_addrlen == dmi2->dmi_addrlen) && + (memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0)); +} + +/** dmi is a single entry into a dev_mc_list, a single node. mc_list is + * an entire list, and we'll iterate through it. + */ +static int vlan_should_add_mc(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) +{ + struct dev_mc_list *idmi; + + for (idmi = mc_list; idmi != NULL; ) { + if (vlan_dmi_equals(dmi, idmi)) { + if (dmi->dmi_users > idmi->dmi_users) + return 1; + else + return 0; + } else { + idmi = idmi->next; + } + } + + return 1; +} + +static inline void vlan_destroy_mc_list(struct dev_mc_list *mc_list) +{ + struct dev_mc_list *dmi = mc_list; + struct dev_mc_list *next; + + while(dmi) { + next = dmi->next; + kfree(dmi); + dmi = next; + } +} + +static void vlan_copy_mc_list(struct dev_mc_list *mc_list, struct vlan_dev_info *vlan_info) +{ + struct dev_mc_list *dmi, *new_dmi; + + vlan_destroy_mc_list(vlan_info->old_mc_list); + vlan_info->old_mc_list = NULL; + + for (dmi = mc_list; dmi != NULL; dmi = dmi->next) { + new_dmi = kmalloc(sizeof(*new_dmi), GFP_ATOMIC); + if (new_dmi == NULL) { + printk(KERN_ERR "vlan: cannot allocate memory. 
" + "Multicast may not work properly from now.\n"); + return; + } + + /* Copy whole structure, then make new 'next' pointer */ + *new_dmi = *dmi; + new_dmi->next = vlan_info->old_mc_list; + vlan_info->old_mc_list = new_dmi; + } +} + +static void vlan_flush_mc_list(struct net_device *dev) +{ + struct dev_mc_list *dmi = dev->mc_list; + + while (dmi) { + dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); + printk(KERN_INFO "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from vlan interface\n", + dev->name, + dmi->dmi_addr[0], + dmi->dmi_addr[1], + dmi->dmi_addr[2], + dmi->dmi_addr[3], + dmi->dmi_addr[4], + dmi->dmi_addr[5]); + dmi = dev->mc_list; + } + + /* dev->mc_list is NULL by the time we get here. */ + vlan_destroy_mc_list(VLAN_DEV_INFO(dev)->old_mc_list); + VLAN_DEV_INFO(dev)->old_mc_list = NULL; +} + +int vlan_dev_open(struct net_device *dev) +{ + if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_UP)) + return -ENETDOWN; + + return 0; +} + +int vlan_dev_stop(struct net_device *dev) +{ + vlan_flush_mc_list(dev); + return 0; +} + +int vlan_dev_init(struct net_device *dev) +{ + /* TODO: figure this out, maybe do nothing?? */ + return 0; +} + +void vlan_dev_destruct(struct net_device *dev) +{ + if (dev) { + vlan_flush_mc_list(dev); + if (dev->priv) { + if (VLAN_DEV_INFO(dev)->dent) + BUG(); + + kfree(dev->priv); + dev->priv = NULL; + } + } +} + /** Taken from Gleb + Lennert's VLAN code, and modified... */ void vlan_dev_set_multicast_list(struct net_device *vlan_dev) { @@ -707,68 +848,3 @@ void vlan_dev_set_multicast_list(struct net_device *vlan_dev) vlan_copy_mc_list(vlan_dev->mc_list, VLAN_DEV_INFO(vlan_dev)); } } - -/** dmi is a single entry into a dev_mc_list, a single node. mc_list is - * an entire list, and we'll iterate through it. 
- */ -int vlan_should_add_mc(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) -{ - struct dev_mc_list *idmi; - - for (idmi = mc_list; idmi != NULL; ) { - if (vlan_dmi_equals(dmi, idmi)) { - if (dmi->dmi_users > idmi->dmi_users) - return 1; - else - return 0; - } else { - idmi = idmi->next; - } - } - - return 1; -} - -void vlan_copy_mc_list(struct dev_mc_list *mc_list, struct vlan_dev_info *vlan_info) -{ - struct dev_mc_list *dmi, *new_dmi; - - vlan_destroy_mc_list(vlan_info->old_mc_list); - vlan_info->old_mc_list = NULL; - - for (dmi = mc_list; dmi != NULL; dmi = dmi->next) { - new_dmi = kmalloc(sizeof(*new_dmi), GFP_ATOMIC); - if (new_dmi == NULL) { - printk(KERN_ERR "vlan: cannot allocate memory. " - "Multicast may not work properly from now.\n"); - return; - } - - /* Copy whole structure, then make new 'next' pointer */ - *new_dmi = *dmi; - new_dmi->next = vlan_info->old_mc_list; - vlan_info->old_mc_list = new_dmi; - } -} - -void vlan_flush_mc_list(struct net_device *dev) -{ - struct dev_mc_list *dmi = dev->mc_list; - - while (dmi) { - dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - printk(KERN_INFO "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from vlan interface\n", - dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - dmi = dev->mc_list; - } - - /* dev->mc_list is NULL by the time we get here. 
*/ - vlan_destroy_mc_list(VLAN_DEV_INFO(dev)->old_mc_list); - VLAN_DEV_INFO(dev)->old_mc_list = NULL; -} diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index c8482f9ab40f..58504f5c30a6 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -272,7 +272,7 @@ static int vlan_proc_get_vlan_info(char* buf, unsigned int cnt) { struct net_device *vlandev = NULL; struct vlan_group *grp = NULL; - int i = 0; + int h, i; char *nm_type = NULL; struct vlan_dev_info *dev_info = NULL; @@ -292,46 +292,34 @@ static int vlan_proc_get_vlan_info(char* buf, unsigned int cnt) nm_type = "UNKNOWN"; } - cnt += sprintf(buf + cnt, "Name-Type: %s bad_proto_recvd: %lu\n", - nm_type, vlan_bad_proto_recvd); + cnt += sprintf(buf + cnt, "Name-Type: %s\n", nm_type); - for (grp = p802_1Q_vlan_list; grp != NULL; grp = grp->next) { - /* loop through all devices for this device */ -#ifdef VLAN_DEBUG - printk(VLAN_DBG __FUNCTION__ ": found a group, addr: %p\n",grp); -#endif - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vlandev = grp->vlan_devices[i]; - if (!vlandev) - continue; -#ifdef VLAN_DEBUG - printk(VLAN_DBG __FUNCTION__ - ": found a vlan_dev, addr: %p\n", vlandev); -#endif - if ((cnt + 100) > VLAN_PROC_BUFSZ) { - if ((cnt+strlen(term_msg)) < VLAN_PROC_BUFSZ) - cnt += sprintf(buf+cnt, "%s", term_msg); + spin_lock_bh(&vlan_group_lock); + for (h = 0; h < VLAN_GRP_HASH_SIZE; h++) { + for (grp = vlan_group_hash[h]; grp != NULL; grp = grp->next) { + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = grp->vlan_devices[i]; + if (!vlandev) + continue; - return cnt; - } - if (!vlandev->priv) { - printk(KERN_ERR __FUNCTION__ - ": ERROR: vlandev->priv is NULL\n"); - continue; - } + if ((cnt + 100) > VLAN_PROC_BUFSZ) { + if ((cnt+strlen(term_msg)) < VLAN_PROC_BUFSZ) + cnt += sprintf(buf+cnt, "%s", term_msg); - dev_info = VLAN_DEV_INFO(vlandev); + goto out; + } -#ifdef VLAN_DEBUG - printk(VLAN_DBG __FUNCTION__ - ": got a good vlandev, addr: %p\n", - VLAN_DEV_INFO(vlandev)); -#endif 
- cnt += sprintf(buf + cnt, "%-15s| %d | %s\n", - vlandev->name, dev_info->vlan_id, - dev_info->real_dev->name); + dev_info = VLAN_DEV_INFO(vlandev); + cnt += sprintf(buf + cnt, "%-15s| %d | %s\n", + vlandev->name, + dev_info->vlan_id, + dev_info->real_dev->name); + } } } +out: + spin_unlock_bh(&vlan_group_lock); + return cnt; } @@ -365,11 +353,7 @@ static int vlandev_get_info(char *buf, char **start, int cnt = 0; int i; -#ifdef VLAN_DEBUG - printk(VLAN_DBG __FUNCTION__ ": vlandev: %p\n", vlandev); -#endif - - if ((vlandev == NULL) || (!vlandev->priv_flags & IFF_802_1Q_VLAN)) + if ((vlandev == NULL) || (!(vlandev->priv_flags & IFF_802_1Q_VLAN))) return 0; dev_info = VLAN_DEV_INFO(vlandev); @@ -426,7 +410,7 @@ static int vlandev_get_info(char *buf, char **start, cnt += sprintf(buf + cnt, "EGRESSS priority Mappings: "); - for (i = 0; i<16; i++) { + for (i = 0; i < 16; i++) { mp = dev_info->egress_priority_map[i]; while (mp) { cnt += sprintf(buf + cnt, "%lu:%hu ", diff --git a/net/Config.in b/net/Config.in index d111df17c31e..506f1c1cd92d 100644 --- a/net/Config.in +++ b/net/Config.in @@ -44,10 +44,8 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then tristate ' Multi-Protocol Over ATM (MPOA) support' CONFIG_ATM_MPOA fi fi - - dep_tristate '802.1Q VLAN Support (EXPERIMENTAL)' CONFIG_VLAN_8021Q $CONFIG_EXPERIMENTAL - fi +tristate '802.1Q VLAN Support' CONFIG_VLAN_8021Q comment ' ' tristate 'The IPX protocol' CONFIG_IPX -- cgit v1.2.3