Diffstat (limited to 'net')
112 files changed, 2342 insertions, 872 deletions
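A large share of the hunks below replace the old open-coded sleep pattern (DECLARE_WAITQUEUE + add_wait_queue + set_current_state ... remove_wait_queue) with the prepare_to_wait()/finish_wait() helpers, e.g. in net/core/datagram.c, net/core/sock.c, net/ipv4/af_inet.c and net/ipv4/tcp.c. The sketch below is only an illustration of that idiom, not code from the patch; the wait-queue head my_wq and the flag being waited on are placeholder names, whereas the real hunks wait on sk->sleep, &sk->lock.wq, &tcp_lhash_wait and similar heads.

#include <linux/wait.h>
#include <linux/sched.h>

/* Sketch of the prepare_to_wait()/finish_wait() sleeping idiom used
 * throughout this patch.  my_wq and flag are hypothetical; sleep until
 * *flag becomes non-zero, a signal arrives, or the timeout runs out.
 */
static long wait_for_flag(wait_queue_head_t *my_wq, int *flag, long timeo)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* Queue the task and set its state in one call; this
		 * replaces the old add_wait_queue() + set_current_state()
		 * pair inside the loop.
		 */
		prepare_to_wait(my_wq, &wait, TASK_INTERRUPTIBLE);
		if (*flag)
			break;
		if (signal_pending(current) || !timeo)
			break;
		timeo = schedule_timeout(timeo);
	}
	/* Dequeue and restore TASK_RUNNING; this replaces the old
	 * __set_current_state(TASK_RUNNING) + remove_wait_queue() pair.
	 */
	finish_wait(my_wq, &wait);
	return timeo;
}

Exclusive waiters (accept queues, __lock_sock) use prepare_to_wait_exclusive() in the same shape, as the hunks below show.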
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 35f43fd81c22..25562366d4c0 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -433,7 +433,6 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, /* set up method calls */ new_dev->init = vlan_dev_init; new_dev->destructor = vlan_dev_destruct; - new_dev->features |= NETIF_F_DYNALLOC ; /* new_dev->ifindex = 0; it will be set when added to * the global list. diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b1751235dacd..907073996bc7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -785,6 +785,7 @@ void vlan_dev_destruct(struct net_device *dev) kfree(dev->priv); dev->priv = NULL; } + kfree(dev); } } diff --git a/net/Makefile b/net/Makefile index 1097e5a0a791..1d0cadca1288 100644 --- a/net/Makefile +++ b/net/Makefile @@ -5,7 +5,9 @@ # Rewritten to use lists instead of if-statements. # -obj-y := socket.o core/ +obj-y := nonet.o + +obj-$(CONFIG_NET) := socket.o core/ obj-$(CONFIG_COMPAT) += compat.o diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index b9aa77392a80..ee9b5fc8ca93 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -231,7 +231,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) kfree_skb(skb); return 1; } - atomic_add(skb->truesize, &ATM_SKB(skb)->vcc->tx_inuse); + atomic_add(skb->truesize, &ATM_SKB(skb)->vcc->sk->wmem_alloc); ATM_SKB(skb)->iovcnt = 0; ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; DPRINTK("(unit %d): atm_skb(%p)->vcc(%p)->dev(%p)\n", diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 9f7f6cfd05a3..e61899bfebe4 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -86,7 +86,7 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask, if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) return -EINVAL; if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { - if (!(info->bitmask & EBT_IPROTO)) + if (info->bitmask & EBT_IPROTO) return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP) diff --git a/net/core/datagram.c b/net/core/datagram.c index f588cf30eae1..f83189e52b13 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -68,11 +68,9 @@ static inline int connection_based(struct sock *sk) static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; + DEFINE_WAIT(wait); - DECLARE_WAITQUEUE(wait, current); - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(sk->sleep, &wait); + prepare_to_wait_exclusive(sk->sleep, &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -101,8 +99,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) error = 0; *timeo_p = schedule_timeout(*timeo_p); out: - current->state = TASK_RUNNING; - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); return error; interrupted: error = sock_intr_errno(*timeo_p); diff --git a/net/core/dev.c b/net/core/dev.c index f7f9cd622e89..2d5ae21a98de 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -547,6 +547,50 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) } /** + * dev_get_by_flags - find any device with given flags + * @if_flags: IFF_* values + * @mask: bitmask of bits in if_flags to check + * + * Search for any interface with the given flags. Returns NULL if a device + * is not found or a pointer to the device. 
The device returned has + * had a reference added and the pointer is safe until the user calls + * dev_put to indicate they have finished with it. + */ + +struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) +{ + struct net_device *dev; + + read_lock(&dev_base_lock); + dev = __dev_get_by_flags(if_flags, mask); + if (dev) + dev_hold(dev); + read_unlock(&dev_base_lock); + return dev; +} + +/** + * __dev_get_by_flags - find any device with given flags + * @if_flags: IFF_* values + * @mask: bitmask of bits in if_flags to check + * + * Search for any interface with the given flags. Returns NULL if a device + * is not found or a pointer to the device. The caller must hold either + * the RTNL semaphore or @dev_base_lock. + */ + +struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask) +{ + struct net_device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) { + if (((dev->flags ^ if_flags) & mask) == 0) + return dev; + } + return NULL; +} + +/** * dev_alloc_name - allocate a name for a device * @dev: device * @name: name format string @@ -2595,12 +2639,10 @@ int netdev_finish_unregister(struct net_device *dev) } #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, - (dev->features & NETIF_F_DYNALLOC)?"":", old style"); + (dev->destructor != NULL)?"":", old style"); #endif if (dev->destructor) dev->destructor(dev); - if (dev->features & NETIF_F_DYNALLOC) - kfree(dev); return 0; } @@ -2680,7 +2722,7 @@ int unregister_netdevice(struct net_device *dev) free_divert_blk(dev); #endif - if (dev->features & NETIF_F_DYNALLOC) { + if (dev->destructor != NULL) { #ifdef NET_REFCNT_DEBUG if (atomic_read(&dev->refcnt) != 1) printk(KERN_DEBUG "unregister_netdevice: holding %s " diff --git a/net/core/dst.c b/net/core/dst.c index 2cde81a0ca7a..ea1b6f6751bd 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -228,7 +228,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void _race_ _condition_. 
*/ if (event!=NETDEV_DOWN && - !(dev->features & NETIF_F_DYNALLOC) && + dev->destructor == NULL && dst->output == dst_blackhole) { dst->dev = &loopback_dev; dev_put(dev); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b56a97ba7739..41a1165df458 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -208,6 +208,7 @@ struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) skb->len = 0; skb->data_len = 0; skb->csum = 0; + skb->local_df = 0; skb->cloned = 0; skb->pkt_type = PACKET_HOST; /* Default type */ skb->ip_summed = 0; @@ -375,6 +376,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) C(len); C(data_len); C(csum); + C(local_df); n->cloned = 1; C(pkt_type); C(ip_summed); @@ -438,6 +440,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac.raw = old->mac.raw + offset; memcpy(new->cb, old->cb, sizeof(old->cb)); atomic_set(&new->users, 1); + new->local_df = old->local_df; new->pkt_type = old->pkt_type; new->stamp = old->stamp; new->destructor = NULL; diff --git a/net/core/sock.c b/net/core/sock.c index 6f06e0d46229..da9bfa3ac520 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -746,17 +746,16 @@ void sock_kfree_s(struct sock *sk, void *mem, int size) */ static long sock_wait_for_wmem(struct sock * sk, long timeo) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); - add_wait_queue(sk->sleep, &wait); for (;;) { if (!timeo) break; if (signal_pending(current)) break; set_bit(SOCK_NOSPACE, &sk->socket->flags); - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) break; if (sk->shutdown & SEND_SHUTDOWN) @@ -765,8 +764,7 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo) break; timeo = schedule_timeout(timeo); } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); return timeo; } @@ -860,19 +858,18 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, void __lock_sock(struct sock *sk) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); - add_wait_queue_exclusive(&sk->lock.wq, &wait); for(;;) { - current->state = TASK_UNINTERRUPTIBLE; + prepare_to_wait_exclusive(&sk->lock.wq, &wait, + TASK_UNINTERRUPTIBLE); spin_unlock_bh(&sk->lock.slock); schedule(); spin_lock_bh(&sk->lock.slock); if(!sock_owned_by_user(sk)) break; } - current->state = TASK_RUNNING; - remove_wait_queue(&sk->lock.wq, &wait); + finish_wait(&sk->lock.wq, &wait); } void __release_sock(struct sock *sk) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a89aa73220a6..2555a4e02dec 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -562,10 +562,9 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, static long inet_wait_for_connect(struct sock *sk, long timeo) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(sk->sleep, &wait); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); /* Basic assumption: if someone sets sk->err, he _must_ * change state of the socket from TCP_SYN_*. 
@@ -578,10 +577,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) lock_sock(sk); if (signal_pending(current) || !timeo) break; - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); return timeo; } diff --git a/net/ipv4/ah.c b/net/ipv4/ah.c index 7d22a42d29b1..efb9d1d8114a 100644 --- a/net/ipv4/ah.c +++ b/net/ipv4/ah.c @@ -68,8 +68,10 @@ static int ah_output(struct sk_buff *skb) char buf[60]; } tmp_iph; - if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) - return -EINVAL; + if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) { + err = -EINVAL; + goto error_nolock; + } spin_lock_bh(&x->lock); if ((err = xfrm_state_check_expire(x)) != 0) @@ -139,8 +141,10 @@ static int ah_output(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; spin_unlock_bh(&x->lock); - if ((skb->dst = dst_pop(dst)) == NULL) + if ((skb->dst = dst_pop(dst)) == NULL) { + err = -EHOSTUNREACH; goto error_nolock; + } return NET_XMIT_BYPASS; error: diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index daacb350ae84..f4d4a3d88306 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -510,11 +510,11 @@ void arp_send(int type, int ptype, u32 dest_ip, */ skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) - + dev->hard_header_len + 15, GFP_ATOMIC); + + LL_RESERVED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return; - skb_reserve(skb, (dev->hard_header_len+15)&~15); + skb_reserve(skb, LL_RESERVED_SPACE(dev)); skb->nh.raw = skb->data; arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); skb->dev = dev; diff --git a/net/ipv4/esp.c b/net/ipv4/esp.c index 66fbb44c6274..58817c2374e8 100644 --- a/net/ipv4/esp.c +++ b/net/ipv4/esp.c @@ -32,8 +32,10 @@ int esp_output(struct sk_buff *skb) } tmp_iph; /* First, if the skb is not checksummed, complete checksum. 
*/ - if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) - return -EINVAL; + if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) { + err = -EINVAL; + goto error_nolock; + } spin_lock_bh(&x->lock); if ((err = xfrm_state_check_expire(x)) != 0) @@ -143,8 +145,10 @@ int esp_output(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; spin_unlock_bh(&x->lock); - if ((skb->dst = dst_pop(dst)) == NULL) + if ((skb->dst = dst_pop(dst)) == NULL) { + err = -EHOSTUNREACH; goto error_nolock; + } return NET_XMIT_BYPASS; error: @@ -259,7 +263,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) if (esp->conf.padlen) mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1); - return mtu + x->props.header_len + esp->auth.icv_full_len; + return mtu + x->props.header_len + esp->auth.icv_trunc_len; } void esp4_err(struct sk_buff *skb, u32 info) @@ -365,6 +369,7 @@ int esp_init_state(struct xfrm_state *x, void *args) if (x->props.mode) x->props.header_len += 20; x->data = esp; + x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len; return 0; error: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 1053e9093bee..05600080cf7b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -211,7 +211,7 @@ static int igmp_send_report(struct net_device *dev, u32 group, int type) return -1; } - skb=alloc_skb(IGMP_SIZE+dev->hard_header_len+15, GFP_ATOMIC); + skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; @@ -219,7 +219,7 @@ static int igmp_send_report(struct net_device *dev, u32 group, int type) skb->dst = &rt->u.dst; - skb_reserve(skb, (dev->hard_header_len+15)&~15); + skb_reserve(skb, LL_RESERVED_SPACE(dev)); skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index f9fa6a1cefcc..ec94140ff2cf 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -92,7 +92,7 @@ int ip_forward(struct sk_buff *skb) goto sr_failed; /* We are about to mangle packet. Copy it! 
*/ - if (skb_cow(skb, rt->u.dst.dev->hard_header_len+rt->u.dst.header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) goto drop; iph = skb->nh.iph; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a42f7803f578..375786d1f583 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -273,7 +273,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; dev->init = ipgre_tunnel_init; - dev->features |= NETIF_F_DYNALLOC; memcpy(&nt->parms, parms, sizeof(*parms)); nt->parms.name[IFNAMSIZ-1] = '\0'; strcpy(dev->name, nt->parms.name); @@ -305,6 +304,7 @@ failed: static void ipgre_tunnel_destructor(struct net_device *dev) { if (dev != &ipgre_fb_tunnel_dev) { + kfree(dev); MOD_DEC_USE_COUNT; } } @@ -824,7 +824,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; - max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen; + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 171a96e87f30..3f6dead5daa2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -162,13 +162,13 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb->dst; struct hh_cache *hh = dst->hh; struct net_device *dev = dst->dev; + int hh_len = LL_RESERVED_SPACE(dev); /* Be paranoid, rather than too clever. */ - if (unlikely(skb_headroom(skb) < dev->hard_header_len - && dev->hard_header)) { + if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) { struct sk_buff *skb2; - skb2 = skb_realloc_headroom(skb, (dev->hard_header_len&~15) + 16); + skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); if (skb2 == NULL) { kfree_skb(skb); return -ENOMEM; @@ -440,7 +440,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) iph = skb->nh.iph; - if (unlikely(iph->frag_off & htons(IP_DF))) { + if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_pmtu(&rt->u.dst))); kfree_skb(skb); @@ -572,7 +572,7 @@ slow_path: * Allocate buffer. */ - if ((skb2 = alloc_skb(len+hlen+rt->u.dst.dev->hard_header_len+16,GFP_ATOMIC)) == NULL) { + if ((skb2 = alloc_skb(len+hlen+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n")); err = -ENOMEM; goto fail; @@ -583,7 +583,7 @@ slow_path: */ ip_copy_metadata(skb2, skb); - skb_reserve(skb2, (rt->u.dst.dev->hard_header_len&~15)+16); + skb_reserve(skb2, LL_RESERVED_SPACE(rt->u.dst.dev)); skb_put(skb2, len + hlen); skb2->nh.raw = skb2->data; skb2->h.raw = skb2->data + hlen; @@ -771,7 +771,7 @@ int ip_append_data(struct sock *sk, exthdrlen = 0; mtu = inet->cork.fragsize; } - hh_len = (rt->u.dst.dev->hard_header_len&~15) + 16; + hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; @@ -793,6 +793,19 @@ int ip_append_data(struct sock *sk, inet->cork.length += length; + /* So, what's going on in the loop below? + * + * We use calculated fragment length to generate chained skb, + * each of segments is IP fragment ready for sending to network after + * adding appropriate IP header. 
+ * + * Mistake is: + * + * If mtu-fragheaderlen is not 0 modulo 8, we generate additional + * small fragment of length (mtu-fragheaderlen)%8, even though + * it is not necessary. Not a big bug, but needs a fix. + */ + if ((skb = skb_peek_tail(&sk->write_queue)) == NULL) goto alloc_new_skb; @@ -815,6 +828,15 @@ alloc_new_skb: alloclen = maxfraglen; else alloclen = datalen + fragheaderlen; + + /* The last fragment gets additional space at tail. + * Note, with MSG_MORE we overallocate on fragments, + * because we have no idea what fragment will be + * the last. + */ + if (datalen == length) + alloclen += rt->u.dst.trailer_len; + if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, @@ -967,7 +989,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (!(rt->u.dst.dev->features&NETIF_F_SG)) return -EOPNOTSUPP; - hh_len = (rt->u.dst.dev->hard_header_len&~15)+16; + hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); @@ -1088,6 +1110,16 @@ int ip_push_pending_frames(struct sock *sk) #endif } + /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow + * to fragment the frame generated here. No matter, what transforms + * how transforms change size of the packet, it will come out. + */ + if (inet->pmtudisc != IP_PMTUDISC_DO) + skb->local_df = 1; + + /* DF bit is set when we want to see DF on outgoing frames. + * If local_df is set too, we still allow to fragment this frame + * locally. */ if (inet->pmtudisc == IP_PMTUDISC_DO || (!skb_shinfo(skb)->frag_list && ip_dont_fragment(sk, &rt->u.dst))) df = htons(IP_DF); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 7493fcb4aeff..1add72640b70 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -656,7 +656,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d struct net_device *dev = d->dev; struct sk_buff *skb; struct bootp_pkt *b; - int hh_len = (dev->hard_header_len + 15) & ~15; + int hh_len = LL_RESERVED_SPACE(dev); struct iphdr *h; /* Allocate packet */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 45534b28807d..d145db8137b5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -242,7 +242,6 @@ struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; dev->init = ipip_tunnel_init; - dev->features |= NETIF_F_DYNALLOC; memcpy(&nt->parms, parms, sizeof(*parms)); nt->parms.name[IFNAMSIZ-1] = '\0'; strcpy(dev->name, nt->parms.name); @@ -274,6 +273,7 @@ failed: static void ipip_tunnel_destructor(struct net_device *dev) { if (dev != &ipip_fb_tunnel_dev) { + kfree(dev); MOD_DEC_USE_COUNT; } } @@ -616,7 +616,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) /* * Okay, now see if we can stuff it in the buffer as-is. 
*/ - max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr)); + max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 174697063db3..b6c5c1c872c4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -182,6 +182,11 @@ static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) return (struct net_device_stats*)dev->priv; } +static void vif_dev_destructor(struct net_device *dev) +{ + kfree(dev); +} + static struct net_device *ipmr_reg_vif(struct vifctl *v) { @@ -205,7 +210,7 @@ struct net_device *ipmr_reg_vif(struct vifctl *v) dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; dev->get_stats = reg_vif_get_stats; - dev->features |= NETIF_F_DYNALLOC; + dev->destructor = vif_dev_destructor; if (register_netdevice(dev)) { kfree(dev); @@ -1178,7 +1183,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, return; } - encap += dev->hard_header_len; + encap += LL_RESERVED_SPACE(dev); if (skb_headroom(skb) < encap || skb_cloned(skb) || !last) skb2 = skb_realloc_headroom(skb, (encap + 15)&~15); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e5943997b1d9..7860d8d8a743 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1205,14 +1205,24 @@ void arpt_unregister_table(struct arpt_table *table) } /* The built-in targets: standard (NULL) and error. */ -static struct arpt_target arpt_standard_target -= { { NULL, NULL }, ARPT_STANDARD_TARGET, NULL, NULL, NULL }; -static struct arpt_target arpt_error_target -= { { NULL, NULL }, ARPT_ERROR_TARGET, arpt_error, NULL, NULL }; - -static struct nf_sockopt_ops arpt_sockopts -= { { NULL, NULL }, PF_INET, ARPT_BASE_CTL, ARPT_SO_SET_MAX+1, do_arpt_set_ctl, - ARPT_BASE_CTL, ARPT_SO_GET_MAX+1, do_arpt_get_ctl, 0, NULL }; +static struct arpt_target arpt_standard_target = { + .name = ARPT_STANDARD_TARGET, +}; + +static struct arpt_target arpt_error_target = { + .name = ARPT_ERROR_TARGET, + .target = arpt_error, +}; + +static struct nf_sockopt_ops arpt_sockopts = { + .pf = PF_INET, + .set_optmin = ARPT_BASE_CTL, + .set_optmax = ARPT_SO_SET_MAX+1, + .set = do_arpt_set_ctl, + .get_optmin = ARPT_BASE_CTL, + .get_optmax = ARPT_SO_GET_MAX+1, + .get = do_arpt_get_ctl, +}; #ifdef CONFIG_PROC_FS static inline int print_name(const struct arpt_table *t, diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 6cd6013e2ecb..a50d47639d27 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1339,11 +1339,12 @@ getorigdst(struct sock *sk, int optval, void *user, int *len) return -ENOENT; } -static struct nf_sockopt_ops so_getorigdst -= { { NULL, NULL }, PF_INET, - 0, 0, NULL, /* Setsockopts */ - SO_ORIGINAL_DST, SO_ORIGINAL_DST+1, &getorigdst, - 0, NULL }; +static struct nf_sockopt_ops so_getorigdst = { + .pf = PF_INET, + .get_optmin = SO_ORIGINAL_DST, + .get_optmax = SO_ORIGINAL_DST+1, + .get = &getorigdst, +}; #define NET_IP_CONNTRACK_MAX 2089 #define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max" @@ -1367,7 +1368,6 @@ static ctl_table ip_conntrack_dir_table[] = { { .ctl_name = NET_IPV4, .procname = "ipv4", - .maxlen = 0, .mode = 0555, .child = ip_conntrack_table }, @@ -1378,7 +1378,6 @@ static ctl_table ip_conntrack_root_table[] = { { .ctl_name = CTL_NET, .procname = 
"net", - .maxlen = 0, .mode = 0555, .child = ip_conntrack_dir_table }, diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index fa43bb82bcea..51a48f3f6be4 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -555,9 +555,7 @@ ipq_rcv_dev_event(struct notifier_block *this, } static struct notifier_block ipq_dev_notifier = { - ipq_rcv_dev_event, - NULL, - 0 + .notifier_call = ipq_rcv_dev_event, }; static int @@ -577,9 +575,7 @@ ipq_rcv_nl_event(struct notifier_block *this, } static struct notifier_block ipq_nl_notifier = { - ipq_rcv_nl_event, - NULL, - 0 + .notifier_call = ipq_rcv_nl_event, }; static int sysctl_maxlen = IPQ_QMAX_DEFAULT; @@ -601,7 +597,6 @@ static ctl_table ipq_dir_table[] = { { .ctl_name = NET_IPV4, .procname = "ipv4", - .maxlen = 0, .mode = 0555, .child = ipq_table }, @@ -612,7 +607,6 @@ static ctl_table ipq_root_table[] = { { .ctl_name = CTL_NET, .procname = "net", - .maxlen = 0, .mode = 0555, .child = ipq_dir_table }, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3b46c237e28f..aecaa824d3fe 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1663,21 +1663,42 @@ icmp_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct ipt_target ipt_standard_target -= { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL }; -static struct ipt_target ipt_error_target -= { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL }; - -static struct nf_sockopt_ops ipt_sockopts -= { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl, - IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL }; - -static struct ipt_match tcp_matchstruct -= { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL }; -static struct ipt_match udp_matchstruct -= { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL }; -static struct ipt_match icmp_matchstruct -= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL }; +static struct ipt_target ipt_standard_target = { + .name = IPT_STANDARD_TARGET, +}; + +static struct ipt_target ipt_error_target = { + .name = IPT_ERROR_TARGET, + .target = ipt_error, +}; + +static struct nf_sockopt_ops ipt_sockopts = { + .pf = PF_INET, + .set_optmin = IPT_BASE_CTL, + .set_optmax = IPT_SO_SET_MAX+1, + .set = do_ipt_set_ctl, + .get_optmin = IPT_BASE_CTL, + .get_optmax = IPT_SO_GET_MAX+1, + .get = do_ipt_get_ctl, +}; + +static struct ipt_match tcp_matchstruct = { + .name = "tcp", + .match = &tcp_match, + .checkentry = &tcp_checkentry, +}; + +static struct ipt_match udp_matchstruct = { + .name = "udp", + .match = &udp_match, + .checkentry = &udp_checkentry, +}; + +static struct ipt_match icmp_matchstruct = { + .name = "icmp", + .match = &icmp_match, + .checkentry = &icmp_checkentry, +}; #ifdef CONFIG_PROC_FS static inline int print_name(const struct ipt_table *t, diff --git a/net/ipv4/netfilter/ipfwadm_core.c b/net/ipv4/netfilter/ipfwadm_core.c index fe270f207026..00c2e60fb222 100644 --- a/net/ipv4/netfilter/ipfwadm_core.c +++ b/net/ipv4/netfilter/ipfwadm_core.c @@ -1315,9 +1315,7 @@ int ipfw_device_event(struct notifier_block *this, unsigned long event, void *pt } static struct notifier_block ipfw_dev_notifier={ - ipfw_device_event, - NULL, - 0 + .notifier_call = ipfw_device_event, }; #endif diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c index 0087dd88b834..0b7827279481 100644 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ b/net/ipv4/netfilter/ipt_DSCP.c 
@@ -88,8 +88,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_dscp_reg -= { { NULL, NULL }, "DSCP", target, checkentry, NULL, THIS_MODULE }; +static struct ipt_target ipt_dscp_reg = { + .name = "DSCP", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index c5e221a162d1..02fa43bc08b8 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -164,8 +164,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_ecn_reg -= { { NULL, NULL }, "ECN", target, checkentry, NULL, THIS_MODULE }; +static struct ipt_target ipt_ecn_reg = { + .name = "ECN", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 5c14f3357dcc..2b0dca70ddc7 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -350,9 +350,12 @@ static int ipt_log_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_log_reg -= { { NULL, NULL }, "LOG", ipt_log_target, ipt_log_checkentry, NULL, - THIS_MODULE }; +static struct ipt_target ipt_log_reg = { + .name = "LOG", + .target = ipt_log_target, + .checkentry = ipt_log_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c index 63a998d6c719..88ee79eee79d 100644 --- a/net/ipv4/netfilter/ipt_MARK.c +++ b/net/ipv4/netfilter/ipt_MARK.c @@ -46,8 +46,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_mark_reg -= { { NULL, NULL }, "MARK", target, checkentry, NULL, THIS_MODULE }; +static struct ipt_target ipt_mark_reg = { + .name = "MARK", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index fcbc2341447e..17123f5be547 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -167,20 +167,19 @@ static int masq_inet_event(struct notifier_block *this, } static struct notifier_block masq_dev_notifier = { - masq_device_event, - NULL, - 0 + .notifier_call = masq_device_event, }; static struct notifier_block masq_inet_notifier = { - masq_inet_event, - NULL, - 0 + .notifier_call = masq_inet_event, }; -static struct ipt_target masquerade -= { { NULL, NULL }, "MASQUERADE", masquerade_target, masquerade_check, NULL, - THIS_MODULE }; +static struct ipt_target masquerade = { + .name = "MASQUERADE", + .target = masquerade_target, + .checkentry = masquerade_check, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c index ba003e5a0dd1..7d4e28a407f1 100644 --- a/net/ipv4/netfilter/ipt_MIRROR.c +++ b/net/ipv4/netfilter/ipt_MIRROR.c @@ -157,9 +157,12 @@ static int ipt_mirror_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_mirror_reg -= { { NULL, NULL }, "MIRROR", ipt_mirror_target, ipt_mirror_checkentry, NULL, - THIS_MODULE }; +static struct ipt_target ipt_mirror_reg = { + .name = "MIRROR", + .target = ipt_mirror_target, + .checkentry = ipt_mirror_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 2e3b4de377da..133abf5395bf 100644 --- 
a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -96,9 +96,12 @@ redirect_target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &newrange, hooknum); } -static struct ipt_target redirect_reg -= { { NULL, NULL }, "REDIRECT", redirect_target, redirect_check, NULL, - THIS_MODULE }; +static struct ipt_target redirect_reg = { + .name = "REDIRECT", + .target = redirect_target, + .checkentry = redirect_check, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 83181dbaa8e7..a2a9ec092a60 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -386,8 +386,12 @@ static int check(const char *tablename, return 1; } -static struct ipt_target ipt_reject_reg -= { { NULL, NULL }, "REJECT", reject, check, NULL, THIS_MODULE }; +static struct ipt_target ipt_reject_reg = { + .name = "REJECT", + .target = reject, + .checkentry = check, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index e8be04200d2a..637cf61d27e2 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -238,9 +238,12 @@ ipt_tcpmss_checkentry(const char *tablename, return 0; } -static struct ipt_target ipt_tcpmss_reg -= { { NULL, NULL }, "TCPMSS", - ipt_tcpmss_target, ipt_tcpmss_checkentry, NULL, THIS_MODULE }; +static struct ipt_target ipt_tcpmss_reg = { + .name = "TCPMSS", + .target = ipt_tcpmss_target, + .checkentry = ipt_tcpmss_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 90d7173f3d0b..05d9a727c122 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -76,8 +76,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_tos_reg -= { { NULL, NULL }, "TOS", target, checkentry, NULL, THIS_MODULE }; +static struct ipt_target ipt_tos_reg = { + .name = "TOS", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 109e45d17cdc..f96309864ec7 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -304,9 +304,11 @@ static int ipt_ulog_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_ulog_reg = - { {NULL, NULL}, "ULOG", ipt_ulog_target, ipt_ulog_checkentry, NULL, -THIS_MODULE +static struct ipt_target ipt_ulog_reg = { + .name = "ULOG", + .target = ipt_ulog_target, + .checkentry = ipt_ulog_checkentry, + .me = THIS_MODULE, }; static int __init init(void) diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index daff148233f6..61bdc7a39a37 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -87,8 +87,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match ah_match -= { { NULL, NULL }, "ah", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match ah_match = { + .name = "ah", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c index be3ab8af2cd4..5932a74bdd15 100644 --- a/net/ipv4/netfilter/ipt_conntrack.c +++ b/net/ipv4/netfilter/ipt_conntrack.c @@ -100,8 +100,12 @@ static int check(const char *tablename, return 1; } -static struct ipt_match 
conntrack_match -= { { NULL, NULL }, "conntrack", &match, &check, NULL, THIS_MODULE }; +static struct ipt_match conntrack_match = { + .name = "conntrack", + .match = &match, + .checkentry = &check, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c index dabee1a4885e..7d840322b99b 100644 --- a/net/ipv4/netfilter/ipt_dscp.c +++ b/net/ipv4/netfilter/ipt_dscp.c @@ -40,8 +40,12 @@ static int checkentry(const char *tablename, const struct ipt_ip *ip, return 1; } -static struct ipt_match dscp_match = { { NULL, NULL }, "dscp", &match, - &checkentry, NULL, THIS_MODULE }; +static struct ipt_match dscp_match = { + .name = "dscp", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 0a0f53910a06..58d12ff190f4 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -101,8 +101,12 @@ static int checkentry(const char *tablename, const struct ipt_ip *ip, return 1; } -static struct ipt_match ecn_match = { { NULL, NULL }, "ecn", &match, - &checkentry, NULL, THIS_MODULE }; +static struct ipt_match ecn_match = { + .name = "ecn", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c index 3cac00648448..46ca560f358b 100644 --- a/net/ipv4/netfilter/ipt_esp.c +++ b/net/ipv4/netfilter/ipt_esp.c @@ -87,8 +87,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match esp_match -= { { NULL, NULL }, "esp", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match esp_match = { + .name = "esp", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c index b722f7c211e4..7f0997fbef5e 100644 --- a/net/ipv4/netfilter/ipt_helper.c +++ b/net/ipv4/netfilter/ipt_helper.c @@ -89,8 +89,12 @@ static int check(const char *tablename, return 1; } -static struct ipt_match helper_match -= { { NULL, NULL }, "helper", &match, &check, NULL, THIS_MODULE }; +static struct ipt_match helper_match = { + .name = "helper", + .match = &match, + .checkentry = &check, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_length.c b/net/ipv4/netfilter/ipt_length.c index 0cc00f1172ff..91cf0a76a89c 100644 --- a/net/ipv4/netfilter/ipt_length.c +++ b/net/ipv4/netfilter/ipt_length.c @@ -38,8 +38,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match length_match -= { { NULL, NULL }, "length", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match length_match = { + .name = "length", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c index 6f8124194d37..515acef2279e 100644 --- a/net/ipv4/netfilter/ipt_limit.c +++ b/net/ipv4/netfilter/ipt_limit.c @@ -115,9 +115,12 @@ ipt_limit_checkentry(const char *tablename, return 1; } -static struct ipt_match ipt_limit_reg -= { { NULL, NULL }, "limit", ipt_limit_match, ipt_limit_checkentry, NULL, - THIS_MODULE }; +static struct ipt_match ipt_limit_reg = { + .name = "limit", + .match = ipt_limit_match, + .checkentry = ipt_limit_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git 
a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c index b320e29b13ed..7a5ed1c5993e 100644 --- a/net/ipv4/netfilter/ipt_mac.c +++ b/net/ipv4/netfilter/ipt_mac.c @@ -47,8 +47,12 @@ ipt_mac_checkentry(const char *tablename, return 1; } -static struct ipt_match mac_match -= { { NULL, NULL }, "mac", &match, &ipt_mac_checkentry, NULL, THIS_MODULE }; +static struct ipt_match mac_match = { + .name = "mac", + .match = &match, + .checkentry = &ipt_mac_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c index 05066530ee5c..14154f02aa80 100644 --- a/net/ipv4/netfilter/ipt_mark.c +++ b/net/ipv4/netfilter/ipt_mark.c @@ -33,8 +33,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match mark_match -= { { NULL, NULL }, "mark", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match mark_match = { + .name = "mark", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c index 5efaf9df61b0..6b7bc044f58d 100644 --- a/net/ipv4/netfilter/ipt_multiport.c +++ b/net/ipv4/netfilter/ipt_multiport.c @@ -86,8 +86,12 @@ checkentry(const char *tablename, && multiinfo->count <= IPT_MULTI_PORTS; } -static struct ipt_match multiport_match -= { { NULL, NULL }, "multiport", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match multiport_match = { + .name = "multiport", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index acb9997276b4..30f458312892 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -176,8 +176,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match owner_match -= { { NULL, NULL }, "owner", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match owner_match = { + .name = "owner", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c index 2978c1df81ba..82f72776a02f 100644 --- a/net/ipv4/netfilter/ipt_physdev.c +++ b/net/ipv4/netfilter/ipt_physdev.c @@ -63,8 +63,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match physdev_match -= { { NULL, NULL }, "physdev", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match physdev_match = { + .name = "physdev", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_pkttype.c b/net/ipv4/netfilter/ipt_pkttype.c index 3b6ccd70e6f9..b59cd2fddb2a 100644 --- a/net/ipv4/netfilter/ipt_pkttype.c +++ b/net/ipv4/netfilter/ipt_pkttype.c @@ -42,8 +42,12 @@ static int checkentry(const char *tablename, return 1; } -static struct ipt_match pkttype_match -= { { NULL, NULL }, "pkttype", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match pkttype_match = { + .name = "pkttype", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c index 1849bf882dcf..026f1039dc9e 100644 --- a/net/ipv4/netfilter/ipt_state.c +++ b/net/ipv4/netfilter/ipt_state.c @@ -41,8 +41,12 @@ static int check(const char *tablename, return 1; } -static struct 
ipt_match state_match -= { { NULL, NULL }, "state", &match, &check, NULL, THIS_MODULE }; +static struct ipt_match state_match = { + .name = "state", + .match = &match, + .checkentry = &check, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/ipv4/netfilter/ipt_tcpmss.c index 001f7a83777a..0f3e38b9951e 100644 --- a/net/ipv4/netfilter/ipt_tcpmss.c +++ b/net/ipv4/netfilter/ipt_tcpmss.c @@ -91,8 +91,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match tcpmss_match -= { { NULL, NULL }, "tcpmss", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match tcpmss_match = { + .name = "tcpmss", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index 4f51305e1b5f..11c94302c774 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c @@ -34,8 +34,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match tos_match -= { { NULL, NULL }, "tos", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match tos_match = { + .name = "tos", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index f4227e553fe2..b9657e1d9200 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -57,8 +57,12 @@ static int checkentry(const char *tablename, const struct ipt_ip *ip, return 1; } -static struct ipt_match ttl_match = { { NULL, NULL }, "ttl", &match, - &checkentry, NULL, THIS_MODULE }; +static struct ipt_match ttl_match = { + .name = "ttl", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/netfilter/ipt_unclean.c b/net/ipv4/netfilter/ipt_unclean.c index 74c6b9f0cddf..6c96d2729d94 100644 --- a/net/ipv4/netfilter/ipt_unclean.c +++ b/net/ipv4/netfilter/ipt_unclean.c @@ -580,8 +580,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match unclean_match -= { { NULL, NULL }, "unclean", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match unclean_match = { + .name = "unclean", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 577db46dbe7d..96b160ea59e7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -280,7 +280,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length, if (flags&MSG_PROBE) goto out; - hh_len = (rt->u.dst.dev->hard_header_len&~15) + 16; + hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); skb = sock_alloc_send_skb(sk, length+hh_len+15, flags&MSG_DONTWAIT, &err); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d3ede686db2b..1bcb395fdd5b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -659,7 +659,7 @@ static int wait_for_tcp_connect(struct sock *sk, int flags, long *timeo_p) { struct tcp_opt *tp = tcp_sk(sk); struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); + DEFINE_WAIT(wait); while ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { if (sk->err) @@ -671,16 +671,14 @@ static int wait_for_tcp_connect(struct sock *sk, int flags, long *timeo_p) if (signal_pending(tsk)) return sock_intr_errno(*timeo_p); - __set_task_state(tsk, TASK_INTERRUPTIBLE); - add_wait_queue(sk->sleep, &wait); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); tp->write_pending++; 
release_sock(sk); *timeo_p = schedule_timeout(*timeo_p); lock_sock(sk); - __set_task_state(tsk, TASK_RUNNING); - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); tp->write_pending--; } return 0; @@ -700,16 +698,15 @@ static int wait_for_tcp_memory(struct sock *sk, long *timeo) int err = 0; long vm_wait = 0; long current_timeo = *timeo; - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); if (tcp_memory_free(sk)) current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; - add_wait_queue(sk->sleep, &wait); for (;;) { set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); if (sk->err || (sk->shutdown & SEND_SHUTDOWN)) goto do_error; @@ -740,8 +737,7 @@ static int wait_for_tcp_memory(struct sock *sk, long *timeo) *timeo = current_timeo; } out: - current->state = TASK_RUNNING; - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); return err; do_error: @@ -1374,11 +1370,9 @@ static void cleanup_rbuf(struct sock *sk, int copied) static long tcp_data_wait(struct sock *sk, long timeo) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); - add_wait_queue(sk->sleep, &wait); - - __set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags); release_sock(sk); @@ -1389,8 +1383,7 @@ static long tcp_data_wait(struct sock *sk, long timeo) lock_sock(sk); clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags); - remove_wait_queue(sk->sleep, &wait); - __set_current_state(TASK_RUNNING); + finish_wait(sk->sleep, &wait); return timeo; } @@ -2017,12 +2010,10 @@ void tcp_close(struct sock *sk, long timeout) if (timeout) { struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(sk->sleep, &wait); + DEFINE_WAIT(wait); do { - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); if (!closing(sk)) break; release_sock(sk); @@ -2030,8 +2021,7 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); } while (!signal_pending(tsk) && timeout); - tsk->state = TASK_RUNNING; - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); } adjudge_to_death: @@ -2191,7 +2181,7 @@ int tcp_disconnect(struct sock *sk, int flags) static int wait_for_connect(struct sock *sk, long timeo) { struct tcp_opt *tp = tcp_sk(sk); - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); int err; /* @@ -2208,9 +2198,8 @@ static int wait_for_connect(struct sock *sk, long timeo) * our exclusiveness temporarily when we get woken up without * having to remove and re-insert us on the wait queue. 
*/ - add_wait_queue_exclusive(sk->sleep, &wait); for (;;) { - current->state = TASK_INTERRUPTIBLE; + prepare_to_wait_exclusive(sk->sleep, &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (!tp->accept_queue) timeo = schedule_timeout(timeo); @@ -2228,8 +2217,7 @@ static int wait_for_connect(struct sock *sk, long timeo) if (!timeo) break; } - current->state = TASK_RUNNING; - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); return err; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 57a64d5f6905..384ae4f412df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -334,11 +334,11 @@ void tcp_listen_wlock(void) write_lock(&tcp_lhash_lock); if (atomic_read(&tcp_lhash_users)) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); - add_wait_queue_exclusive(&tcp_lhash_wait, &wait); for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + prepare_to_wait_exclusive(&tcp_lhash_wait, + &wait, TASK_UNINTERRUPTIBLE); if (!atomic_read(&tcp_lhash_users)) break; write_unlock_bh(&tcp_lhash_lock); @@ -346,8 +346,7 @@ void tcp_listen_wlock(void) write_lock_bh(&tcp_lhash_lock); } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&tcp_lhash_wait, &wait); + finish_wait(&tcp_lhash_wait, &wait); } } @@ -853,11 +852,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ __sk_dst_set(sk, &rt->u.dst); tcp_v4_setup_caps(sk, &rt->u.dst); - - /* DAVEM REDPEN: This used to sit above forced ext_header_len = 0 - * above, it was real bug. Is this one correct? - */ - tp->ext_header_len += rt->u.dst.header_len; + tp->ext2_header_len = rt->u.dst.header_len; if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->saddr, @@ -868,6 +863,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->id = tp->write_seq ^ jiffies; err = tcp_connect(sk); + rt = NULL; if (err) goto failure; @@ -1611,7 +1607,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->ext_header_len = 0; if (newinet->opt) newtp->ext_header_len = newinet->opt->optlen; - newtp->ext_header_len += dst->header_len; + newtp->ext2_header_len = dst->header_len; newinet->id = newtp->write_seq ^ jiffies; tcp_sync_mss(newsk, dst_pmtu(dst)); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6a6790d17267..fd3baeb58b8f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -570,7 +570,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) mss_now = tp->mss_clamp; /* Now subtract optional transport overhead */ - mss_now -= tp->ext_header_len; + mss_now -= tp->ext_header_len + tp->ext2_header_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) @@ -591,7 +591,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) int large_mss; large_mss = 65535 - tp->af_specific->net_header_len - - tp->ext_header_len - tp->tcp_header_len; + tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len; if (tp->max_window && large_mss > (tp->max_window>>1)) large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len); diff --git a/net/ipv4/xfrm_policy.c b/net/ipv4/xfrm_policy.c index 79293260fd16..ca17e04041e3 100644 --- a/net/ipv4/xfrm_policy.c +++ b/net/ipv4/xfrm_policy.c @@ -347,6 +347,7 @@ static void xfrm_policy_timer(unsigned long data) struct xfrm_policy *xp = (struct xfrm_policy*)data; unsigned long now = (unsigned long)xtime.tv_sec; long next = LONG_MAX; + u32 index; if (xp->dead) goto out; @@ -368,10 +369,11 @@ out: return; expired: + index = 
xp->index; xfrm_pol_put(xp); /* Not 100% correct. id can be recycled in theory */ - xp = xfrm_policy_byid(0, xp->index, 1); + xp = xfrm_policy_byid(0, index, 1); if (xp) { xfrm_policy_kill(xp); xfrm_pol_put(xp); @@ -894,6 +896,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, int i; int err; int header_len = 0; + int trailer_len = 0; dst = dst_prev = NULL; @@ -919,6 +922,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, local = xfrm[i]->props.saddr.xfrm4_addr; } header_len += xfrm[i]->props.header_len; + trailer_len += xfrm[i]->props.trailer_len; } if (remote != fl->fl4_dst) { @@ -945,6 +949,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics)); dst_prev->path = &rt->u.dst; @@ -964,6 +969,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, x->u.rt.rt_gateway = rt->rt_gateway; x->u.rt.rt_spec_dst = rt0->rt_spec_dst; header_len -= x->u.dst.xfrm->props.header_len; + trailer_len -= x->u.dst.xfrm->props.trailer_len; } *dst_p = dst; return 0; @@ -987,6 +993,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx int i; int err = 0; int header_len = 0; + int trailer_len = 0; dst = dst_prev = NULL; @@ -1012,6 +1019,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx local = (struct in6_addr*)&xfrm[i]->props.saddr; } header_len += xfrm[i]->props.header_len; + trailer_len += xfrm[i]->props.trailer_len; } if (ipv6_addr_cmp(remote, fl->fl6_dst)) { @@ -1038,6 +1046,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics)); dst_prev->path = &rt->u.dst; @@ -1054,6 +1063,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx x->u.rt6.rt6i_gateway = rt0->rt6i_gateway; memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); header_len -= x->u.dst.xfrm->props.header_len; + trailer_len -= x->u.dst.xfrm->props.trailer_len; } *dst_p = dst; return 0; @@ -1082,6 +1092,17 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family = (*dst_p)->ops->family; + switch (family) { + case AF_INET: + if (!fl->fl4_src) + fl->fl4_src = rt->rt_src; + if (!fl->fl4_dst) + fl->fl4_dst = rt->rt_dst; + case AF_INET6: + /* Still not clear... */ + default: + } + restart: genid = xfrm_policy_genid; policy = NULL; @@ -1120,8 +1141,6 @@ restart: * is required only for output policy. */ if (family == AF_INET) { - fl->oif = rt->u.dst.dev->ifindex; - fl->fl4_src = rt->rt_src; read_lock_bh(&policy->lock); for (dst = policy->bundles; dst; dst = dst->next) { struct xfrm_dst *xdst = (struct xfrm_dst*)dst; @@ -1451,10 +1470,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (pol->action == XFRM_POLICY_ALLOW) { if (pol->xfrm_nr != 0) { struct sec_path *sp; + static struct sec_path dummy; int i, k; if ((sp = skb->sp) == NULL) - goto reject; + sp = &dummy; /* For each tmpl search corresponding xfrm. * Order is _important_. 
Later we will implement @@ -1462,6 +1482,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, * are implied between each two transformations. */ for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { + if (pol->xfrm_vec[i].optional) + continue; switch (family) { case AF_INET: k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k); diff --git a/net/ipv4/xfrm_state.c b/net/ipv4/xfrm_state.c index 5492bcfb85ba..6f9b2693c696 100644 --- a/net/ipv4/xfrm_state.c +++ b/net/ipv4/xfrm_state.c @@ -501,7 +501,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) { - int nhead = x->props.header_len + skb->dst->dev->hard_header_len + int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) - skb_headroom(skb); if (nhead > 0) diff --git a/net/ipv4/xfrm_user.c b/net/ipv4/xfrm_user.c index a94f5023cf21..28a44311a9c5 100644 --- a/net/ipv4/xfrm_user.c +++ b/net/ipv4/xfrm_user.c @@ -46,8 +46,14 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) algp = RTA_DATA(rt); switch (type) { case XFRMA_ALG_AUTH: + if (!algp->alg_key_len && + strcmp(algp->alg_name, "digest_null") != 0) + return -EINVAL; + break; + case XFRMA_ALG_CRYPT: - if (!algp->alg_key_len) + if (!algp->alg_key_len && + strcmp(algp->alg_name, "cipher_null") != 0) return -EINVAL; break; diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 6d1c166c23bd..3fec5c7e0093 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_IPV6) += ipv6.o -ipv6-objs := af_inet6.o ip6_output.o ip6_input.o addrconf.o sit.o \ +ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6008ffc6fbcd..b7501ff428ba 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -174,19 +174,13 @@ const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; int ipv6_addr_type(struct in6_addr *addr) { + int type; u32 st; st = addr->s6_addr32[0]; - /* Consider all addresses with the first three bits different of - 000 and 111 as unicasts. - */ - if ((st & htonl(0xE0000000)) != htonl(0x00000000) && - (st & htonl(0xE0000000)) != htonl(0xE0000000)) - return IPV6_ADDR_UNICAST; - if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { - int type = IPV6_ADDR_MULTICAST; + type = IPV6_ADDR_MULTICAST; switch((st & htonl(0x00FF0000))) { case __constant_htonl(0x00010000): @@ -203,29 +197,53 @@ int ipv6_addr_type(struct in6_addr *addr) }; return type; } + /* check for reserved anycast addresses */ + + if ((st & htonl(0xE0000000)) && + ((addr->s6_addr32[2] == htonl(0xFDFFFFFF) && + (addr->s6_addr32[3] | htonl(0x7F)) == (u32)~0) || + (addr->s6_addr32[2] == 0 && addr->s6_addr32[3] == 0))) + type = IPV6_ADDR_ANYCAST; + else + type = IPV6_ADDR_UNICAST; + + /* Consider all addresses with the first three bits different of + 000 and 111 as finished. 
+ */ + if ((st & htonl(0xE0000000)) != htonl(0x00000000) && + (st & htonl(0xE0000000)) != htonl(0xE0000000)) + return type; if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) - return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST); + return (IPV6_ADDR_LINKLOCAL | type); if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) - return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST); + return (IPV6_ADDR_SITELOCAL | type); if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { - if (addr->s6_addr32[3] == 0) + if (addr->in6_u.u6_addr32[3] == 0) return IPV6_ADDR_ANY; if (addr->s6_addr32[3] == htonl(0x00000001)) - return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST); + return (IPV6_ADDR_LOOPBACK | type); - return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST); + return (IPV6_ADDR_COMPATv4 | type); } if (addr->s6_addr32[2] == htonl(0x0000ffff)) return IPV6_ADDR_MAPPED; } - return IPV6_ADDR_RESERVED; + st &= htonl(0xFF000000); + if (st == 0) + return IPV6_ADDR_RESERVED; + st &= htonl(0xFE000000); + if (st == htonl(0x02000000)) + return IPV6_ADDR_RESERVED; /* for NSAP */ + if (st == htonl(0x04000000)) + return IPV6_ADDR_RESERVED; /* for IPX */ + return type; } static void addrconf_del_timer(struct inet6_ifaddr *ifp) @@ -261,7 +279,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, add_timer(&ifp->timer); } - /* Nobody refers to this device, we may destroy it. */ void in6_dev_finish_destroy(struct inet6_dev *idev) @@ -358,24 +375,91 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev) return idev; } +void ipv6_addr_prefix(struct in6_addr *prefix, + struct in6_addr *addr, int prefix_len) +{ + unsigned long mask; + int ncopy, nbits; + + memset(prefix, 0, sizeof(*prefix)); + + if (prefix_len <= 0) + return; + if (prefix_len > 128) + prefix_len = 128; + + ncopy = prefix_len / 32; + switch (ncopy) { + case 4: prefix->s6_addr32[3] = addr->s6_addr32[3]; + case 3: prefix->s6_addr32[2] = addr->s6_addr32[2]; + case 2: prefix->s6_addr32[1] = addr->s6_addr32[1]; + case 1: prefix->s6_addr32[0] = addr->s6_addr32[0]; + case 0: break; + } + nbits = prefix_len % 32; + if (nbits == 0) + return; + + mask = ~((1 << (32 - nbits)) - 1); + mask = htonl(mask); + + prefix->s6_addr32[ncopy] = addr->s6_addr32[ncopy] & mask; +} + + +static void dev_forward_change(struct inet6_dev *idev) +{ + struct net_device *dev; + struct inet6_ifaddr *ifa; + struct in6_addr addr; + + if (!idev) + return; + dev = idev->dev; + if (dev && (dev->flags & IFF_MULTICAST)) { + ipv6_addr_all_routers(&addr); + + if (idev->cnf.forwarding) + ipv6_dev_mc_inc(dev, &addr); + else + ipv6_dev_mc_dec(dev, &addr); + } + for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { + ipv6_addr_prefix(&addr, &ifa->addr, ifa->prefix_len); + if (addr.s6_addr32[0] == 0 && addr.s6_addr32[1] == 0 && + addr.s6_addr32[2] == 0 && addr.s6_addr32[3] == 0) + continue; + if (idev->cnf.forwarding) + ipv6_dev_ac_inc(idev->dev, &addr); + else + ipv6_dev_ac_dec(idev->dev, &addr); + } +} + + static void addrconf_forward_change(struct inet6_dev *idev) { struct net_device *dev; - if (idev) + if (idev) { + dev_forward_change(idev); return; + } read_lock(&dev_base_lock); for (dev=dev_base; dev; dev=dev->next) { read_lock(&addrconf_lock); idev = __in6_dev_get(dev); - if (idev) + if (idev) { idev->cnf.forwarding = ipv6_devconf.forwarding; + dev_forward_change(idev); + } read_unlock(&addrconf_lock); } read_unlock(&dev_base_lock); } + /* Nobody refers to this ifaddr, destroy it */ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) @@ -658,30 +742,20 @@ static int 
inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref) #define IPV6_GET_SADDR_MAXSCORE(score) (score) #endif -int ipv6_get_saddr(struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) +int ipv6_dev_get_saddr(struct net_device *dev, + struct in6_addr *daddr, struct in6_addr *saddr, int onlink) { - int scope; struct inet6_ifaddr *ifp = NULL; struct inet6_ifaddr *match = NULL; - struct net_device *dev = NULL; struct inet6_dev *idev; - struct rt6_info *rt; + int scope; int err; int hiscore = -1, score; - rt = (struct rt6_info *) dst; - if (rt) - dev = rt->rt6i_dev; - - scope = ipv6_addr_scope(daddr); - if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) { - /* - * route for the "all destinations on link" rule - * when no routers are present - */ + if (!onlink) + scope = ipv6_addr_scope(daddr); + else scope = IFA_LINK; - } /* * known dev @@ -782,6 +856,24 @@ out: return err; } + +int ipv6_get_saddr(struct dst_entry *dst, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct rt6_info *rt; + struct net_device *dev = NULL; + int onlink; + + rt = (struct rt6_info *) dst; + if (rt) + dev = rt->rt6i_dev; + + onlink = (rt && (rt->rt6i_flags & RTF_ALLONLINK)); + + return ipv6_dev_get_saddr(dev, daddr, saddr, onlink); +} + + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; @@ -889,7 +981,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) /* Join to solicited addr multicast group. */ -static void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) +void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) { struct in6_addr maddr; @@ -900,7 +992,7 @@ static void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) ipv6_dev_mc_inc(dev, &maddr); } -static void addrconf_leave_solict(struct net_device *dev, struct in6_addr *addr) +void addrconf_leave_solict(struct net_device *dev, struct in6_addr *addr) { struct in6_addr maddr; @@ -1937,6 +2029,15 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); spin_unlock_bh(&ifp->lock); } + + if (ifp->idev->cnf.forwarding) { + struct in6_addr addr; + + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); + if (addr.s6_addr32[0] || addr.s6_addr32[1] || + addr.s6_addr32[2] || addr.s6_addr32[3]) + ipv6_dev_ac_inc(ifp->idev->dev, &addr); + } } #ifdef CONFIG_PROC_FS @@ -2267,6 +2368,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) break; case RTM_DELADDR: addrconf_leave_solict(ifp->idev->dev, &ifp->addr); + if (ifp->idev->cnf.forwarding) { + struct in6_addr addr; + + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); + if (addr.s6_addr32[0] || addr.s6_addr32[1] || + addr.s6_addr32[2] || addr.s6_addr32[3]) + ipv6_dev_ac_dec(ifp->idev->dev, &addr); + } if (!ipv6_chk_addr(&ifp->addr, NULL)) ip6_rt_addr_del(&ifp->addr, ifp->idev->dev); break; @@ -2289,11 +2398,7 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, struct inet6_dev *idev = NULL; if (valp != &ipv6_devconf.forwarding) { - struct net_device *dev = dev_get_by_index(ctl->ctl_name); - if (dev) { - idev = in6_dev_get(dev); - dev_put(dev); - } + idev = (struct inet6_dev *)ctl->extra1; if (idev == NULL) return ret; } else @@ -2303,8 +2408,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, if (*valp) rt6_purge_dflt_routers(0); - if (idev) - in6_dev_put(idev); } return ret; @@ -2491,6 +2594,7 @@ static void addrconf_sysctl_register(struct 
inet6_dev *idev, struct ipv6_devconf for (i=0; t->addrconf_vars[i].data; i++) { t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; t->addrconf_vars[i].de = NULL; + t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ } if (dev) { t->addrconf_dev[0].procname = dev->name; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0de85e16ed37..ed048980e8a7 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -74,6 +74,7 @@ MODULE_PARM(unloadable, "i"); /* IPv6 procfs goodies... */ #ifdef CONFIG_PROC_FS +extern int anycast6_get_info(char *, char **, off_t, int); extern int raw6_get_info(char *, char **, off_t, int); extern int tcp6_get_info(char *, char **, off_t, int); extern int udp6_get_info(char *, char **, off_t, int); @@ -381,6 +382,9 @@ int inet6_release(struct socket *sock) /* Free mc lists */ ipv6_sock_mc_close(sk); + /* Free ac lists */ + ipv6_sock_ac_close(sk); + return inet_release(sock); } @@ -785,6 +789,8 @@ static int __init inet6_init(void) goto proc_sockstat6_fail; if (!proc_net_create("snmp6", 0, afinet6_get_snmp)) goto proc_snmp6_fail; + if (!proc_net_create("anycast6", 0, anycast6_get_info)) + goto proc_anycast6_fail; #endif ipv6_netdev_notif_init(); ipv6_packet_init(); @@ -800,6 +806,8 @@ static int __init inet6_init(void) return 0; #ifdef CONFIG_PROC_FS +proc_anycast6_fail: + proc_net_remove("anycast6"); proc_snmp6_fail: proc_net_remove("sockstat6"); proc_sockstat6_fail: @@ -837,6 +845,7 @@ static void inet6_exit(void) proc_net_remove("udp6"); proc_net_remove("sockstat6"); proc_net_remove("snmp6"); + proc_net_remove("anycast6"); #endif /* Cleanup code parts. */ sit_cleanup(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 778fa3b3c52f..468d94db3d29 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -60,9 +60,11 @@ int ah6_output(struct sk_buff *skb) struct ah_data *ahp; u16 nh_offset = 0; u8 nexthdr; -printk(KERN_DEBUG "%s\n", __FUNCTION__); - if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) - return -EINVAL; + + if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) { + err = -EINVAL; + goto error_nolock; + } spin_lock_bh(&x->lock); if ((err = xfrm_state_check_expire(x)) != 0) @@ -134,8 +136,10 @@ printk(KERN_DEBUG "%s\n", __FUNCTION__); x->curlft.bytes += skb->len; x->curlft.packets++; spin_unlock_bh(&x->lock); - if ((skb->dst = dst_pop(dst)) == NULL) + if ((skb->dst = dst_pop(dst)) == NULL) { + err = -EHOSTUNREACH; goto error_nolock; + } return NET_XMIT_BYPASS; error: spin_unlock_bh(&x->lock); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c new file mode 100644 index 000000000000..1fd038191f34 --- /dev/null +++ b/net/ipv6/anycast.c @@ -0,0 +1,489 @@ +/* + * Anycast support for IPv6 + * Linux INET6 implementation + * + * Authors: + * David L Stevens (dlstevens@us.ibm.com) + * + * based heavily on net/ipv6/mcast.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/random.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/route.h> +#include <linux/init.h> +#include <linux/proc_fs.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/if_inet6.h> +#include <net/ndisc.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> + +#include <net/checksum.h> + +/* Big ac list lock for all the sockets */ +static rwlock_t ipv6_sk_ac_lock = RW_LOCK_UNLOCKED; + +/* XXX ip6_addr_match() and ip6_onlink() really belong in net/core.c */ + +static int +ip6_addr_match(struct in6_addr *addr1, struct in6_addr *addr2, int prefix) +{ + __u32 mask; + int i; + + if (prefix > 128 || prefix < 0) + return 0; + if (prefix == 0) + return 1; + for (i=0; i<4; ++i) { + if (prefix >= 32) + mask = ~0; + else + mask = htonl(~0 << (32 - prefix)); + if ((addr1->s6_addr32[i] ^ addr2->s6_addr32[i]) & mask) + return 0; + prefix -= 32; + if (prefix <= 0) + break; + } + return 1; +} + +static int +ip6_onlink(struct in6_addr *addr, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + int onlink; + + onlink = 0; + read_lock(&addrconf_lock); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { + onlink = ip6_addr_match(addr, &ifa->addr, + ifa->prefix_len); + if (onlink) + break; + } + read_unlock_bh(&idev->lock); + } + read_unlock(&addrconf_lock); + return onlink; +} + + +/* + * socket join an anycast group + */ + +int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct net_device *dev = NULL; + struct inet6_dev *idev; + struct ipv6_ac_socklist *pac; + int ishost = !ipv6_devconf.forwarding; + int err = 0; + + if (ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST) + return -EINVAL; + + pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); + if (pac == NULL) + return -ENOMEM; + pac->acl_next = NULL; + ipv6_addr_copy(&pac->acl_addr, addr); + + if (ifindex == 0) { + struct rt6_info *rt; + + rt = rt6_lookup(addr, NULL, 0, 0); + if (rt) { + dev = rt->rt6i_dev; + dev_hold(dev); + dst_release(&rt->u.dst); + } else if (ishost) { + sock_kfree_s(sk, pac, sizeof(*pac)); + return -EADDRNOTAVAIL; + } else { + /* router, no matching interface: just pick one */ + + dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); + } + } else + dev = dev_get_by_index(ifindex); + + if (dev == NULL) { + sock_kfree_s(sk, pac, sizeof(*pac)); + return -ENODEV; + } + + idev = in6_dev_get(dev); + if (!idev) { + sock_kfree_s(sk, pac, sizeof(*pac)); + dev_put(dev); + if (ifindex) + return -ENODEV; + else + return -EADDRNOTAVAIL; + } + /* reset ishost, now that we have a specific device */ + ishost = !idev->cnf.forwarding; + in6_dev_put(idev); + + pac->acl_ifindex = dev->ifindex; + + /* XXX + * For hosts, allow link-local or matching prefix anycasts. + * This obviates the need for propagating anycast routes while + * still allowing some non-router anycast participation. 
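The ip6_addr_match()/ip6_onlink() helpers at the top of this file, like the ipv6_addr_prefix() helper added to addrconf.c earlier in the patch, are all masked prefix comparisons in network byte order. A byte-wise illustration of the same idea, offered as a sketch rather than the kernel helper itself:

#include <stdint.h>
#include <string.h>
#include <netinet/in.h>

/* Illustrative: nonzero if addr lies inside prefix/plen.  Equivalent in
 * effect to the word-at-a-time ip6_addr_match() above. */
static int in6_prefix_match(const struct in6_addr *addr,
			    const struct in6_addr *prefix, int plen)
{
	const uint8_t *a = addr->s6_addr, *p = prefix->s6_addr;
	int full, rest;

	if (plen < 0 || plen > 128)
		return 0;
	full = plen / 8;
	rest = plen % 8;
	if (full && memcmp(a, p, full) != 0)
		return 0;
	if (rest && ((a[full] ^ p[full]) & (uint8_t)(0xff00 >> rest)))
		return 0;
	return 1;
}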
+ * + * allow anyone to join anycasts that don't require a special route + * and can't be spoofs of unicast addresses (reserved anycast only) + */ + if (!ip6_onlink(addr, dev)) { + if (ishost) + err = -EADDRNOTAVAIL; + else if (!capable(CAP_NET_ADMIN)) + err = -EPERM; + if (err) { + sock_kfree_s(sk, pac, sizeof(*pac)); + dev_put(dev); + return err; + } + } else if (!(ipv6_addr_type(addr) & IPV6_ADDR_ANYCAST) && + !capable(CAP_NET_ADMIN)) + return -EPERM; + + err = ipv6_dev_ac_inc(dev, addr); + if (err) { + sock_kfree_s(sk, pac, sizeof(*pac)); + dev_put(dev); + return err; + } + + write_lock_bh(&ipv6_sk_ac_lock); + pac->acl_next = np->ipv6_ac_list; + np->ipv6_ac_list = pac; + write_unlock_bh(&ipv6_sk_ac_lock); + + dev_put(dev); + + return 0; +} + +/* + * socket leave an anycast group + */ +int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct net_device *dev; + struct ipv6_ac_socklist *pac, *prev_pac; + + write_lock_bh(&ipv6_sk_ac_lock); + prev_pac = 0; + for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { + if ((ifindex == 0 || pac->acl_ifindex == ifindex) && + ipv6_addr_cmp(&pac->acl_addr, addr) == 0) + break; + prev_pac = pac; + } + if (!pac) { + write_unlock_bh(&ipv6_sk_ac_lock); + return -ENOENT; + } + if (prev_pac) + prev_pac->acl_next = pac->acl_next; + else + np->ipv6_ac_list = pac->acl_next; + + write_unlock_bh(&ipv6_sk_ac_lock); + + dev = dev_get_by_index(pac->acl_ifindex); + if (dev) { + ipv6_dev_ac_dec(dev, &pac->acl_addr); + dev_put(dev); + } + sock_kfree_s(sk, pac, sizeof(*pac)); + return 0; +} + +void ipv6_sock_ac_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct net_device *dev = 0; + struct ipv6_ac_socklist *pac; + int prev_index; + + write_lock_bh(&ipv6_sk_ac_lock); + pac = np->ipv6_ac_list; + np->ipv6_ac_list = 0; + write_unlock_bh(&ipv6_sk_ac_lock); + + prev_index = 0; + while (pac) { + struct ipv6_ac_socklist *next = pac->acl_next; + + if (pac->acl_ifindex != prev_index) { + if (dev) + dev_put(dev); + dev = dev_get_by_index(pac->acl_ifindex); + prev_index = pac->acl_ifindex; + } + if (dev) + ipv6_dev_ac_dec(dev, &pac->acl_addr); + sock_kfree_s(sk, pac, sizeof(*pac)); + pac = next; + } + if (dev) + dev_put(dev); +} + +int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex) +{ + struct ipv6_ac_socklist *pac; + struct ipv6_pinfo *np = inet6_sk(sk); + int found; + + found = 0; + read_lock(&ipv6_sk_ac_lock); + for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) { + if (ifindex && pac->acl_ifindex != ifindex) + continue; + found = ipv6_addr_cmp(&pac->acl_addr, addr) == 0; + if (found) + break; + } + read_unlock(&ipv6_sk_ac_lock); + + return found; +} + +static void aca_put(struct ifacaddr6 *ac) +{ + if (atomic_dec_and_test(&ac->aca_refcnt)) { + in6_dev_put(ac->aca_idev); + kfree(ac); + } +} + +/* + * device anycast group inc (add if not found) + */ +int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) +{ + struct ifacaddr6 *aca; + struct inet6_dev *idev; + + idev = in6_dev_get(dev); + + if (idev == NULL) + return -EINVAL; + + write_lock_bh(&idev->lock); + if (idev->dead) { + write_unlock_bh(&idev->lock); + in6_dev_put(idev); + return -ENODEV; + } + + for (aca = idev->ac_list; aca; aca = aca->aca_next) { + if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) { + aca->aca_users++; + write_unlock_bh(&idev->lock); + in6_dev_put(idev); + return 0; + } + } + + /* + * not found: create a new one. 
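For context, a minimal userspace sketch of what ipv6_sock_ac_join()/ipv6_sock_ac_drop() above end up servicing. The IPV6_JOIN_ANYCAST option and the ipv6mr_acaddr/ipv6mr_ifindex spellings come from this patch series' kernel headers; glibc's struct ipv6_mreq exposes ipv6mr_multiaddr/ipv6mr_interface instead, and the numeric fallbacks below are assumptions to be checked against <linux/in6.h>:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <sys/socket.h>

#ifndef IPV6_JOIN_ANYCAST            /* assumed fallback values */
#define IPV6_JOIN_ANYCAST  27
#define IPV6_LEAVE_ANYCAST 28
#endif

int main(void)
{
	struct ipv6_mreq mreq;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&mreq, 0, sizeof(mreq));
	/* subnet-router anycast of a documentation /64; eth0 is an example */
	inet_pton(AF_INET6, "2001:db8:1:2::", &mreq.ipv6mr_multiaddr);
	mreq.ipv6mr_interface = if_nametoindex("eth0");

	if (setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_ANYCAST,
		       &mreq, sizeof(mreq)) < 0)
		perror("IPV6_JOIN_ANYCAST");
	return 0;
}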
+ */ + + aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC); + + if (aca == NULL) { + write_unlock_bh(&idev->lock); + in6_dev_put(idev); + return -ENOMEM; + } + + memset(aca, 0, sizeof(struct ifacaddr6)); + + ipv6_addr_copy(&aca->aca_addr, addr); + aca->aca_idev = idev; + aca->aca_users = 1; + atomic_set(&aca->aca_refcnt, 2); + aca->aca_lock = SPIN_LOCK_UNLOCKED; + + aca->aca_next = idev->ac_list; + idev->ac_list = aca; + write_unlock_bh(&idev->lock); + + ip6_rt_addr_add(&aca->aca_addr, dev); + + addrconf_join_solict(dev, &aca->aca_addr); + + aca_put(aca); + return 0; +} + +/* + * device anycast group decrement + */ +int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) +{ + struct inet6_dev *idev; + struct ifacaddr6 *aca, *prev_aca; + + idev = in6_dev_get(dev); + if (idev == NULL) + return -ENODEV; + + write_lock_bh(&idev->lock); + prev_aca = 0; + for (aca = idev->ac_list; aca; aca = aca->aca_next) { + if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) + break; + prev_aca = aca; + } + if (!aca) { + write_unlock_bh(&idev->lock); + in6_dev_put(idev); + return -ENOENT; + } + if (--aca->aca_users > 0) { + write_unlock_bh(&idev->lock); + in6_dev_put(idev); + return 0; + } + if (prev_aca) + prev_aca->aca_next = aca->aca_next; + else + idev->ac_list = aca->aca_next; + write_unlock_bh(&idev->lock); + addrconf_leave_solict(dev, &aca->aca_addr); + + ip6_rt_addr_del(&aca->aca_addr, dev); + + aca_put(aca); + in6_dev_put(idev); + return 0; +} + +/* + * check if the interface has this anycast address + */ +static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) +{ + struct inet6_dev *idev; + struct ifacaddr6 *aca; + + idev = in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + for (aca = idev->ac_list; aca; aca = aca->aca_next) + if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) + break; + read_unlock_bh(&idev->lock); + in6_dev_put(idev); + return aca != 0; + } + return 0; +} + +/* + * check if given interface (or any, if dev==0) has this anycast address + */ +int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) +{ + if (dev) + return ipv6_chk_acast_dev(dev, addr); + read_lock(&dev_base_lock); + for (dev=dev_base; dev; dev=dev->next) + if (ipv6_chk_acast_dev(dev, addr)) + break; + read_unlock(&dev_base_lock); + return dev != 0; +} + + +#ifdef CONFIG_PROC_FS +int anycast6_get_info(char *buffer, char **start, off_t offset, int length) +{ + off_t pos=0, begin=0; + struct ifacaddr6 *im; + int len=0; + struct net_device *dev; + + read_lock(&dev_base_lock); + for (dev = dev_base; dev; dev = dev->next) { + struct inet6_dev *idev; + + if ((idev = in6_dev_get(dev)) == NULL) + continue; + + read_lock_bh(&idev->lock); + for (im = idev->ac_list; im; im = im->aca_next) { + int i; + + len += sprintf(buffer+len,"%-4d %-15s ", dev->ifindex, dev->name); + + for (i=0; i<16; i++) + len += sprintf(buffer+len, "%02x", im->aca_addr.s6_addr[i]); + + len += sprintf(buffer+len, " %5d\n", im->aca_users); + + pos=begin+len; + if (pos < offset) { + len=0; + begin=pos; + } + if (pos > offset+length) { + read_unlock_bh(&idev->lock); + in6_dev_put(idev); + goto done; + } + } + read_unlock_bh(&idev->lock); + in6_dev_put(idev); + } + +done: + read_unlock(&dev_base_lock); + + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if (len<0) + len=0; + return len; +} + +#endif diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8443bb7b31b5..8dc3c0ebc083 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -118,10 +118,12 @@ int esp6_output(struct 
sk_buff *skb) int alen; int nfrags; u8 nexthdr; -printk(KERN_DEBUG "%s\n", __FUNCTION__); + /* First, if the skb is not checksummed, complete checksum. */ - if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) - return -EINVAL; + if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) { + err = -EINVAL; + goto error_nolock; + } spin_lock_bh(&x->lock); if ((err = xfrm_state_check_expire(x)) != 0) @@ -239,8 +241,10 @@ printk(KERN_DEBUG "%s\n", __FUNCTION__); x->curlft.bytes += skb->len; x->curlft.packets++; spin_unlock_bh(&x->lock); - if ((skb->dst = dst_pop(dst)) == NULL) + if ((skb->dst = dst_pop(dst)) == NULL) { + err = -EHOSTUNREACH; goto error_nolock; + } return NET_XMIT_BYPASS; error: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 436a2f65525f..0cab9069f353 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -369,7 +369,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) saddr = &skb->nh.ipv6h->daddr; - if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST) + if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST || + ipv6_chk_acast_addr(0, saddr)) saddr = NULL; msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4fabfaf06008..cf1356a96514 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -358,6 +358,24 @@ done: retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); break; } + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + { + struct ipv6_mreq mreq; + + if (optlen != sizeof(struct ipv6_mreq)) + goto e_inval; + + retv = -EFAULT; + if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) + break; + + if (optname == IPV6_JOIN_ANYCAST) + retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); + else + retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); + break; + } case IPV6_ROUTER_ALERT: retv = ip6_ra_control(sk, val, NULL); break; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 331a6fba4600..0933086c5c5f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -413,10 +413,13 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, struct in6_addr *daddr, struct in6_addr *solicited_addr, int router, int solicited, int override, int inc_opt) { + static struct in6_addr tmpaddr; + struct inet6_ifaddr *ifp; struct flowi fl; struct rt6_info *rt = NULL; struct dst_entry* dst; struct sock *sk = ndisc_socket->sk; + struct in6_addr *src_addr; struct nd_msg *msg; int len; struct sk_buff *skb; @@ -428,7 +431,18 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, if (!rt) return; - ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, solicited_addr, daddr); + /* for anycast or proxy, solicited_addr != src_addr */ + ifp = ipv6_get_ifaddr(solicited_addr, dev); + if (ifp) { + src_addr = solicited_addr; + in6_ifa_put(ifp); + } else { + if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr, 0)) + return; + src_addr = &tmpaddr; + } + + ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr); ndisc_rt_init(rt, dev, neigh); dst = (struct dst_entry*)rt; @@ -456,7 +470,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, } skb_reserve(skb, (dev->hard_header_len + 15) & ~15); - ip6_nd_hdr(sk, skb, dev, solicited_addr, daddr, IPPROTO_ICMPV6, len); + ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len); skb->h.raw = (unsigned char*) msg = (struct nd_msg *) skb_put(skb, len); @@ -470,13 +484,13 @@ static void ndisc_send_na(struct net_device *dev, 
struct neighbour *neigh, msg->icmph.icmp6_override = !!override; /* Set the target address. */ - ipv6_addr_copy(&msg->target, solicited_addr); + ipv6_addr_copy(&msg->target, src_addr); if (inc_opt) ndisc_fill_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len); /* checksum */ - msg->icmph.icmp6_cksum = csum_ipv6_magic(solicited_addr, daddr, len, + msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len, IPPROTO_ICMPV6, csum_partial((__u8 *) msg, len, 0)); @@ -793,6 +807,50 @@ void ndisc_recv_ns(struct sk_buff *skb) } } in6_ifa_put(ifp); + } else if (ipv6_chk_acast_addr(dev, &msg->target)) { + struct inet6_dev *idev = in6_dev_get(dev); + int addr_type = ipv6_addr_type(saddr); + + /* anycast */ + + if (!idev) { + /* XXX: count this drop? */ + return; + } + + if (addr_type == IPV6_ADDR_ANY) { + struct in6_addr maddr; + + ipv6_addr_all_nodes(&maddr); + ndisc_send_na(dev, NULL, &maddr, &msg->target, + idev->cnf.forwarding, 0, 0, 1); + in6_dev_put(idev); + return; + } + + if (addr_type & IPV6_ADDR_UNICAST) { + int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST; + if (inc) + nd_tbl.stats.rcv_probes_mcast++; + else + nd_tbl.stats.rcv_probes_ucast++; + + /* + * update / create cache entry + * for the source adddress + */ + + neigh = neigh_event_ns(&nd_tbl, lladdr, saddr, skb->dev); + + if (neigh || !dev->hard_header) { + ndisc_send_na(dev, neigh, saddr, + &msg->target, + idev->cnf.forwarding, 1, 0, inc); + if (neigh) + neigh_release(neigh); + } + } + in6_dev_put(idev); } else { struct inet6_dev *in6_dev = in6_dev_get(dev); int addr_type = ipv6_addr_type(saddr); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 198177c08fde..72d2e0c08dfc 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -558,9 +558,7 @@ ipq_rcv_dev_event(struct notifier_block *this, } static struct notifier_block ipq_dev_notifier = { - ipq_rcv_dev_event, - NULL, - 0 + .notifier_call = ipq_rcv_dev_event, }; static int @@ -580,9 +578,7 @@ ipq_rcv_nl_event(struct notifier_block *this, } static struct notifier_block ipq_nl_notifier = { - ipq_rcv_nl_event, - NULL, - 0 + .notifier_call = ipq_rcv_nl_event, }; static int sysctl_maxlen = IPQ_QMAX_DEFAULT; @@ -604,7 +600,6 @@ static ctl_table ipq_dir_table[] = { { .ctl_name = NET_IPV6, .procname = "ipv6", - .maxlen = 0, .mode = 0555, .child = ipq_table }, @@ -615,7 +610,6 @@ static ctl_table ipq_root_table[] = { { .ctl_name = CTL_NET, .procname = "net", - .maxlen = 0, .mode = 0555, .child = ipq_dir_table }, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4eb41575b63e..d7727f3426d1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1735,21 +1735,42 @@ icmp6_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. 
*/ -static struct ip6t_target ip6t_standard_target -= { { NULL, NULL }, IP6T_STANDARD_TARGET, NULL, NULL, NULL }; -static struct ip6t_target ip6t_error_target -= { { NULL, NULL }, IP6T_ERROR_TARGET, ip6t_error, NULL, NULL }; - -static struct nf_sockopt_ops ip6t_sockopts -= { { NULL, NULL }, PF_INET6, IP6T_BASE_CTL, IP6T_SO_SET_MAX+1, do_ip6t_set_ctl, - IP6T_BASE_CTL, IP6T_SO_GET_MAX+1, do_ip6t_get_ctl, 0, NULL }; - -static struct ip6t_match tcp_matchstruct -= { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL }; -static struct ip6t_match udp_matchstruct -= { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL }; -static struct ip6t_match icmp6_matchstruct -= { { NULL, NULL }, "icmp6", &icmp6_match, &icmp6_checkentry, NULL }; +static struct ip6t_target ip6t_standard_target = { + .name = IP6T_STANDARD_TARGET, +}; + +static struct ip6t_target ip6t_error_target = { + .name = IP6T_ERROR_TARGET, + .target = ip6t_error, +}; + +static struct nf_sockopt_ops ip6t_sockopts = { + .pf = PF_INET6, + .set_optmin = IP6T_BASE_CTL, + .set_optmax = IP6T_SO_SET_MAX+1, + .set = do_ip6t_set_ctl, + .get_optmin = IP6T_BASE_CTL, + .get_optmax = IP6T_SO_GET_MAX+1, + .get = do_ip6t_get_ctl, +}; + +static struct ip6t_match tcp_matchstruct = { + .name = "tcp", + .match = &tcp_match, + .checkentry = &tcp_checkentry, +}; + +static struct ip6t_match udp_matchstruct = { + .name = "udp", + .match = &udp_match, + .checkentry = &udp_checkentry, +}; + +static struct ip6t_match icmp6_matchstruct = { + .name = "icmp6", + .match = &icmp6_match, + .checkentry = &icmp6_checkentry, +}; #ifdef CONFIG_PROC_FS static inline int print_name(const struct ip6t_table *t, diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 427cee7dedac..ca22024b7b5a 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -26,17 +26,6 @@ struct ahhdr { __u32 spi; }; -int ipv6_ext_hdr(u8 nexthdr) -{ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); -} - /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline int spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) @@ -79,7 +68,7 @@ match(const struct sk_buff *skb, len = skb->len - ptr; temp = 0; - while (ipv6_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr *hdr; DEBUGP("ipv6_ah header iteration \n"); @@ -200,8 +189,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match ah_match -= { { NULL, NULL }, "ah", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match ah_match = { + .name = "ah", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 861a83797ece..1032865cdaf0 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -29,17 +29,6 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); #define DEBUGP(format, args...) 
#endif -int ipv6_ext_hdr(u8 nexthdr) -{ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); -} - /* * (Type & 0xC0) >> 6 * 0 -> ignorable @@ -84,7 +73,7 @@ match(const struct sk_buff *skb, len = skb->len - ptr; temp = 0; - while (ipv6_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr *hdr; DEBUGP("ipv6_opts header iteration \n"); @@ -265,12 +254,15 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match opts_match +static struct ip6t_match opts_match = { #if HOPBYHOP -= { { NULL, NULL }, "hbh", &match, &checkentry, NULL, THIS_MODULE }; + .name = "hbh", #else -= { { NULL, NULL }, "dst", &match, &checkentry, NULL, THIS_MODULE }; + .name = "dst", #endif + .match = &match, + .checkentry = &checkentry, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index d889411fbe81..828a91510ffe 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -23,17 +23,6 @@ struct esphdr { __u32 spi; }; -int ipv6_ext_hdr(u8 nexthdr) -{ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); -} - /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline int spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) @@ -74,7 +63,7 @@ match(const struct sk_buff *skb, len = skb->len - ptr; temp = 0; - while (ipv6_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr *hdr; int hdrlen; @@ -168,8 +157,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match esp_match -= { { NULL, NULL }, "esp", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match esp_match = { + .name = "esp", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 806f2c8769d6..b5b7c07107f3 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -69,8 +69,12 @@ ip6t_eui64_checkentry(const char *tablename, return 1; } -static struct ip6t_match eui64_match -= { { NULL, NULL }, "eui64", &match, &ip6t_eui64_checkentry, NULL, THIS_MODULE }; +static struct ip6t_match eui64_match = { + .name = "eui64", + .match = &match, + .checkentry = &ip6t_eui64_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 2adbc51ec44e..544f07c879d2 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -44,17 +44,6 @@ struct fraghdr { __u32 id; }; -int ipv6_ext_hdr(u8 nexthdr) -{ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); -} - /* Returns 1 if the id is matched by the range, 0 otherwise */ static inline int id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) @@ -93,7 +82,7 @@ match(const struct sk_buff *skb, len = skb->len - ptr; temp = 0; - while (ipv6_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr *hdr; 
DEBUGP("ipv6_frag header iteration \n"); @@ -232,8 +221,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match frag_match -= { { NULL, NULL }, "frag", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match frag_match = { + .name = "frag", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index a7effeb501e1..aed55d7bca37 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -29,17 +29,6 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); #define DEBUGP(format, args...) #endif -int ipv6_ext_hdr(u8 nexthdr) -{ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); -} - /* * (Type & 0xC0) >> 6 * 0 -> ignorable @@ -84,7 +73,7 @@ match(const struct sk_buff *skb, len = skb->len - ptr; temp = 0; - while (ipv6_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr *hdr; DEBUGP("ipv6_opts header iteration \n"); @@ -265,12 +254,16 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match opts_match +static struct ip6t_match opts_match = { #if HOPBYHOP -= { { NULL, NULL }, "hbh", &match, &checkentry, NULL, THIS_MODULE }; + .name = "hbh", #else -= { { NULL, NULL }, "dst", &match, &checkentry, NULL, THIS_MODULE }; + .name = "dst", #endif + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index 7a780255662f..c9f79395e3b6 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -56,8 +56,12 @@ static int checkentry(const char *tablename, const struct ip6t_ip6 *ip, return 1; } -static struct ip6t_match hl_match = { { NULL, NULL }, "hl", &match, - &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match hl_match = { + .name = "hl", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 1e6d85d751b5..66a0bbd843c2 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -24,17 +24,6 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); #define DEBUGP(format, args...) 
#endif -int ipv6_ext_hdr(u8 nexthdr) -{ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); -} - static int ipv6header_match(const struct sk_buff *skb, const struct net_device *in, @@ -95,7 +84,7 @@ ipv6header_match(const struct sk_buff *skb, temp = 0; - while (ipv6_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr *hdr; int hdrlen; @@ -196,14 +185,12 @@ ipv6header_destroy(void *matchinfo, return; } -static struct ip6t_match -ip6t_ipv6header_match = { - { NULL, NULL }, - "ipv6header", - &ipv6header_match, - &ipv6header_checkentry, - &ipv6header_destroy, - THIS_MODULE +static struct ip6t_match ip6t_ipv6header_match = { + .name = "ipv6header", + .match = &ipv6header_match, + .checkentry = &ipv6header_checkentry, + .destroy = &ipv6header_destroy, + .me = THIS_MODULE, }; static int __init ipv6header_init(void) diff --git a/net/ipv6/netfilter/ip6t_length.c b/net/ipv6/netfilter/ip6t_length.c index 3e6035d2784d..1d67d5034b1e 100644 --- a/net/ipv6/netfilter/ip6t_length.c +++ b/net/ipv6/netfilter/ip6t_length.c @@ -34,8 +34,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match length_match -= { { NULL, NULL }, "length", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match length_match = { + .name = "length", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_limit.c b/net/ipv6/netfilter/ip6t_limit.c index ab6aed1f9f63..38dc46f14030 100644 --- a/net/ipv6/netfilter/ip6t_limit.c +++ b/net/ipv6/netfilter/ip6t_limit.c @@ -115,9 +115,12 @@ ip6t_limit_checkentry(const char *tablename, return 1; } -static struct ip6t_match ip6t_limit_reg -= { { NULL, NULL }, "limit", ip6t_limit_match, ip6t_limit_checkentry, NULL, - THIS_MODULE }; +static struct ip6t_match ip6t_limit_reg = { + .name = "limit", + .match = ip6t_limit_match, + .checkentry = ip6t_limit_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c index e4771d3ce81d..60b607ca7518 100644 --- a/net/ipv6/netfilter/ip6t_mac.c +++ b/net/ipv6/netfilter/ip6t_mac.c @@ -47,8 +47,12 @@ ip6t_mac_checkentry(const char *tablename, return 1; } -static struct ip6t_match mac_match -= { { NULL, NULL }, "mac", &match, &ip6t_mac_checkentry, NULL, THIS_MODULE }; +static struct ip6t_match mac_match = { + .name = "mac", + .match = &match, + .checkentry = &ip6t_mac_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_mark.c b/net/ipv6/netfilter/ip6t_mark.c index 6a7b61cc0b99..3b728a9ebaef 100644 --- a/net/ipv6/netfilter/ip6t_mark.c +++ b/net/ipv6/netfilter/ip6t_mark.c @@ -33,8 +33,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match mark_match -= { { NULL, NULL }, "mark", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match mark_match = { + .name = "mark", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c index 0d1074422301..ea39654f9e85 100644 --- a/net/ipv6/netfilter/ip6t_multiport.c +++ b/net/ipv6/netfilter/ip6t_multiport.c @@ -84,8 +84,12 @@ checkentry(const char *tablename, && multiinfo->count <= IP6T_MULTI_PORTS; } -static struct 
ip6t_match multiport_match -= { { NULL, NULL }, "multiport", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match multiport_match = { + .name = "multiport", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 544543dc0a93..4a9f0bee30c4 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -142,8 +142,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match owner_match -= { { NULL, NULL }, "owner", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match owner_match = { + .name = "owner", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index ecdb2eed2636..b5823593c02f 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -21,17 +21,6 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); #define DEBUGP(format, args...) #endif -int ipv6_ext_hdr(u8 nexthdr) -{ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); -} - /* Returns 1 if the id is matched by the range, 0 otherwise */ static inline int segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) @@ -71,7 +60,7 @@ match(const struct sk_buff *skb, len = skb->len - ptr; temp = 0; - while (ipv6_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr *hdr; DEBUGP("ipv6_rt header iteration \n"); @@ -287,8 +276,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match rt_match -= { { NULL, NULL }, "rt", &match, &checkentry, NULL, THIS_MODULE }; +static struct ip6t_match rt_match = { + .name = "rt", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 652066452aa9..3b3490325222 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -181,7 +181,6 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; dev->init = ipip6_tunnel_init; - dev->features |= NETIF_F_DYNALLOC; memcpy(&nt->parms, parms, sizeof(*parms)); nt->parms.name[IFNAMSIZ-1] = '\0'; strcpy(dev->name, nt->parms.name); @@ -213,6 +212,7 @@ failed: static void ipip6_tunnel_destructor(struct net_device *dev) { if (dev != &ipip6_fb_tunnel_dev) { + kfree(dev); MOD_DEC_USE_COUNT; } } @@ -552,7 +552,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) /* * Okay, now see if we can stuff it in the buffer as-is. 
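The long run of netfilter hunks above is mechanical: every ip6t_match/ip6t_target/nf_sockopt_ops that was initialized positionally is rewritten with C99 designated initializers, so only the fields that matter are named and the rest (list heads, unused hooks) default to zero. A tiny standalone illustration of the pattern, using a made-up struct rather than the kernel types:

/* Illustrative only -- not a kernel structure. */
struct demo_match {
	struct { void *next, *prev; } list;
	const char *name;
	int (*match)(const void *skb);
	int (*checkentry)(const char *table);
	void (*destroy)(void *info);
	void *me;
};

static int always_match(const void *skb) { (void)skb; return 1; }

/* old style: every field in declaration order, placeholders included */
static struct demo_match old_style =
	{ { 0, 0 }, "demo", always_match, 0, 0, 0 };

/* new style: name only what you set; everything else is zeroed */
static struct demo_match new_style = {
	.name  = "demo",
	.match = always_match,
};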
*/ - max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr)); + max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index c7782f1774ad..aad0321c1469 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -372,6 +372,11 @@ static void irda_task_timer_expired(void *data) irda_task_kick(task); } +static void irda_device_destructor(struct net_device *dev) +{ + kfree(dev); +} + /* * Function irda_device_setup (dev) * @@ -385,8 +390,7 @@ int irda_device_setup(struct net_device *dev) dev->hard_header_len = 0; dev->addr_len = 0; - dev->features |= NETIF_F_DYNALLOC; - /* dev->destructor = irda_device_destructor; */ + dev->destructor = irda_device_destructor; dev->type = ARPHRD_IRDA; dev->tx_queue_len = 8; /* Window size + 1 s-frame */ diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 3776b71b5b90..8639f6a5ea07 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -62,16 +62,6 @@ int irlan_eth_init(struct net_device *dev) dev->get_stats = irlan_eth_get_stats; dev->set_multicast_list = irlan_eth_set_multicast_list; - /* NETIF_F_DYNALLOC feature was set by irlan_eth_init() and would - * cause the unregister_netdev() to do asynch completion _and_ - * kfree self->dev afterwards. Which is really bad because the - * netdevice was not allocated separately but is embedded in - * our control block and therefore gets freed with *self. - * The only reason why this would have been enabled is to hide - * some netdev refcount issues. If unregister_netdev() blocks - * forever, tell us about it... */ - //dev->features |= NETIF_F_DYNALLOC; - ether_setup(dev); /* diff --git a/net/key/af_key.c b/net/key/af_key.c index 32f49d659275..eb93b99b3379 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -900,6 +900,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, return ERR_PTR(-EINVAL); key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; if (key != NULL && + sa->sadb_sa_auth != SADB_X_AALG_NULL && ((key->sadb_key_bits+7) / 8 == 0 || (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t))) return ERR_PTR(-EINVAL); diff --git a/net/netsyms.c b/net/netsyms.c index 3efc8182aa99..29bf1aa61b0d 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -547,6 +547,8 @@ EXPORT_SYMBOL(register_netdevice); EXPORT_SYMBOL(unregister_netdevice); EXPORT_SYMBOL(netdev_state_change); EXPORT_SYMBOL(dev_new_index); +EXPORT_SYMBOL(dev_get_by_flags); +EXPORT_SYMBOL(__dev_get_by_flags); EXPORT_SYMBOL(dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(dev_get_by_name); diff --git a/net/nonet.c b/net/nonet.c new file mode 100644 index 000000000000..ffaf8363f74f --- /dev/null +++ b/net/nonet.c @@ -0,0 +1,28 @@ +/* + * net/nonet.c + * + * Dummy functions to allow us to configure network support entirely + * out of the kernel. + * + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) Matthew Wilcox 2003 + */ + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> + +void __init sock_init(void) +{ + printk(KERN_INFO "Linux NoNET1.0 for Linux 2.6\n"); +} + +static int sock_no_open(struct inode *irrelevant, struct file *dontcare) +{ + return -ENXIO; +} + +struct file_operations bad_sock_fops = { + .open = sock_no_open, +}; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 82533ec23d0a..7eb8564e63c6 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -508,7 +508,7 @@ static void sch_atm_dequeue(unsigned long data) ATM_SKB(skb)->vcc = flow->vcc; memcpy(skb_push(skb,flow->hdr_len),flow->hdr, flow->hdr_len); - atomic_add(skb->truesize,&flow->vcc->tx_inuse); + atomic_add(skb->truesize,&flow->vcc->sk->wmem_alloc); ATM_SKB(skb)->iovcnt = 0; /* atm.atm_options are already set by atm_tc_enqueue */ (void) flow->vcc->send(flow->vcc,skb); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 230b5602004d..ff98ab3f7fda 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -181,7 +181,7 @@ sctp_association_t *sctp_association_init(sctp_association_t *asoc, else asoc->rwnd = sk->rcvbuf; - asoc->a_rwnd = 0; + asoc->a_rwnd = asoc->rwnd; asoc->rwnd_over = 0; @@ -360,9 +360,25 @@ static void sctp_association_destroy(sctp_association_t *asoc) } } +/* Change the primary destination address for the peer. */ +void sctp_assoc_set_primary(struct sctp_association *asoc, + struct sctp_transport *transport) +{ + asoc->peer.primary_path = transport; + + /* Set a default msg_name for events. */ + memcpy(&asoc->peer.primary_addr, &transport->ipaddr, + sizeof(union sctp_addr)); + + /* If the primary path is changing, assume that the + * user wants to use this new path. + */ + if (transport->active) + asoc->peer.active_path = transport; +} /* Add a transport address to an association. */ -struct sctp_transport *sctp_assoc_add_peer(sctp_association_t *asoc, +struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const union sctp_addr *addr, int priority) { @@ -397,17 +413,16 @@ struct sctp_transport *sctp_assoc_add_peer(sctp_association_t *asoc, * If not and the current association PMTU is higher than the new * peer's PMTU, reset the association PMTU to the new peer's PMTU. */ - if (asoc->pmtu) { + if (asoc->pmtu) asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu); - } else { + else asoc->pmtu = peer->pmtu; - } SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " "%d\n", asoc, asoc->pmtu); - asoc->frag_point = asoc->pmtu - - (SCTP_IP_OVERHEAD + sizeof(sctp_data_chunk_t)); + asoc->frag_point = asoc->pmtu; + asoc->frag_point -= SCTP_IP_OVERHEAD + sizeof(struct sctp_data_chunk); /* The asoc->peer.port might not be meaningful yet, but * initialize the packet structure anyway. @@ -460,11 +475,7 @@ struct sctp_transport *sctp_assoc_add_peer(sctp_association_t *asoc, /* If we do not yet have a primary path, set one. */ if (NULL == asoc->peer.primary_path) { - asoc->peer.primary_path = peer; - /* Set a default msg_name for events. */ - memcpy(&asoc->peer.primary_addr, &peer->ipaddr, - sizeof(union sctp_addr)); - asoc->peer.active_path = peer; + sctp_assoc_set_primary(asoc, peer); asoc->peer.retran_path = peer; } @@ -603,7 +614,7 @@ void sctp_association_put(sctp_association_t *asoc) /* Allocate the next TSN, Transmission Sequence Number, for the given * association. 
*/ -__u32 __sctp_association_get_next_tsn(sctp_association_t *asoc) +__u32 sctp_association_get_next_tsn(sctp_association_t *asoc) { /* From Section 1.6 Serial Number Arithmetic: * Transmission Sequence Numbers wrap around when they reach @@ -618,7 +629,7 @@ __u32 __sctp_association_get_next_tsn(sctp_association_t *asoc) } /* Allocate 'num' TSNs by incrementing the association's TSN by num. */ -__u32 __sctp_association_get_tsn_block(sctp_association_t *asoc, int num) +__u32 sctp_association_get_tsn_block(sctp_association_t *asoc, int num) { __u32 retval = asoc->next_tsn; @@ -942,7 +953,7 @@ struct sctp_transport *sctp_assoc_choose_shutdown_transport(sctp_association_t * { /* If this is the first time SHUTDOWN is sent, use the active path, * else use the retran path. If the last SHUTDOWN was sent over the - * retran path, update the retran path and use it. + * retran path, update the retran path and use it. */ if (!asoc->shutdown_last_sent_to) return asoc->peer.active_path; @@ -983,6 +994,24 @@ void sctp_assoc_sync_pmtu(sctp_association_t *asoc) __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point); } +/* Should we send a SACK to update our peer? */ +static inline int sctp_peer_needs_update(struct sctp_association *asoc) +{ + switch (asoc->state) { + case SCTP_STATE_ESTABLISHED: + case SCTP_STATE_SHUTDOWN_PENDING: + case SCTP_STATE_SHUTDOWN_RECEIVED: + if ((asoc->rwnd > asoc->a_rwnd) && + ((asoc->rwnd - asoc->a_rwnd) >= + min_t(__u32, (asoc->base.sk->rcvbuf >> 1), asoc->pmtu))) + return 1; + break; + default: + break; + } + return 0; +} + /* Increase asoc's rwnd by len and send any window update SACK if needed. */ void sctp_assoc_rwnd_increase(sctp_association_t *asoc, int len) { @@ -1009,10 +1038,8 @@ void sctp_assoc_rwnd_increase(sctp_association_t *asoc, int len) * The algorithm used is similar to the one described in * Section 4.2.3.3 of RFC 1122. */ - if ((asoc->state == SCTP_STATE_ESTABLISHED) && - (asoc->rwnd > asoc->a_rwnd) && - ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->rcvbuf >> 1), asoc->pmtu))) { + if (sctp_peer_needs_update(asoc)) { + asoc->a_rwnd = asoc->rwnd; SCTP_DEBUG_PRINTK("%s: Sending window update SACK- asoc: %p " "rwnd: %u a_rwnd: %u\n", __FUNCTION__, asoc, asoc->rwnd, asoc->a_rwnd); @@ -1020,9 +1047,6 @@ void sctp_assoc_rwnd_increase(sctp_association_t *asoc, int len) if (!sack) return; - /* Update the last advertised rwnd value. 
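sctp_peer_needs_update() above, together with moving the a_rwnd update ahead of queueing the SACK, encodes the receiver-side silly-window-avoidance rule (cf. RFC 1122 section 4.2.3.3): only advertise the larger window once the gain reaches half the receive buffer or one PMTU. A simplified standalone restatement, with plain integers standing in for the association fields:

#include <stdint.h>

static inline uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

/* Illustrative: nonzero if a window-update SACK is worth sending. */
static int should_send_window_update(uint32_t rwnd, uint32_t a_rwnd,
				     uint32_t rcvbuf, uint32_t pmtu)
{
	if (rwnd <= a_rwnd)
		return 0;
	return (rwnd - a_rwnd) >= min_u32(rcvbuf >> 1, pmtu);
}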
*/ - asoc->a_rwnd = asoc->rwnd; - asoc->peer.sack_needed = 0; sctp_outq_tail(&asoc->outqueue, sack); @@ -1046,7 +1070,8 @@ void sctp_assoc_rwnd_decrease(sctp_association_t *asoc, int len) asoc->rwnd = 0; } SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u)\n", - __FUNCTION__, asoc, len, asoc->rwnd, asoc->rwnd_over); + __FUNCTION__, asoc, len, asoc->rwnd, + asoc->rwnd_over); } /* Build the bind address list for the association based on info from the diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 2ae655f2c775..26c62125226b 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -302,7 +302,7 @@ int sctp_bind_addr_match(sctp_bind_addr_t *bp, const union sctp_addr *addr, static int sctp_copy_one_addr(sctp_bind_addr_t *dest, union sctp_addr *addr, sctp_scope_t scope, int priority, int flags) { - sctp_protocol_t *proto = sctp_get_protocol(); + struct sctp_protocol *proto = sctp_get_protocol(); int error = 0; if (sctp_is_any(addr)) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8efbd4af013e..1f4cdc25d81c 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -65,7 +65,7 @@ static void sctp_endpoint_bh_rcv(sctp_endpoint_t *ep); /* Create a sctp_endpoint_t with all that boring stuff initialized. * Returns NULL if there isn't enough memory. */ -sctp_endpoint_t *sctp_endpoint_new(sctp_protocol_t *proto, +sctp_endpoint_t *sctp_endpoint_new(struct sctp_protocol *proto, struct sock *sk, int priority) { sctp_endpoint_t *ep; @@ -89,7 +89,8 @@ fail: /* * Initialize the base fields of the endpoint structure. */ -sctp_endpoint_t *sctp_endpoint_init(sctp_endpoint_t *ep, sctp_protocol_t *proto, +sctp_endpoint_t *sctp_endpoint_init(sctp_endpoint_t *ep, + struct sctp_protocol *proto, struct sock *sk, int priority) { struct sctp_opt *sp = sctp_sk(sk); @@ -194,6 +195,8 @@ void sctp_endpoint_destroy(sctp_endpoint_t *ep) { SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); + ep->base.sk->state = SCTP_SS_CLOSED; + /* Unlink this endpoint, so we can't find it again! */ sctp_unhash_endpoint(ep); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e0703d0727fa..1133d3fd93bb 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -432,6 +432,62 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) return retval; } +/* Create and initialize a new sk for the socket to be returned by accept(). 
*/ +struct sock *sctp_v6_create_accept_sk(struct sock *sk, + struct sctp_association *asoc) +{ + struct inet_opt *inet = inet_sk(sk); + struct sock *newsk; + struct inet_opt *newinet; + struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct sctp6_sock *newsctp6sk; + + newsk = sk_alloc(PF_INET6, GFP_KERNEL, sizeof(struct sctp6_sock), + sk->slab); + if (!newsk) + goto out; + + sock_init_data(NULL, newsk); + + newsk->type = SOCK_STREAM; + + newsk->prot = sk->prot; + newsk->no_check = sk->no_check; + newsk->reuse = sk->reuse; + + newsk->destruct = inet_sock_destruct; + newsk->zapped = 0; + newsk->family = PF_INET6; + newsk->protocol = IPPROTO_SCTP; + newsk->backlog_rcv = sk->prot->backlog_rcv; + + newsctp6sk = (struct sctp6_sock *)newsk; + newsctp6sk->pinet6 = &newsctp6sk->inet6; + + newinet = inet_sk(newsk); + newnp = inet6_sk(newsk); + + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + + ipv6_addr_copy(&newnp->daddr, &asoc->peer.primary_addr.v6.sin6_addr); + + newinet->sport = inet->sport; + newinet->dport = asoc->peer.port; + +#ifdef INET_REFCNT_DEBUG + atomic_inc(&inet6_sock_nr); + atomic_inc(&inet_sock_nr); +#endif + + if (0 != newsk->prot->init(newsk)) { + inet_sock_release(newsk); + newsk = NULL; + } + +out: + return newsk; +} + /* Initialize a PF_INET6 socket msg_name. */ static void sctp_inet6_msgname(char *msgname, int *addr_len) { @@ -564,6 +620,20 @@ static int sctp_inet6_bind_verify(struct sctp_opt *opt, union sctp_addr *addr) return af->available(addr); } +/* Fill in Supported Address Type information for INIT and INIT-ACK + * chunks. Note: In the future, we may want to look at sock options + * to determine whether a PF_INET6 socket really wants to have IPV4 + * addresses. + * Returns number of addresses supported. + */ +static int sctp_inet6_supported_addrs(const struct sctp_opt *opt, + __u16 *types) +{ + types[0] = SCTP_PARAM_IPV4_ADDRESS; + types[1] = SCTP_PARAM_IPV6_ADDRESS; + return 2; +} + static struct proto_ops inet6_seqpacket_ops = { .family = PF_INET6, .release = inet6_release, @@ -583,7 +653,7 @@ static struct proto_ops inet6_seqpacket_ops = { .mmap = sock_no_mmap, }; -static struct inet_protosw sctpv6_protosw = { +static struct inet_protosw sctpv6_seqpacket_protosw = { .type = SOCK_SEQPACKET, .protocol = IPPROTO_SCTP, .prot = &sctp_prot, @@ -592,6 +662,15 @@ static struct inet_protosw sctpv6_protosw = { .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; +static struct inet_protosw sctpv6_stream_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SCTP, + .prot = &sctp_prot, + .ops = &inet6_seqpacket_ops, + .capability = -1, + .no_check = 0, + .flags = SCTP_PROTOSW_FLAG +}; static struct inet6_protocol sctpv6_protocol = { .handler = sctp_rcv, @@ -626,6 +705,8 @@ static struct sctp_pf sctp_pf_inet6_specific = { .af_supported = sctp_inet6_af_supported, .cmp_addr = sctp_inet6_cmp_addr, .bind_verify = sctp_inet6_bind_verify, + .supported_addrs = sctp_inet6_supported_addrs, + .create_accept_sk = sctp_v6_create_accept_sk, .af = &sctp_ipv6_specific, }; @@ -636,8 +717,9 @@ int sctp_v6_init(void) if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) return -EAGAIN; - /* Add SCTPv6 to inetsw6 linked list. */ - inet6_register_protosw(&sctpv6_protosw); + /* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */ + inet6_register_protosw(&sctpv6_seqpacket_protosw); + inet6_register_protosw(&sctpv6_stream_protosw); /* Register the SCTP specfic PF_INET6 functions. 
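The sctpv6_stream_protosw registration and sctp_v6_create_accept_sk() above are what let a TCP-style (one-to-one) SCTP socket be created and accept() connections over IPv6. A minimal userspace sketch, assuming the installed headers provide IPPROTO_SCTP (the fallback below is the IANA protocol number); on a kernel without the SOCK_STREAM protosw the socket() call simply fails:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_SCTP
#define IPPROTO_SCTP 132             /* assumed fallback */
#endif

int main(void)
{
	struct sockaddr_in6 a;
	int c, s = socket(AF_INET6, SOCK_STREAM, IPPROTO_SCTP);

	if (s < 0) {
		perror("socket");
		return 1;
	}
	memset(&a, 0, sizeof(a));
	a.sin6_family = AF_INET6;
	a.sin6_addr = in6addr_any;
	a.sin6_port = htons(5000);
	if (bind(s, (struct sockaddr *)&a, sizeof(a)) < 0 || listen(s, 5) < 0) {
		perror("bind/listen");
		return 1;
	}
	c = accept(s, NULL, NULL);       /* serviced by sctp_v6_create_accept_sk() */
	if (c >= 0)
		close(c);
	close(s);
	return 0;
}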
*/ sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6); @@ -656,6 +738,7 @@ void sctp_v6_exit(void) { list_del(&sctp_ipv6_specific.list); inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP); - inet6_unregister_protosw(&sctpv6_protosw); + inet6_unregister_protosw(&sctpv6_seqpacket_protosw); + inet6_unregister_protosw(&sctpv6_stream_protosw); unregister_inet6addr_notifier(&sctp_inetaddr_notifier); } diff --git a/net/sctp/output.c b/net/sctp/output.c index d7826c2216e6..c02f99d602f0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -62,17 +62,16 @@ #include <net/sctp/sm.h> /* Forward declarations for private helpers. */ -static void sctp_packet_reset(sctp_packet_t *packet); -static sctp_xmit_t sctp_packet_append_data(sctp_packet_t *packet, - sctp_chunk_t *chunk); +static void sctp_packet_reset(struct sctp_packet *packet); +static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk); /* Config a packet. * This appears to be a followup set of initializations.) */ -sctp_packet_t *sctp_packet_config(sctp_packet_t *packet, - __u32 vtag, - int ecn_capable, - sctp_packet_phandler_t *prepend_handler) +struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, + __u32 vtag, int ecn_capable, + sctp_packet_phandler_t *prepend_handler) { int packet_empty = (packet->size == SCTP_IP_OVERHEAD); @@ -89,10 +88,9 @@ sctp_packet_t *sctp_packet_config(sctp_packet_t *packet, } /* Initialize the packet structure. */ -sctp_packet_t *sctp_packet_init(sctp_packet_t *packet, - struct sctp_transport *transport, - __u16 sport, - __u16 dport) +struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, + struct sctp_transport *transport, + __u16 sport, __u16 dport) { packet->transport = transport; packet->source_port = sport; @@ -109,14 +107,12 @@ sctp_packet_t *sctp_packet_init(sctp_packet_t *packet, } /* Free a packet. */ -void sctp_packet_free(sctp_packet_t *packet) +void sctp_packet_free(struct sctp_packet *packet) { - sctp_chunk_t *chunk; + struct sctp_chunk *chunk; - while (NULL != - (chunk = (sctp_chunk_t *)skb_dequeue(&packet->chunks))) { + while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks))) sctp_free_chunk(chunk); - } if (packet->malloced) kfree(packet); @@ -129,8 +125,8 @@ void sctp_packet_free(sctp_packet_t *packet) * as it can fit in the packet, but any more data that does not fit in this * packet can be sent only after receiving the COOKIE_ACK. */ -sctp_xmit_t sctp_packet_transmit_chunk(sctp_packet_t *packet, - sctp_chunk_t *chunk) +sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk) { sctp_xmit_t retval; int error = 0; @@ -152,6 +148,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(sctp_packet_t *packet, case SCTP_XMIT_MUST_FRAG: case SCTP_XMIT_RWND_FULL: case SCTP_XMIT_OK: + case SCTP_XMIT_NAGLE_DELAY: break; }; @@ -161,7 +158,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(sctp_packet_t *packet, /* Append a chunk to the offered packet reporting back any inability to do * so. */ -sctp_xmit_t sctp_packet_append_chunk(sctp_packet_t *packet, sctp_chunk_t *chunk) +sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); @@ -182,7 +180,7 @@ sctp_xmit_t sctp_packet_append_chunk(sctp_packet_t *packet, sctp_chunk_t *chunk) /* Both control chunks and data chunks with TSNs are * non-fragmentable. 
*/ - int fragmentable = sctp_chunk_is_data(chunk) && + int fragmentable = sctp_chunk_is_data(chunk) && (!chunk->has_tsn); if (packet_empty) { if (fragmentable) { @@ -223,7 +221,7 @@ append: } /* It is OK to send this chunk. */ - skb_queue_tail(&packet->chunks, (struct sk_buff *)chunk); + __skb_queue_tail(&packet->chunks, (struct sk_buff *)chunk); packet->size += chunk_len; finish: return retval; @@ -234,18 +232,18 @@ finish: * * The return value is a normal kernel error return value. */ -int sctp_packet_transmit(sctp_packet_t *packet) +int sctp_packet_transmit(struct sctp_packet *packet) { struct sctp_transport *transport = packet->transport; - sctp_association_t *asoc = transport->asoc; + struct sctp_association *asoc = transport->asoc; struct sctphdr *sh; __u32 crc32; struct sk_buff *nskb; - sctp_chunk_t *chunk; + struct sctp_chunk *chunk; struct sock *sk; int err = 0; int padding; /* How much padding do we need? */ - __u8 packet_has_data = 0; + __u8 has_data = 0; struct dst_entry *dst; /* Do NOT generate a chunkless packet... */ @@ -253,7 +251,7 @@ int sctp_packet_transmit(sctp_packet_t *packet) return err; /* Set up convenience variables... */ - chunk = (sctp_chunk_t *) (packet->chunks.next); + chunk = (struct sctp_chunk *) (packet->chunks.next); sk = chunk->skb->sk; /* Allocate the new skb. */ @@ -291,8 +289,7 @@ int sctp_packet_transmit(sctp_packet_t *packet) * [This whole comment explains WORD_ROUND() below.] */ SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n"); - while (NULL != (chunk = (sctp_chunk_t *) - skb_dequeue(&packet->chunks))) { + while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks))) { chunk->num_times_sent++; chunk->sent_at = jiffies; if (sctp_chunk_is_data(chunk)) { @@ -309,7 +306,7 @@ int sctp_packet_transmit(sctp_packet_t *packet) chunk->rtt_in_progress = 1; transport->rto_pending = 1; } - packet_has_data = 1; + has_data = 1; } memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); @@ -399,7 +396,7 @@ int sctp_packet_transmit(sctp_packet_t *packet) asoc->peer.last_sent_to = transport; } - if (packet_has_data) { + if (has_data) { struct timer_list *timer; unsigned long timeout; @@ -456,9 +453,9 @@ no_route: /* * This private function resets the packet to a fresh state. */ -static void sctp_packet_reset(sctp_packet_t *packet) +static void sctp_packet_reset(struct sctp_packet *packet) { - sctp_chunk_t *chunk = NULL; + struct sctp_chunk *chunk = NULL; packet->size = SCTP_IP_OVERHEAD; @@ -473,13 +470,16 @@ static void sctp_packet_reset(sctp_packet_t *packet) } /* This private function handles the specifics of appending DATA chunks. */ -static sctp_xmit_t sctp_packet_append_data(sctp_packet_t *packet, - sctp_chunk_t *chunk) +static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; size_t datasize, rwnd, inflight; struct sctp_transport *transport = packet->transport; __u32 max_burst_bytes; + struct sctp_association *asoc = transport->asoc; + struct sctp_opt *sp = sctp_sk(asoc->base.sk); + struct sctp_outq *q = &asoc->outqueue; /* RFC 2960 6.1 Transmission of DATA Chunks * @@ -494,8 +494,8 @@ static sctp_xmit_t sctp_packet_append_data(sctp_packet_t *packet, * receiver to the data sender. 
*/ - rwnd = transport->asoc->peer.rwnd; - inflight = transport->asoc->outqueue.outstanding_bytes; + rwnd = asoc->peer.rwnd; + inflight = asoc->outqueue.outstanding_bytes; datasize = sctp_data_size(chunk); @@ -517,7 +517,7 @@ static sctp_xmit_t sctp_packet_append_data(sctp_packet_t *packet, * if ((flightsize + Max.Burst * MTU) < cwnd) * cwnd = flightsize + Max.Burst * MTU */ - max_burst_bytes = transport->asoc->max_burst * transport->asoc->pmtu; + max_burst_bytes = asoc->max_burst * asoc->pmtu; if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { transport->cwnd = transport->flight_size + max_burst_bytes; SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " @@ -543,27 +543,44 @@ static sctp_xmit_t sctp_packet_append_data(sctp_packet_t *packet, * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ - if (!chunk->fast_retransmit) { + if (!chunk->fast_retransmit) if (transport->flight_size >= transport->cwnd) { retval = SCTP_XMIT_RWND_FULL; goto finish; } + + /* Nagle's algorithm to solve small-packet problem: + * Inhibit the sending of new chunks when new outgoing data arrives + * if any previously transmitted data on the connection remains + * unacknowledged. + */ + if (!sp->nodelay && SCTP_IP_OVERHEAD == packet->size && + q->outstanding_bytes && SCTP_STATE_ESTABLISHED == asoc->state) { + unsigned len = datasize + q->out_qlen; + + /* Check whether this chunk and all the rest of pending + * data will fit or delay in hopes of bundling a full + * sized packet. + */ + if (len < asoc->pmtu - SCTP_IP_OVERHEAD) { + retval = SCTP_XMIT_NAGLE_DELAY; + goto finish; + } } /* Keep track of how many bytes are in flight over this transport. */ transport->flight_size += datasize; /* Keep track of how many bytes are in flight to the receiver. */ - transport->asoc->outqueue.outstanding_bytes += datasize; + asoc->outqueue.outstanding_bytes += datasize; /* Update our view of the receiver's rwnd. */ - if (datasize < rwnd) { + if (datasize < rwnd) rwnd -= datasize; - } else { + else rwnd = 0; - } - transport->asoc->peer.rwnd = rwnd; + asoc->peer.rwnd = rwnd; finish: return retval; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index b5697e4cc2a3..d2fb050019e5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1,7 +1,7 @@ /* SCTP kernel reference Implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. - * Copyright (c) 2001 Intel Corp. + * Copyright (c) 2001-2003 Intel Corp. * Copyright (c) 2001-2003 International Business Machines Corp. * * This file is part of the SCTP kernel reference Implementation @@ -62,6 +62,43 @@ static void sctp_check_transmitted(struct sctp_outq *q, sctp_sackhdr_t *sack, __u32 highest_new_tsn); +/* Add data to the front of the queue. */ +static inline void sctp_outq_head_data(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + __skb_queue_head(&q->out, (struct sk_buff *)ch); + q->out_qlen += ch->skb->len; + return; +} + +/* Take data from the front of the queue. */ +static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) +{ + struct sctp_chunk *ch; + ch = (struct sctp_chunk *)__skb_dequeue(&q->out); + if (ch) + q->out_qlen -= ch->skb->len; + return ch; +} +/* Add data chunk to the end of the queue. */ +static inline void sctp_outq_tail_data(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + __skb_queue_tail(&q->out, (struct sk_buff *)ch); + q->out_qlen += ch->skb->len; + return; +} + +/* Insert a chunk behind chunk 'pos'. 
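[Editor's note] The sctp_packet_append_data() hunk above is the heart of the new Nagle-like behaviour: when the socket has not set SCTP_NODELAY, the packet being built is still empty, earlier DATA is unacknowledged and the pending bytes (datasize + q->out_qlen) would not fill a PMTU-sized payload, the chunk is held back with SCTP_XMIT_NAGLE_DELAY. Note that later in this patch sctp_init_sock() flips the default to sp->nodelay = 1, so the delay only applies once an application clears the option. A minimal userspace sketch of toggling it follows; the option level SOL_SCTP/IPPROTO_SCTP and the SCTP_NODELAY constant are assumed to come from this tree's user-visible SCTP headers, not shown in the hunks above.

/* Hedged sketch: toggle the Nagle-like delay on an SCTP socket.
 * sctp_setsockopt_nodelay() above reads a single __u8, so pass one
 * byte rather than an int.  SCTP_NODELAY is assumed to be provided
 * by the SCTP user headers of this tree. */
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <sys/socket.h>

static int sctp_set_nodelay(int fd, int on)
{
        unsigned char val = on ? 1 : 0;

        /* val = 1: send small DATA chunks immediately (the new default).
         * val = 0: allow sctp_packet_append_data() to delay small chunks
         *          while earlier data is still in flight. */
        return setsockopt(fd, IPPROTO_SCTP, SCTP_NODELAY, &val, sizeof(val));
}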
*/ +static inline void sctp_outq_insert_data(struct sctp_outq *q, + struct sctp_chunk *ch, + struct sctp_chunk *pos) +{ + __skb_insert((struct sk_buff *)ch, (struct sk_buff *)pos->prev, + (struct sk_buff *)pos, pos->list); + q->out_qlen += ch->skb->len; +} + /* Generate a new outqueue. */ struct sctp_outq *sctp_outq_new(sctp_association_t *asoc) { @@ -97,6 +134,7 @@ void sctp_outq_init(sctp_association_t *asoc, struct sctp_outq *q) q->empty = 1; q->malloced = 0; + q->out_qlen = 0; } /* Free the outqueue structure and any related pending chunks. @@ -125,7 +163,7 @@ void sctp_outq_teardown(struct sctp_outq *q) sctp_free_chunk(chunk); } - /* Throw away any chunks in the retransmit queue. */ + /* Throw away any chunks in the retransmit queue. */ list_for_each_safe(lchunk, temp, &q->retransmit) { list_del(lchunk); chunk = list_entry(lchunk, sctp_chunk_t, transmitted_list); @@ -133,7 +171,7 @@ void sctp_outq_teardown(struct sctp_outq *q) } /* Throw away any leftover data chunks. */ - while ((chunk = (sctp_chunk_t *) skb_dequeue(&q->out))) + while ((chunk = sctp_outq_dequeue_data(q))) sctp_free_chunk(chunk); /* Throw away any leftover control chunks. */ @@ -192,7 +230,7 @@ int sctp_outq_tail(struct sctp_outq *q, sctp_chunk_t *chunk) sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "Illegal Chunk"); - skb_queue_tail(&q->out, (struct sk_buff *) chunk); + sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) SCTP_INC_STATS(SctpOutUnorderChunks); else @@ -201,7 +239,7 @@ int sctp_outq_tail(struct sctp_outq *q, sctp_chunk_t *chunk) break; }; } else { - skb_queue_tail(&q->control, (struct sk_buff *) chunk); + __skb_queue_tail(&q->control, (struct sk_buff *) chunk); SCTP_INC_STATS(SctpOutCtrlChunks); } @@ -241,7 +279,7 @@ void sctp_retransmit_insert(struct list_head *tlchunk, struct sctp_outq *q) } /* Mark all the eligible packets on a transport for retransmission. */ -void sctp_retransmit_mark(struct sctp_outq *q, +void sctp_retransmit_mark(struct sctp_outq *q, struct sctp_transport *transport, __u8 fast_retransmit) { @@ -351,7 +389,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * * The return value is a normal kernel error return value. */ -static int sctp_outq_flush_rtx(struct sctp_outq *q, sctp_packet_t *pkt, +static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer) { struct list_head *lqueue; @@ -385,17 +423,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, sctp_packet_t *pkt, while (lchunk) { chunk = list_entry(lchunk, sctp_chunk_t, transmitted_list); -#if 0 - /* If a chunk has been tried for more than SCTP_DEF_MAX_SEND - * times, discard it, and check the empty flag of the outqueue. - * - * --xguo - */ - if (chunk->snd_count > SCTP_DEF_MAX_SEND) { - sctp_free_chunk(chunk); - continue; - } -#endif /* Make sure that Gap Acked TSNs are not retransmitted. A * simple approach is just to move such TSNs out of the @@ -461,8 +488,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, sctp_packet_t *pkt, * queue. 'pos' points to the next chunk in the output queue after the * chunk that is currently in the process of fragmentation. 
*/ -void sctp_xmit_frag(struct sctp_outq *q, struct sk_buff *pos, - sctp_packet_t *packet, sctp_chunk_t *frag, __u32 tsn) +void sctp_xmit_frag(struct sctp_outq *q, struct sctp_chunk *pos, + struct sctp_packet *packet, struct sctp_chunk *frag, __u32 tsn) { struct sctp_transport *transport = packet->transport; struct sk_buff_head *queue = &q->out; @@ -480,11 +507,10 @@ void sctp_xmit_frag(struct sctp_outq *q, struct sk_buff *pos, SCTP_DEBUG_PRINTK("sctp_xmit_frag: q not empty. " "adding 0x%x to outqueue\n", ntohl(frag->subh.data_hdr->tsn)); - if (pos) { - skb_insert(pos, (struct sk_buff *) frag); - } else { - skb_queue_tail(queue, (struct sk_buff *) frag); - } + if (pos) + sctp_outq_insert_data(q, frag, pos); + else + sctp_outq_tail_data(q, frag); return; } @@ -496,11 +522,10 @@ void sctp_xmit_frag(struct sctp_outq *q, struct sk_buff *pos, SCTP_DEBUG_PRINTK("sctp_xmit_frag: rwnd full. " "adding 0x%x to outqueue\n", ntohl(frag->subh.data_hdr->tsn)); - if (pos) { - skb_insert(pos, (struct sk_buff *) frag); - } else { - skb_queue_tail(queue, (struct sk_buff *) frag); - } + if (pos) + sctp_outq_insert_data(q, frag, pos); + else + sctp_outq_tail_data(q, frag); break; case SCTP_XMIT_OK: @@ -512,11 +537,10 @@ void sctp_xmit_frag(struct sctp_outq *q, struct sk_buff *pos, SCTP_DEBUG_PRINTK("sctp_xmit_frag: force output " "failed. adding 0x%x to outqueue\n", ntohl(frag->subh.data_hdr->tsn)); - if (pos) { - skb_insert(pos, (struct sk_buff *) frag); - } else { - skb_queue_tail(queue, (struct sk_buff *) frag); - } + if (pos) + sctp_outq_insert_data(q, frag, pos); + else + sctp_outq_tail_data(q, frag); } else { SCTP_DEBUG_PRINTK("sctp_xmit_frag: force output " "success. 0x%x sent\n", @@ -537,14 +561,14 @@ void sctp_xmit_frag(struct sctp_outq *q, struct sk_buff *pos, * The argument 'frag' point to the first fragment and it holds the list * of all the other fragments in the 'frag_list' field. */ -void sctp_xmit_fragmented_chunks(struct sctp_outq *q, sctp_packet_t *packet, +void sctp_xmit_fragmented_chunks(struct sctp_outq *q, struct sctp_packet *pkt, sctp_chunk_t *frag) { sctp_association_t *asoc = frag->asoc; struct list_head *lfrag, *frag_list; __u32 tsn; int nfrags = 1; - struct sk_buff *pos; + struct sctp_chunk *pos; /* Count the number of fragments. */ frag_list = &frag->frag_list; @@ -553,17 +577,17 @@ void sctp_xmit_fragmented_chunks(struct sctp_outq *q, sctp_packet_t *packet, } /* Get a TSN block of nfrags TSNs. */ - tsn = __sctp_association_get_tsn_block(asoc, nfrags); + tsn = sctp_association_get_tsn_block(asoc, nfrags); - pos = skb_peek(&q->out); + pos = (struct sctp_chunk *)skb_peek(&q->out); /* Transmit the first fragment. */ - sctp_xmit_frag(q, pos, packet, frag, tsn++); + sctp_xmit_frag(q, pos, pkt, frag, tsn++); /* Transmit the rest of fragments. */ frag_list = &frag->frag_list; list_for_each(lfrag, frag_list) { frag = list_entry(lfrag, sctp_chunk_t, frag_list); - sctp_xmit_frag(q, pos, packet, frag, tsn++); + sctp_xmit_frag(q, pos, pkt, frag, tsn++); } } @@ -595,7 +619,7 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk, old_flags = chunk->chunk_hdr->flags; if (old_flags & SCTP_DATA_FIRST_FRAG) flags = SCTP_DATA_FIRST_FRAG; - else + else flags = SCTP_DATA_MIDDLE_FRAG; /* Make the first fragment. */ @@ -672,15 +696,14 @@ err: * * Description: Send everything in q which we legally can, subject to * congestion limitations. 
- * - * Note: This function can be called from multiple contexts so appropriate + * * Note: This function can be called from multiple contexts so appropriate * locking concerns must be made. Today we use the sock lock to protect * this function. */ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) { - sctp_packet_t *packet; - sctp_packet_t singleton; + struct sctp_packet *packet; + struct sctp_packet singleton; sctp_association_t *asoc = q->asoc; int ecn_capable = asoc->peer.ecn_capable; __u16 sport = asoc->base.bind_addr.port; @@ -719,7 +742,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) } queue = &q->control; - while (NULL != (chunk = (sctp_chunk_t *)skb_dequeue(queue))) { + while ((chunk = (sctp_chunk_t *)skb_dequeue(queue))) { /* Pick the right transport to use. */ new_transport = chunk->transport; @@ -852,7 +875,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) /* Finally, transmit new packets. */ start_timer = 0; queue = &q->out; - while (NULL != (chunk = (sctp_chunk_t *) skb_dequeue(queue))) { + + while (NULL != (chunk = sctp_outq_dequeue_data(q))) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ @@ -925,6 +949,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) switch (status) { case SCTP_XMIT_PMTU_FULL: case SCTP_XMIT_RWND_FULL: + case SCTP_XMIT_NAGLE_DELAY: /* We could not append this chunk, so put * the chunk back on the output queue. */ @@ -932,7 +957,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) "not transmit TSN: 0x%x, status: %d\n", ntohl(chunk->subh.data_hdr->tsn), status); - skb_queue_head(queue, (struct sk_buff *)chunk); + sctp_outq_head_data(q, chunk); goto sctp_flush_out; break; @@ -994,6 +1019,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) } sctp_flush_out: + /* Before returning, examine all the transports touched in * this call. Right now, we bluntly force clear all the * transports. Things might change after we implement Nagle. @@ -1003,7 +1029,7 @@ sctp_flush_out: */ while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL ) { struct sctp_transport *t = list_entry(ltransport, - struct sctp_transport, + struct sctp_transport, send_ready); if (t != transport) transport = t; @@ -1125,7 +1151,7 @@ int sctp_outq_sack(struct sctp_outq *q, sctp_sackhdr_t *sack) * This is a MASSIVE candidate for optimization. */ list_for_each(pos, transport_list) { - transport = list_entry(pos, struct sctp_transport, + transport = list_entry(pos, struct sctp_transport, transports); sctp_check_transmitted(q, &transport->transmitted, transport, sack, highest_new_tsn); @@ -1163,11 +1189,10 @@ int sctp_outq_sack(struct sctp_outq *q, sctp_sackhdr_t *sack) sack_a_rwnd = ntohl(sack->a_rwnd); outstanding = q->outstanding_bytes; - if (outstanding < sack_a_rwnd) { + if (outstanding < sack_a_rwnd) sack_a_rwnd -= outstanding; - } else { + else sack_a_rwnd = 0; - } asoc->peer.rwnd = sack_a_rwnd; @@ -1179,7 +1204,7 @@ int sctp_outq_sack(struct sctp_outq *q, sctp_sackhdr_t *sack) goto finish; list_for_each(pos, transport_list) { - transport = list_entry(pos, struct sctp_transport, + transport = list_entry(pos, struct sctp_transport, transports); q->empty = q->empty && list_empty(&transport->transmitted); if (!q->empty) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 85a5a2941af5..a1c98e3618cc 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -58,7 +58,7 @@ #include <net/inet_common.h> /* Global data structures. 
*/ -sctp_protocol_t sctp_proto; +struct sctp_protocol sctp_proto; struct proc_dir_entry *proc_net_sctp; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); @@ -152,7 +152,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void __sctp_get_local_addr_list(sctp_protocol_t *proto) +static void __sctp_get_local_addr_list(struct sctp_protocol *proto) { struct net_device *dev; struct list_head *pos; @@ -168,7 +168,7 @@ static void __sctp_get_local_addr_list(sctp_protocol_t *proto) read_unlock(&dev_base_lock); } -static void sctp_get_local_addr_list(sctp_protocol_t *proto) +static void sctp_get_local_addr_list(struct sctp_protocol *proto) { long flags __attribute__ ((unused)); @@ -178,7 +178,7 @@ static void sctp_get_local_addr_list(sctp_protocol_t *proto) } /* Free the existing local addresses. */ -static void __sctp_free_local_addr_list(sctp_protocol_t *proto) +static void __sctp_free_local_addr_list(struct sctp_protocol *proto) { struct sockaddr_storage_list *addr; struct list_head *pos, *temp; @@ -191,7 +191,7 @@ static void __sctp_free_local_addr_list(sctp_protocol_t *proto) } /* Free the existing local addresses. */ -static void sctp_free_local_addr_list(sctp_protocol_t *proto) +static void sctp_free_local_addr_list(struct sctp_protocol *proto) { long flags __attribute__ ((unused)); @@ -201,8 +201,9 @@ static void sctp_free_local_addr_list(sctp_protocol_t *proto) } /* Copy the local addresses which are valid for 'scope' into 'bp'. */ -int sctp_copy_local_addr_list(sctp_protocol_t *proto, sctp_bind_addr_t *bp, - sctp_scope_t scope, int priority, int copy_flags) +int sctp_copy_local_addr_list(struct sctp_protocol *proto, + struct sctp_bind_addr *bp, sctp_scope_t scope, + int priority, int copy_flags) { struct sockaddr_storage_list *addr; int error = 0; @@ -331,7 +332,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr) static int sctp_v4_available(const union sctp_addr *addr) { int ret = inet_addr_type(addr->v4.sin_addr.s_addr); - + /* FIXME: ip_nonlocal_bind sysctl support. */ if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL) @@ -380,7 +381,7 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr) /* Returns a valid dst cache entry for the given source and destination ip * addresses. If an association is passed, trys to get a dst entry with a - * source adddress that matches an address in the bind address list. + * source adddress that matches an address in the bind address list. */ struct dst_entry *sctp_v4_get_dst(sctp_association_t *asoc, union sctp_addr *daddr, @@ -479,6 +480,61 @@ void sctp_v4_get_saddr(sctp_association_t *asoc, } +/* Create and initialize a new sk for the socket returned by accept(). 
*/ +struct sock *sctp_v4_create_accept_sk(struct sock *sk, + struct sctp_association *asoc) +{ + struct sock *newsk; + struct inet_opt *inet = inet_sk(sk); + struct inet_opt *newinet; + + newsk = sk_alloc(PF_INET, GFP_KERNEL, sizeof(struct sctp_sock), + sk->slab); + if (!newsk) + goto out; + + sock_init_data(NULL, newsk); + + newsk->type = SOCK_STREAM; + + newsk->prot = sk->prot; + newsk->no_check = sk->no_check; + newsk->reuse = sk->reuse; + + newsk->destruct = inet_sock_destruct; + newsk->zapped = 0; + newsk->family = PF_INET; + newsk->protocol = IPPROTO_SCTP; + newsk->backlog_rcv = sk->prot->backlog_rcv; + + newinet = inet_sk(newsk); + newinet->sport = inet->sport; + newinet->saddr = inet->saddr; + newinet->rcv_saddr = inet->saddr; + newinet->dport = asoc->peer.port; + newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; + newinet->pmtudisc = inet->pmtudisc; + newinet->id = 0; + + newinet->ttl = sysctl_ip_default_ttl; + newinet->mc_loop = 1; + newinet->mc_ttl = 1; + newinet->mc_index = 0; + newinet->mc_list = NULL; + +#ifdef INET_REFCNT_DEBUG + atomic_inc(&inet_sock_nr); +#endif + + if (0 != newsk->prot->init(newsk)) { + inet_sock_release(newsk); + newsk = NULL; + } + +out: + return newsk; +} + /* Event handler for inet address addition/deletion events. * Basically, whenever there is an event, we re-build our local address list. */ @@ -501,10 +557,13 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long event, */ int sctp_ctl_sock_init(void) { - int err = 0; - int family = PF_INET; + int err; + sa_family_t family; - SCTP_V6(family = PF_INET6;) + if (sctp_get_pf_specific(PF_INET6)) + family = PF_INET6; + else + family = PF_INET; err = sock_create(family, SOCK_SEQPACKET, IPPROTO_SCTP, &sctp_ctl_socket); @@ -630,6 +689,16 @@ static int sctp_inet_bind_verify(struct sctp_opt *opt, union sctp_addr *addr) return sctp_v4_available(addr); } +/* Fill in Supported Address Type information for INIT and INIT-ACK + * chunks. Returns number of addresses supported. + */ +static int sctp_inet_supported_addrs(const struct sctp_opt *opt, + __u16 *types) +{ + types[0] = SCTP_PARAM_IPV4_ADDRESS; + return 1; +} + /* Wrapper routine that calls the ip transmit routine. */ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *transport, int ipfragok) @@ -652,6 +721,8 @@ static struct sctp_pf sctp_pf_inet = { .af_supported = sctp_inet_af_supported, .cmp_addr = sctp_inet_cmp_addr, .bind_verify = sctp_inet_bind_verify, + .supported_addrs = sctp_inet_supported_addrs, + .create_accept_sk = sctp_v4_create_accept_sk, .af = &sctp_ipv4_specific, }; @@ -682,7 +753,7 @@ struct proto_ops inet_seqpacket_ops = { }; /* Registration with AF_INET family. */ -struct inet_protosw sctp_protosw = { +static struct inet_protosw sctp_seqpacket_protosw = { .type = SOCK_SEQPACKET, .protocol = IPPROTO_SCTP, .prot = &sctp_prot, @@ -691,6 +762,15 @@ struct inet_protosw sctp_protosw = { .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; +static struct inet_protosw sctp_stream_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SCTP, + .prot = &sctp_prot, + .ops = &inet_seqpacket_ops, + .capability = -1, + .no_check = 0, + .flags = SCTP_PROTOSW_FLAG +}; /* Register with IP layer. 
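[Editor's note] With sctp_stream_protosw registered alongside sctp_seqpacket_protosw, socket(PF_INET, SOCK_STREAM, IPPROTO_SCTP) now resolves to SCTP, and sctp_v4_create_accept_sk() above supplies the already-connected sock that accept() hands back. A hedged sketch of a one-to-one (TCP-style) server using that path; the port number and buffer size are arbitrary and IPPROTO_SCTP is assumed to be visible from the system headers.

#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in addr;
        int lfd, cfd;
        char buf[256];
        ssize_t n;

        /* SOCK_STREAM + IPPROTO_SCTP now matches sctp_stream_protosw. */
        lfd = socket(PF_INET, SOCK_STREAM, IPPROTO_SCTP);
        if (lfd < 0) {
                perror("socket");
                return 1;
        }

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = htons(5000);            /* arbitrary example port */

        if (bind(lfd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
            listen(lfd, 5) < 0) {               /* sctp_stream_listen() */
                perror("bind/listen");
                return 1;
        }

        /* sctp_accept() takes the first established association off the
         * endpoint and migrates it to a new, already-connected socket. */
        cfd = accept(lfd, NULL, NULL);
        if (cfd < 0) {
                perror("accept");
                return 1;
        }

        while ((n = recv(cfd, buf, sizeof(buf), 0)) > 0)
                ;                               /* consume inbound DATA */

        close(cfd);
        close(lfd);
        return 0;
}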
*/ static struct inet_protocol sctp_protocol = { @@ -756,7 +836,7 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static int __init init_sctp_mibs(void) { int i; - + sctp_statistics[0] = kmalloc_percpu(sizeof (struct sctp_mib), GFP_KERNEL); if (!sctp_statistics[0]) @@ -778,7 +858,7 @@ static int __init init_sctp_mibs(void) } } return 0; - + } static void cleanup_sctp_mibs(void) @@ -797,14 +877,15 @@ __init int sctp_init(void) if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) return -EAGAIN; - /* Add SCTP to inetsw linked list. */ - inet_register_protosw(&sctp_protosw); + /* Add SCTP(TCP and UDP style) to inetsw linked list. */ + inet_register_protosw(&sctp_seqpacket_protosw); + inet_register_protosw(&sctp_stream_protosw); /* Allocate and initialise sctp mibs. */ status = init_sctp_mibs(); - if (status) + if (status) goto err_init_mibs; - + /* Initialize proc fs directory. */ sctp_proc_init(); @@ -831,7 +912,7 @@ __init int sctp_init(void) /* Valid.Cookie.Life - 60 seconds */ sctp_proto.valid_cookie_life = 60 * HZ; - /* Whether Cookie Preservative is enabled(1) or not(0) */ + /* Whether Cookie Preservative is enabled(1) or not(0) */ sctp_proto.cookie_preserve_enable = 1; /* Max.Burst - 4 */ @@ -920,7 +1001,7 @@ __init int sctp_init(void) INIT_LIST_HEAD(&sctp_proto.local_addr_list); sctp_proto.local_addr_lock = SPIN_LOCK_UNLOCKED; - /* Register notifier for inet address additions/deletions. */ + /* Register notifier for inet address additions/deletions. */ register_inetaddr_notifier(&sctp_inetaddr_notifier); sctp_get_local_addr_list(&sctp_proto); @@ -942,9 +1023,10 @@ err_ahash_alloc: sctp_dbg_objcnt_exit(); sctp_proc_exit(); cleanup_sctp_mibs(); -err_init_mibs: +err_init_mibs: inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - inet_unregister_protosw(&sctp_protosw); + inet_unregister_protosw(&sctp_seqpacket_protosw); + inet_unregister_protosw(&sctp_stream_protosw); return status; } @@ -977,7 +1059,8 @@ __exit void sctp_exit(void) cleanup_sctp_mibs(); inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - inet_unregister_protosw(&sctp_protosw); + inet_unregister_protosw(&sctp_seqpacket_protosw); + inet_unregister_protosw(&sctp_stream_protosw); } module_init(sctp_init); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 0677dbbbd802..0f3f35affe8e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -68,29 +68,6 @@ /* RFC 2960 3.3.2 Initiation (INIT) (1) * - * Note 4: This parameter, when present, specifies all the - * address types the sending endpoint can support. The absence - * of this parameter indicates that the sending endpoint can - * support any address type. - */ -static const sctp_supported_addrs_param_t sat_param = { - { - SCTP_PARAM_SUPPORTED_ADDRESS_TYPES, - __constant_htons(SCTP_SAT_LEN), - } -}; - -/* gcc 3.2 doesn't allow initialization of zero-length arrays. So the above - * structure is split and the address types array is initialized using a - * fixed length array. - */ -static const __u16 sat_addr_types[2] = { - SCTP_PARAM_IPV4_ADDRESS, - SCTP_V6(SCTP_PARAM_IPV6_ADDRESS,) -}; - -/* RFC 2960 3.3.2 Initiation (INIT) (1) - * * Note 2: The ECN capable field is reserved for future use of * Explicit Congestion Notification. 
*/ @@ -174,7 +151,10 @@ sctp_chunk_t *sctp_make_init(const sctp_association_t *asoc, union sctp_params addrs; size_t chunksize; sctp_chunk_t *retval = NULL; - int addrs_len = 0; + int num_types, addrs_len = 0; + struct sctp_opt *sp; + sctp_supported_addrs_param_t sat; + __u16 types[2]; /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -195,7 +175,11 @@ sctp_chunk_t *sctp_make_init(const sctp_association_t *asoc, init.num_inbound_streams = htons(asoc->c.sinit_max_instreams); init.initial_tsn = htonl(asoc->c.initial_tsn); - chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN; + /* How many address types are needed? */ + sp = sctp_sk(asoc->base.sk); + num_types = sp->pf->supported_addrs(sp, types); + + chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); chunksize += sizeof(ecap_param); chunksize += vparam_len; @@ -220,8 +204,18 @@ sctp_chunk_t *sctp_make_init(const sctp_association_t *asoc, retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); - sctp_addto_chunk(retval, sizeof(sctp_paramhdr_t), &sat_param); - sctp_addto_chunk(retval, sizeof(sat_addr_types), sat_addr_types); + /* RFC 2960 3.3.2 Initiation (INIT) (1) + * + * Note 4: This parameter, when present, specifies all the + * address types the sending endpoint can support. The absence + * of this parameter indicates that the sending endpoint can + * support any address type. + */ + sat.param_hdr.type = SCTP_PARAM_SUPPORTED_ADDRESS_TYPES; + sat.param_hdr.length = htons(SCTP_SAT_LEN(num_types)); + sctp_addto_chunk(retval, sizeof(sat), &sat); + sctp_addto_chunk(retval, num_types * sizeof(__u16), &types); + sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); nodata: if (addrs.v) @@ -604,7 +598,7 @@ sctp_chunk_t *sctp_make_sack(const sctp_association_t *asoc) /* Initialize the SACK header. */ sack.cum_tsn_ack = htonl(ctsn); - sack.a_rwnd = htonl(asoc->rwnd); + sack.a_rwnd = htonl(asoc->a_rwnd); sack.num_gap_ack_blocks = htons(num_gabs); sack.num_dup_tsns = htons(num_dup_tsns); @@ -1159,7 +1153,7 @@ int sctp_datachunks_from_user(sctp_association_t *asoc, first_len = max; /* Encourage Cookie-ECHO bundling. */ - if (asoc->state < SCTP_STATE_ESTABLISHED) { + if (asoc->state < SCTP_STATE_COOKIE_ECHOED) { whole = msg_len / (max - SCTP_ARBITRARY_COOKIE_ECHO_LEN); /* Account for the DATA to be bundled with the COOKIE-ECHO. */ @@ -1282,7 +1276,7 @@ void sctp_chunk_assign_tsn(sctp_chunk_t *chunk) * assign a TSN. */ chunk->subh.data_hdr->tsn = - htonl(__sctp_association_get_next_tsn(chunk->asoc)); + htonl(sctp_association_get_next_tsn(chunk->asoc)); chunk->has_tsn = 1; } } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1228f55dfdfb..004b6d2f0b03 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -105,8 +105,8 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *, sctp_association_t *, #define DEBUG_POST_SFX \ SCTP_DEBUG_PRINTK("sctp_do_sm post sfx: error %d, asoc %p[%s]\n", \ error, asoc, \ - sctp_state_tbl[sctp_id2assoc(ep->base.sk, \ - sctp_assoc2id(asoc))?asoc->state:SCTP_STATE_CLOSED]) + sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ + sctp_assoc2id(asoc)))?asoc->state:SCTP_STATE_CLOSED]) /* * This is the master state machine processing function. 
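[Editor's note] In the sctp_make_init() hunk above, the fixed static Supported Address Types parameter is replaced by one built on the stack from the PF-specific supported_addrs() hook: an IPv4 endpoint advertises one type, a PF_INET6 endpoint two. The sketch below shows the resulting TLV layout only as an illustration; it assumes SCTP_SAT_LEN(n) expands to the 4-byte parameter header plus n 16-bit type values, and the numeric constants mirror RFC 2960 section 3.3.2 (0x000c = Supported Address Types, 0x0005/0x0006 = IPv4/IPv6 address parameter types).

#include <arpa/inet.h>
#include <stdint.h>
#include <stddef.h>

/* Illustrative userspace model of the parameter sctp_make_init() now
 * appends; lengths are as carried on the wire, before 4-byte padding. */
struct sat_param {
        uint16_t type;          /* 0x000c: Supported Address Types       */
        uint16_t length;        /* 4 + 2 * num_types                     */
        uint16_t addr_types[2]; /* 0x0005 and, for PF_INET6, 0x0006      */
};

static size_t build_sat(struct sat_param *sat, int ipv6_capable)
{
        int n = ipv6_capable ? 2 : 1;

        sat->type = htons(0x000c);
        sat->length = htons(4 + 2 * n);
        sat->addr_types[0] = htons(0x0005);
        if (ipv6_capable)
                sat->addr_types[1] = htons(0x0006);
        return 4 + 2 * n;
}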
@@ -256,7 +256,7 @@ int sctp_cmd_interpreter(sctp_event_t event_type, sctp_subtype_t subtype, sctp_cmd_t *cmd; sctp_chunk_t *new_obj; sctp_chunk_t *chunk = NULL; - sctp_packet_t *packet; + struct sctp_packet *packet; struct list_head *pos; struct timer_list *timer; unsigned long timeout; @@ -716,13 +716,12 @@ int sctp_gen_sack(sctp_association_t *asoc, int force, sctp_cmd_seq_t *commands) asoc->peer.sack_needed = 1; goto out; } else { + if (asoc->a_rwnd > asoc->rwnd) + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) goto nomem; - /* Update the last advertised rwnd value. */ - asoc->a_rwnd = asoc->rwnd; - asoc->peer.sack_needed = 0; error = sctp_outq_tail(&asoc->outqueue, sack); @@ -1223,13 +1222,35 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, sctp_association_t *asoc, static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, sctp_association_t *asoc, sctp_state_t state) { + + struct sock *sk = asoc->base.sk; + struct sctp_opt *sp = sctp_sk(sk); + asoc->state = state; asoc->state_timestamp = jiffies; - /* Wake up any process waiting for the association to - * get established. + if ((SCTP_STATE_ESTABLISHED == asoc->state) || + (SCTP_STATE_CLOSED == asoc->state)) { + /* Wake up any processes waiting in the asoc's wait queue in + * sctp_wait_for_connect() or sctp_wait_for_sndbuf(). + */ + if (waitqueue_active(&asoc->wait)) + wake_up_interruptible(&asoc->wait); + + /* Wake up any processes waiting in the sk's sleep queue of + * a TCP-style or UDP-style peeled-off socket in + * sctp_wait_for_accept() or sctp_wait_for_packet(). + * For a UDP-style socket, the waiters are woken up by the + * notifications. + */ + if (SCTP_SOCKET_UDP != sp->type) + sk->state_change(sk); + } + + /* Change the sk->state of a TCP-style socket that has sucessfully + * completed a connect() call. */ if ((SCTP_STATE_ESTABLISHED == asoc->state) && - (waitqueue_active(&asoc->wait))) - wake_up_interruptible(&asoc->wait); + (SCTP_SOCKET_TCP == sp->type) && (SCTP_SS_CLOSED == sk->state)) + sk->state = SCTP_SS_ESTABLISHED; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e2709bd8e59f..9e2862892013 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -189,7 +189,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const sctp_endpoint_t *ep, sctp_chunk_t *repl; sctp_association_t *new_asoc; sctp_chunk_t *err_chunk; - sctp_packet_t *packet; + struct sctp_packet *packet; sctp_unrecognized_param_t *unk_param; int len; @@ -354,10 +354,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const sctp_endpoint_t *ep, sctp_init_chunk_t *initchunk; __u32 init_tag; sctp_chunk_t *err_chunk; - sctp_packet_t *packet; + struct sctp_packet *packet; sctp_disposition_t ret; - /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. @@ -912,14 +911,14 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa, sctp_cmd_seq_t *commands) { int len; - sctp_packet_t *pkt; + struct sctp_packet *pkt; sctp_addr_param_t *addrparm; sctp_errhdr_t *errhdr; sctp_endpoint_t *ep; char buffer[sizeof(sctp_errhdr_t) + sizeof(sctp_addr_param_t)]; - /* Build the error on the stack. We are way to malloc - * malloc crazy throughout the code today. + /* Build the error on the stack. We are way to malloc crazy + * throughout the code today. 
*/ errhdr = (sctp_errhdr_t *)buffer; addrparm = (sctp_addr_param_t *)errhdr->variable; @@ -1105,11 +1104,10 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sctp_chunk_t *repl; sctp_association_t *new_asoc; sctp_chunk_t *err_chunk; - sctp_packet_t *packet; + struct sctp_packet *packet; sctp_unrecognized_param_t *unk_param; int len; - /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. @@ -2351,7 +2349,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const sctp_endpoint_t *ep, * room. Note: Playing nice with a confused sender. A * malicious sender can still eat up all our buffer * space and in the future we may want to detect and - * do more drastic reneging. + * do more drastic reneging. */ if (sctp_tsnmap_has_gap(&asoc->peer.tsn_map) && (sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1) == tsn) { @@ -2751,7 +2749,7 @@ sctp_disposition_t sctp_sf_tabort_8_4_8(const sctp_endpoint_t *ep, void *arg, sctp_cmd_seq_t *commands) { - sctp_packet_t *packet = NULL; + struct sctp_packet *packet = NULL; sctp_chunk_t *chunk = arg; sctp_chunk_t *abort; @@ -2953,7 +2951,7 @@ sctp_disposition_t sctp_sf_shut_8_4_5(const sctp_endpoint_t *ep, void *arg, sctp_cmd_seq_t *commands) { - sctp_packet_t *packet = NULL; + struct sctp_packet *packet = NULL; sctp_chunk_t *chunk = arg; sctp_chunk_t *shut; @@ -4377,13 +4375,13 @@ sctp_sackhdr_t *sctp_sm_pull_sack(sctp_chunk_t *chunk) /* Create an ABORT packet to be sent as a response, with the specified * error causes. */ -sctp_packet_t *sctp_abort_pkt_new(const sctp_endpoint_t *ep, +struct sctp_packet *sctp_abort_pkt_new(const sctp_endpoint_t *ep, const sctp_association_t *asoc, sctp_chunk_t *chunk, const void *payload, size_t paylen) { - sctp_packet_t *packet; + struct sctp_packet *packet; sctp_chunk_t *abort; packet = sctp_ootb_pkt_new(asoc, chunk); @@ -4413,10 +4411,10 @@ sctp_packet_t *sctp_abort_pkt_new(const sctp_endpoint_t *ep, } /* Allocate a packet for responding in the OOTB conditions. */ -sctp_packet_t *sctp_ootb_pkt_new(const sctp_association_t *asoc, +struct sctp_packet *sctp_ootb_pkt_new(const sctp_association_t *asoc, const sctp_chunk_t *chunk) { - sctp_packet_t *packet; + struct sctp_packet *packet; struct sctp_transport *transport; __u16 sport; __u16 dport; @@ -4449,7 +4447,7 @@ sctp_packet_t *sctp_ootb_pkt_new(const sctp_association_t *asoc, goto nomem; /* Allocate a new packet for sending the response. */ - packet = t_new(sctp_packet_t, GFP_ATOMIC); + packet = t_new(struct sctp_packet, GFP_ATOMIC); if (!packet) goto nomem_packet; @@ -4471,7 +4469,7 @@ nomem: } /* Free the packet allocated earlier for responding in the OOTB condition. 
*/ -void sctp_ootb_pkt_free(sctp_packet_t *packet) +void sctp_ootb_pkt_free(struct sctp_packet *packet) { sctp_transport_free(packet->transport); sctp_packet_free(packet); @@ -4484,7 +4482,7 @@ void sctp_send_stale_cookie_err(const sctp_endpoint_t *ep, sctp_cmd_seq_t *commands, sctp_chunk_t *err_chunk) { - sctp_packet_t *packet; + struct sctp_packet *packet; if (err_chunk) { packet = sctp_ootb_pkt_new(asoc, chunk); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 85fcc4fa6ee9..bb91784b0c68 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -88,12 +88,46 @@ static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, int msg_len); static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); +static int sctp_wait_for_accept(struct sock *sk, long timeo); static inline int sctp_verify_addr(struct sock *, union sctp_addr *, int); static int sctp_bindx_add(struct sock *, struct sockaddr_storage *, int); static int sctp_bindx_rem(struct sock *, struct sockaddr_storage *, int); static int sctp_do_bind(struct sock *, union sctp_addr *, int); static int sctp_autobind(struct sock *sk); +static void sctp_sock_migrate(struct sock *, struct sock *, + struct sctp_association *, sctp_socket_type_t); +/* Look up the association by its id. If this is not a UDP-style + * socket, the ID field is always ignored. + */ +sctp_association_t *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) +{ + sctp_association_t *asoc = NULL; + + /* If this is not a UDP-style socket, assoc id should be + * ignored. + */ + if (SCTP_SOCKET_UDP != sctp_sk(sk)->type) { + if (!list_empty(&sctp_sk(sk)->ep->asocs)) + asoc = list_entry(sctp_sk(sk)->ep->asocs.next, + sctp_association_t, asocs); + return asoc; + } + + /* First, verify that this is a kernel address. */ + if (sctp_is_valid_kaddr((unsigned long) id)) { + sctp_association_t *temp = (sctp_association_t *) id; + + /* Verify that this _is_ an sctp_association_t + * data structure and if so, that the socket matches. + */ + if ((SCTP_ASSOC_EYECATCHER == temp->eyecatcher) && + (temp->base.sk == sk)) + asoc = temp; + } + + return asoc; +} /* API 3.1.2 bind() - UDP Style Syntax * The syntax of bind() is, @@ -818,19 +852,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } } } else { - /* For a peeled-off socket, ignore any associd specified by - * the user with SNDRCVINFO. - */ - if (SCTP_SOCKET_UDP_HIGH_BANDWIDTH == sp->type) { - if (list_empty(&ep->asocs)) { - err = -EINVAL; - goto out_unlock; - } - asoc = list_entry(ep->asocs.next, sctp_association_t, - asocs); - } else if (associd) { - asoc = sctp_id2assoc(sk, associd); - } + asoc = sctp_id2assoc(sk, associd); if (!asoc) { err = -EINVAL; goto out_unlock; @@ -1007,7 +1029,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_set_owner_w(chunk); /* This flag, in the UDP model, requests the SCTP stack to - * override the primary destination address with the + * override the primary destination address with the * address found with the sendto/sendmsg call. 
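[Editor's note] The new sctp_id2assoc() above centralizes the association lookup: on a UDP-style socket the id must be a valid kernel address carrying the SCTP_ASSOC_EYECATCHER, while TCP-style and peeled-off sockets use their single association and ignore the id. On the send path this means sinfo_assoc_id from the ancillary data is enough to select the association. A hedged sketch of such a sendmsg() call; the SCTP_SNDRCV cmsg type and the full sctp_sndrcvinfo layout are assumed from the sockets-API draft this code tracks, only sinfo_assoc_id and MSG_ADDR_OVER appear in the hunks above.

#include <netinet/in.h>
#include <netinet/sctp.h>   /* assumed: struct sctp_sndrcvinfo, SCTP_SNDRCV */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Send 'len' bytes on an existing association identified by 'id'. */
static ssize_t send_on_assoc(int fd, sctp_assoc_t id,
                             const void *data, size_t len)
{
        struct sctp_sndrcvinfo *sinfo;
        char cbuf[CMSG_SPACE(sizeof(*sinfo))];
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cmsg;

        memset(&msg, 0, sizeof(msg));
        memset(cbuf, 0, sizeof(cbuf));

        iov.iov_base = (void *)data;
        iov.iov_len = len;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);

        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = IPPROTO_SCTP;
        cmsg->cmsg_type = SCTP_SNDRCV;
        cmsg->cmsg_len = CMSG_LEN(sizeof(*sinfo));

        sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
        sinfo->sinfo_assoc_id = id;     /* resolved by sctp_id2assoc() */

        return sendmsg(fd, &msg, 0);
}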
*/ if (sinfo_flags & MSG_ADDR_OVER) { @@ -1126,17 +1148,19 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr int err = 0; int skb_len; - SCTP_DEBUG_PRINTK("sctp_recvmsg(" - "%s: %p, %s: %p, %s: %d, %s: %d, %s: " - "0x%x, %s: %p)\n", - "sk", sk, - "msghdr", msg, - "len", len, - "knoblauch", noblock, - "flags", flags, - "addr_len", addr_len); + SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %d, %s: %d, %s: " + "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg, + "len", len, "knoblauch", noblock, + "flags", flags, "addr_len", addr_len); sctp_lock_sock(sk); + + if ((SCTP_SOCKET_TCP == sp->type) && + (SCTP_SS_ESTABLISHED != sk->state)) { + err = -ENOTCONN; + goto out; + } + skb = sctp_skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto out; @@ -1207,7 +1231,7 @@ out: return err; } -static inline int sctp_setsockopt_disable_fragments(struct sock *sk, +static int sctp_setsockopt_disable_fragments(struct sock *sk, char *optval, int optlen) { int val; @@ -1223,8 +1247,8 @@ static inline int sctp_setsockopt_disable_fragments(struct sock *sk, return 0; } -static inline int sctp_setsockopt_set_events(struct sock *sk, char *optval, - int optlen) +static int sctp_setsockopt_events(struct sock *sk, char *optval, + int optlen) { if (optlen != sizeof(struct sctp_event_subscribe)) return -EINVAL; @@ -1233,7 +1257,7 @@ static inline int sctp_setsockopt_set_events(struct sock *sk, char *optval, return 0; } -static inline int sctp_setsockopt_autoclose(struct sock *sk, char *optval, +static int sctp_setsockopt_autoclose(struct sock *sk, char *optval, int optlen) { struct sctp_opt *sp = sctp_sk(sk); @@ -1250,9 +1274,8 @@ static inline int sctp_setsockopt_autoclose(struct sock *sk, char *optval, return 0; } -static inline int sctp_setsockopt_set_peer_addr_params(struct sock *sk, - char *optval, - int optlen) +static int sctp_setsockopt_peer_addr_params(struct sock *sk, + char *optval, int optlen) { struct sctp_paddrparams params; sctp_association_t *asoc; @@ -1290,8 +1313,7 @@ static inline int sctp_setsockopt_set_peer_addr_params(struct sock *sk, error = sctp_primitive_REQUESTHEARTBEAT (asoc, trans); if (error) return error; - } - else { + } else { /* The value of the heartbeat interval, in milliseconds. A value of 0, * when modifying the parameter, specifies that the heartbeat on this * address should be disabled. @@ -1311,7 +1333,7 @@ static inline int sctp_setsockopt_set_peer_addr_params(struct sock *sk, return 0; } -static inline int sctp_setsockopt_initmsg(struct sock *sk, char *optval, +static int sctp_setsockopt_initmsg(struct sock *sk, char *optval, int optlen) { if (optlen != sizeof(struct sctp_initmsg)) @@ -1336,7 +1358,7 @@ static inline int sctp_setsockopt_initmsg(struct sock *sk, char *optval, * sinfo_timetolive. The user must provide the sinfo_assoc_id field in * to this call if the caller is using the UDP model. */ -static inline int sctp_setsockopt_set_default_send_param(struct sock *sk, +static int sctp_setsockopt_default_send_param(struct sock *sk, char *optval, int optlen) { struct sctp_sndrcvinfo info; @@ -1359,6 +1381,66 @@ static inline int sctp_setsockopt_set_default_send_param(struct sock *sk, return 0; } +/* 7.1.10 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR) + * + * Requests that the local SCTP stack use the enclosed peer address as + * the association primary. The enclosed address must be one of the + * association peer's addresses. 
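[Editor's note] The sctp_setsockopt_peer_prim() handler that follows implements the comment above. A hedged userspace sketch of the matching call; the sspp_assoc_id and sspp_addr field names are taken from that handler, while the userspace declaration of struct sctp_setpeerprim (assumed large enough to hold a sockaddr_in) and the header providing it are assumptions.

#include <netinet/in.h>
#include <netinet/sctp.h>   /* assumed to define struct sctp_setpeerprim */
#include <string.h>
#include <sys/socket.h>

/* Ask the stack to use 'peer' as the association primary path.  The
 * address must already be one of the peer's addresses, otherwise
 * sctp_assoc_lookup_paddr() fails and the call returns -ENOENT. */
static int set_peer_primary(int fd, sctp_assoc_t id,
                            const struct sockaddr_in *peer)
{
        struct sctp_setpeerprim prim;

        memset(&prim, 0, sizeof(prim));
        prim.sspp_assoc_id = id;
        memcpy(&prim.sspp_addr, peer, sizeof(*peer));

        return setsockopt(fd, IPPROTO_SCTP, SCTP_SET_PEER_PRIMARY_ADDR,
                          &prim, sizeof(prim));
}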
+ */ +static int sctp_setsockopt_peer_prim(struct sock *sk, char *optval, int optlen) +{ + struct sctp_setpeerprim prim; + struct sctp_association *asoc; + union sctp_addr *addr; + struct sctp_transport *trans; + + if (optlen != sizeof(struct sctp_setpeerprim)) + return -EINVAL; + + if (copy_from_user(&prim, optval, sizeof(struct sctp_setpeerprim))) + return -EFAULT; + + asoc = sctp_id2assoc(sk, prim.sspp_assoc_id); + if (!asoc) + return -EINVAL; + + /* Find the requested address. */ + addr = (union sctp_addr *) &(prim.sspp_addr); + + trans = sctp_assoc_lookup_paddr(asoc, addr); + if (!trans) + return -ENOENT; + + sctp_assoc_set_primary(asoc, trans); + + return 0; +} + +/* + * + * 7.1.5 SCTP_NODELAY + * + * Turn on/off any Nagle-like algorithm. This means that packets are + * generally sent as soon as possible and no unnecessary delays are + * introduced, at the cost of more packets in the network. Expects an + * integer boolean flag. + */ +static int sctp_setsockopt_nodelay(struct sock *sk, char *optval, + int optlen) +{ + __u8 val; + + if (optlen < sizeof(__u8)) + return -EINVAL; + + if (get_user(val, (__u8 *)optval)) + return -EFAULT; + + sctp_sk(sk)->nodelay = (val == 0) ? 0 : 1; + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -1434,7 +1516,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_SET_EVENTS: - retval = sctp_setsockopt_set_events(sk, optval, optlen); + retval = sctp_setsockopt_events(sk, optval, optlen); break; case SCTP_AUTOCLOSE: @@ -1442,8 +1524,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_SET_PEER_ADDR_PARAMS: - retval = sctp_setsockopt_set_peer_addr_params(sk, optval, - optlen); + retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; case SCTP_INITMSG: @@ -1451,8 +1532,16 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_SET_DEFAULT_SEND_PARAM: - retval = sctp_setsockopt_set_default_send_param(sk, - optval, optlen); + retval = sctp_setsockopt_default_send_param(sk, optval, + optlen); + break; + + case SCTP_SET_PEER_PRIMARY_ADDR: + retval = sctp_setsockopt_peer_prim(sk, optval, optlen); + break; + + case SCTP_NODELAY: + retval = sctp_setsockopt_nodelay(sk, optval, optlen); break; default: @@ -1503,8 +1592,14 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr, sp = sctp_sk(sk); ep = sp->ep; - /* connect() cannot be done on a peeled-off socket. */ - if (SCTP_SOCKET_UDP_HIGH_BANDWIDTH == sp->type) { + /* connect() cannot be done on a socket that is already in ESTABLISHED + * state - UDP-style peeled off socket or a TCP-style socket that + * is already connected. + * It cannot be done even on a TCP-style listening socket. + */ + if ((SCTP_SS_ESTABLISHED == sk->state) || + ((SCTP_SOCKET_TCP == sp->type) && + (SCTP_SS_LISTENING == sk->state))) { err = -EISCONN; goto out_unlock; } @@ -1513,6 +1608,8 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr, if (err) goto out_unlock; + if (addr_len > sizeof(to)) + addr_len = sizeof(to); memcpy(&to, uaddr, addr_len); to.v4.sin_port = ntohs(to.v4.sin_port); @@ -1585,13 +1682,63 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) return -EOPNOTSUPP; /* STUB */ } -/* FIXME: Write comments. */ +/* 4.1.4 accept() - TCP Style Syntax + * + * Applications use accept() call to remove an established SCTP + * association from the accept queue of the endpoint. 
A new socket + * descriptor will be returned from accept() to represent the newly + * formed association. + */ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) { - int error = -EOPNOTSUPP; + struct sctp_opt *sp; + struct sctp_endpoint *ep; + struct sock *newsk = NULL; + struct sctp_association *assoc; + long timeo; + int error = 0; + + sctp_lock_sock(sk); - *err = error; - return NULL; + sp = sctp_sk(sk); + ep = sp->ep; + + if (SCTP_SOCKET_TCP != sp->type) { + error = -EOPNOTSUPP; + goto out; + } + + if (SCTP_SS_LISTENING != sk->state) { + error = -EINVAL; + goto out; + } + + timeo = sock_rcvtimeo(sk, sk->socket->file->f_flags & O_NONBLOCK); + + error = sctp_wait_for_accept(sk, timeo); + if (error) + goto out; + + /* We treat the list of associations on the endpoint as the accept + * queue and pick the first association on the list. + */ + assoc = list_entry(ep->asocs.next, struct sctp_association, asocs); + + newsk = sp->pf->create_accept_sk(sk, assoc); + if (!newsk) { + error = -ENOMEM; + goto out; + } + + /* Populate the fields of the newsk from the oldsk and migrate the + * assoc to the newsk. + */ + sctp_sock_migrate(sk, newsk, assoc, SCTP_SOCKET_TCP); + +out: + sctp_release_sock(sk); + *err = error; + return newsk; } /* FIXME: Write Comments. */ @@ -1607,7 +1754,7 @@ SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) SCTP_STATIC int sctp_init_sock(struct sock *sk) { sctp_endpoint_t *ep; - sctp_protocol_t *proto; + struct sctp_protocol *proto; struct sctp_opt *sp; SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); @@ -1617,7 +1764,16 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp = sctp_sk(sk); /* Initialize the SCTP per socket area. */ - sp->type = SCTP_SOCKET_UDP; + switch (sk->type) { + case SOCK_SEQPACKET: + sp->type = SCTP_SOCKET_UDP; + break; + case SOCK_STREAM: + sp->type = SCTP_SOCKET_TCP; + break; + default: + return -ESOCKTNOSUPPORT; + } /* FIXME: The next draft (04) of the SCTP Sockets Extensions * should include a socket option for manipulating these @@ -1665,7 +1821,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->disable_fragments = 0; /* Turn on/off any Nagle-like algorithm. */ - sp->nodelay = 0; + sp->nodelay = 1; /* Auto-close idle associations after the configured * number of seconds. A value of 0 disables this @@ -1714,11 +1870,17 @@ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) /* STUB */ } +/* 7.2.1 Association Status (SCTP_STATUS) + + * Applications can retrieve current status information about an + * association, including association state, peer receiver window size, + * number of unacked data chunks, and number of data chunks pending + * receipt. This information is read-only. 
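[Editor's note] The sctp_getsockopt_sctp_status() handler below now resolves the association through sctp_id2assoc(), so on a TCP-style or peeled-off socket the sstat_assoc_id field is effectively ignored. A hedged sketch of querying it from userspace; only sstat_assoc_id appears in the hunks, the sstat_state and sstat_rwnd fields and the providing header follow the sockets-API draft this code tracks and are assumptions.

#include <netinet/in.h>
#include <netinet/sctp.h>   /* assumed to provide struct sctp_status */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

/* Query association status; the kernel copies the struct in first, so
 * the assoc id must be filled in before the call on UDP-style sockets. */
static int print_sctp_status(int fd, sctp_assoc_t id)
{
        struct sctp_status status;
        socklen_t len = sizeof(status);

        memset(&status, 0, sizeof(status));
        status.sstat_assoc_id = id;

        if (getsockopt(fd, IPPROTO_SCTP, SCTP_STATUS, &status, &len) < 0)
                return -1;

        printf("state %d, peer rwnd %u\n",
               status.sstat_state, status.sstat_rwnd);
        return 0;
}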
+ */ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, char *optval, int *optlen) { struct sctp_status status; - sctp_endpoint_t *ep; sctp_association_t *assoc = NULL; struct sctp_transport *transport; sctp_assoc_t associd; @@ -1735,20 +1897,10 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, char *optval, } associd = status.sstat_assoc_id; - if ((SCTP_SOCKET_UDP_HIGH_BANDWIDTH != sctp_sk(sk)->type) && associd) { - assoc = sctp_id2assoc(sk, associd); - if (!assoc) { - retval = -EINVAL; - goto out; - } - } else { - ep = sctp_sk(sk)->ep; - if (list_empty(&ep->asocs)) { - retval = -EINVAL; - goto out; - } - - assoc = list_entry(ep->asocs.next, sctp_association_t, asocs); + assoc = sctp_id2assoc(sk, associd); + if (!assoc) { + retval = -EINVAL; + goto out; } transport = assoc->peer.primary_path; @@ -1788,7 +1940,7 @@ out: return (retval); } -static inline int sctp_getsockopt_disable_fragments(struct sock *sk, int len, +static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, char *optval, int *optlen) { int val; @@ -1805,7 +1957,7 @@ static inline int sctp_getsockopt_disable_fragments(struct sock *sk, int len, return 0; } -static inline int sctp_getsockopt_set_events(struct sock *sk, int len, char *optval, int *optlen) +static int sctp_getsockopt_set_events(struct sock *sk, int len, char *optval, int *optlen) { if (len != sizeof(struct sctp_event_subscribe)) return -EINVAL; @@ -1814,7 +1966,7 @@ static inline int sctp_getsockopt_set_events(struct sock *sk, int len, char *opt return 0; } -static inline int sctp_getsockopt_autoclose(struct sock *sk, int len, char *optval, int *optlen) +static int sctp_getsockopt_autoclose(struct sock *sk, int len, char *optval, int *optlen) { /* Applicable to UDP-style socket only */ if (SCTP_SOCKET_TCP == sctp_sk(sk)->type) @@ -1832,11 +1984,6 @@ SCTP_STATIC int sctp_do_peeloff(sctp_association_t *assoc, struct socket **newso struct sock *oldsk = assoc->base.sk; struct sock *newsk; struct socket *tmpsock; - sctp_endpoint_t *newep; - struct sctp_opt *oldsp = sctp_sk(oldsk); - struct sctp_opt *newsp; - struct sk_buff *skb, *tmp; - struct sctp_ulpevent *event; int err = 0; /* An association cannot be branched off from an already peeled-off @@ -1846,88 +1993,24 @@ SCTP_STATIC int sctp_do_peeloff(sctp_association_t *assoc, struct socket **newso return -EOPNOTSUPP; /* Create a new socket. */ - err = sock_create(PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, &tmpsock); + err = sock_create(oldsk->family, SOCK_SEQPACKET, IPPROTO_SCTP, + &tmpsock); if (err < 0) return err; newsk = tmpsock->sk; - newsp = sctp_sk(newsk); - newep = newsp->ep; - /* Migrate socket buffer sizes and all the socket level options to the - * new socket. - */ - newsk->sndbuf = oldsk->sndbuf; - newsk->rcvbuf = oldsk->rcvbuf; - *newsp = *oldsp; - - /* Restore the ep value that was overwritten with the above structure - * copy. - */ - newsp->ep = newep; - - /* Move any messages in the old socket's receive queue that are for the - * peeled off association to the new socket's receive queue. - */ - sctp_skb_for_each(skb, &oldsk->receive_queue, tmp) { - event = sctp_skb2event(skb); - if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); - __skb_queue_tail(&newsk->receive_queue, skb); - } - } - - /* Clean up an messages pending delivery due to partial - * delivery. Three cases: - * 1) No partial deliver; no work. - * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby. - * 3) Peeling off non-partial delivery; move pd_lobby to recieve_queue. 
- */ - skb_queue_head_init(&newsp->pd_lobby); - sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode;; - - if (sctp_sk(oldsk)->pd_mode) { - struct sk_buff_head *queue; - - /* Decide which queue to move pd_lobby skbs to. */ - if (assoc->ulpq.pd_mode) { - queue = &newsp->pd_lobby; - } else - queue = &newsk->receive_queue; - - /* Walk through the pd_lobby, looking for skbs that - * need moved to the new socket. - */ - sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { - event = sctp_skb2event(skb); - if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); - __skb_queue_tail(queue, skb); - } - } - - /* Clear up any skbs waiting for the partial - * delivery to finish. - */ - if (assoc->ulpq.pd_mode) - sctp_clear_pd(oldsk); - - } - - /* Set the type of socket to indicate that it is peeled off from the - * original socket. - */ - newsp->type = SCTP_SOCKET_UDP_HIGH_BANDWIDTH; - - /* Migrate the association to the new socket. */ - sctp_assoc_migrate(assoc, newsk); + /* Populate the fields of the newsk from the oldsk and migrate the + * assoc to the newsk. + */ + sctp_sock_migrate(oldsk, newsk, assoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); *newsock = tmpsock; return err; } -static inline int sctp_getsockopt_peeloff(struct sock *sk, int len, char *optval, int *optlen) +static int sctp_getsockopt_peeloff(struct sock *sk, int len, char *optval, int *optlen) { sctp_peeloff_arg_t peeloff; struct socket *newsock; @@ -1970,8 +2053,8 @@ out: return retval; } -static inline int sctp_getsockopt_get_peer_addr_params(struct sock *sk, - int len, char *optval, int *optlen) +static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, + char *optval, int *optlen) { struct sctp_paddrparams params; sctp_association_t *asoc; @@ -2014,7 +2097,7 @@ static inline int sctp_getsockopt_get_peer_addr_params(struct sock *sk, return 0; } -static inline int sctp_getsockopt_initmsg(struct sock *sk, int len, char *optval, int *optlen) +static int sctp_getsockopt_initmsg(struct sock *sk, int len, char *optval, int *optlen) { if (len != sizeof(struct sctp_initmsg)) return -EINVAL; @@ -2023,8 +2106,8 @@ static inline int sctp_getsockopt_initmsg(struct sock *sk, int len, char *optval return 0; } -static inline int sctp_getsockopt_get_peer_addrs_num(struct sock *sk, int len, - char *optval, int *optlen) +static int sctp_getsockopt_peer_addrs_num(struct sock *sk, int len, + char *optval, int *optlen) { sctp_assoc_t id; sctp_association_t *asoc; @@ -2053,7 +2136,7 @@ static inline int sctp_getsockopt_get_peer_addrs_num(struct sock *sk, int len, return 0; } -static inline int sctp_getsockopt_get_peer_addrs(struct sock *sk, int len, +static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, char *optval, int *optlen) { sctp_association_t *asoc; @@ -2093,8 +2176,8 @@ static inline int sctp_getsockopt_get_peer_addrs(struct sock *sk, int len, return 0; } -static inline int sctp_getsockopt_get_local_addrs_num(struct sock *sk, int len, - char *optval, int *optlen) +static int sctp_getsockopt_local_addrs_num(struct sock *sk, int len, + char *optval, int *optlen) { sctp_assoc_t id; sctp_bind_addr_t *bp; @@ -2132,8 +2215,8 @@ static inline int sctp_getsockopt_get_local_addrs_num(struct sock *sk, int len, return 0; } -static inline int sctp_getsockopt_get_local_addrs(struct sock *sk, int len, - char *optval, int *optlen) +static int sctp_getsockopt_local_addrs(struct sock *sk, int len, + char *optval, int *optlen) { sctp_bind_addr_t *bp; sctp_association_t *asoc; @@ -2183,6 +2266,40 @@ static inline int sctp_getsockopt_get_local_addrs(struct 
sock *sk, int len, return 0; } +/* 7.1.10 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR) + * + * Requests that the local SCTP stack use the enclosed peer address as + * the association primary. The enclosed address must be one of the + * association peer's addresses. + */ +static int sctp_getsockopt_peer_prim(struct sock *sk, int len, + char *optval, int *optlen) +{ + struct sctp_setpeerprim prim; + struct sctp_association *asoc; + + if (len != sizeof(struct sctp_setpeerprim)) + return -EINVAL; + + if (copy_from_user(&prim, optval, sizeof(struct sctp_setpeerprim))) + return -EFAULT; + + asoc = sctp_id2assoc(sk, prim.sspp_assoc_id); + if (!asoc) + return -EINVAL; + + if (!asoc->peer.primary_path) + return -ENOTCONN; + + memcpy(&prim.sspp_addr, &asoc->peer.primary_path->ipaddr, + sizeof(union sctp_addr)); + + if (copy_to_user(optval, &prim, sizeof(struct sctp_setpeerprim))) + return -EFAULT; + + return 0; +} + /* * * 7.1.15 Set default send parameters (SET_DEFAULT_SEND_PARAM) @@ -2200,7 +2317,7 @@ static inline int sctp_getsockopt_get_local_addrs(struct sock *sk, int len, * * For getsockopt, it get the default sctp_sndrcvinfo structure. */ -static inline int sctp_getsockopt_set_default_send_param(struct sock *sk, +static int sctp_getsockopt_default_send_param(struct sock *sk, int len, char *optval, int *optlen) { struct sctp_sndrcvinfo info; @@ -2227,6 +2344,33 @@ static inline int sctp_getsockopt_set_default_send_param(struct sock *sk, return 0; } +/* + * + * 7.1.5 SCTP_NODELAY + * + * Turn on/off any Nagle-like algorithm. This means that packets are + * generally sent as soon as possible and no unnecessary delays are + * introduced, at the cost of more packets in the network. Expects an + * integer boolean flag. + */ + +static int sctp_getsockopt_nodelay(struct sock *sk, int len, + char *optval, int *optlen) +{ + __u8 val; + + if (len < sizeof(__u8)) + return -EINVAL; + + len = sizeof(__u8); + val = (sctp_sk(sk)->nodelay == 1); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) { @@ -2257,58 +2401,52 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_STATUS: retval = sctp_getsockopt_sctp_status(sk, len, optval, optlen); break; - case SCTP_DISABLE_FRAGMENTS: retval = sctp_getsockopt_disable_fragments(sk, len, optval, optlen); break; - case SCTP_SET_EVENTS: retval = sctp_getsockopt_set_events(sk, len, optval, optlen); break; - case SCTP_AUTOCLOSE: retval = sctp_getsockopt_autoclose(sk, len, optval, optlen); break; - case SCTP_SOCKOPT_PEELOFF: retval = sctp_getsockopt_peeloff(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDR_PARAMS: - retval = sctp_getsockopt_get_peer_addr_params(sk, len, optval, - optlen); + retval = sctp_getsockopt_peer_addr_params(sk, len, optval, + optlen); break; - case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDRS_NUM: - retval = sctp_getsockopt_get_peer_addrs_num(sk, len, optval, - optlen); + retval = sctp_getsockopt_peer_addrs_num(sk, len, optval, + optlen); break; - case SCTP_GET_LOCAL_ADDRS_NUM: - retval = sctp_getsockopt_get_local_addrs_num(sk, len, optval, - optlen); + retval = sctp_getsockopt_local_addrs_num(sk, len, optval, + optlen); break; - case SCTP_GET_PEER_ADDRS: - retval = sctp_getsockopt_get_peer_addrs(sk, len, optval, - optlen); + retval = sctp_getsockopt_peer_addrs(sk, len, 
optval, + optlen); break; - case SCTP_GET_LOCAL_ADDRS: - retval = sctp_getsockopt_get_local_addrs(sk, len, optval, - optlen); + retval = sctp_getsockopt_local_addrs(sk, len, optval, + optlen); break; - case SCTP_SET_DEFAULT_SEND_PARAM: - retval = sctp_getsockopt_set_default_send_param(sk, len, - optval, optlen); + retval = sctp_getsockopt_default_send_param(sk, len, + optval, optlen); + break; + case SCTP_SET_PEER_PRIMARY_ADDR: + retval = sctp_getsockopt_peer_prim(sk, len, optval, optlen); + break; + case SCTP_NODELAY: + retval = sctp_getsockopt_nodelay(sk, len, optval, optlen); break; - default: retval = -ENOPROTOOPT; break; @@ -2331,7 +2469,7 @@ static void sctp_unhash(struct sock *sk) /* Check if port is acceptable. Possibly find first available port. * * The port hash table (contained in the 'global' SCTP protocol storage - * returned by sctp_protocol_t * sctp_get_protocol()). The hash + * returned by struct sctp_protocol *sctp_get_protocol()). The hash * table is an array of 4096 lists (sctp_bind_hashbucket_t). Each * list (the list number is the port number hashed out, so as you * would expect from a hash function, all the ports in a given list have @@ -2346,7 +2484,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { sctp_bind_hashbucket_t *head; /* hash list */ sctp_bind_bucket_t *pp; /* hash list port iterator */ - sctp_protocol_t *sctp = sctp_get_protocol(); + struct sctp_protocol *sctp = sctp_get_protocol(); unsigned short snum; int ret; @@ -2543,6 +2681,9 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) if (SCTP_SOCKET_UDP != sp->type) return -EINVAL; + if (sk->state == SCTP_SS_LISTENING) + return 0; + /* * If a bind() or sctp_bindx() is not called prior to a listen() * call that allows new associations to be accepted, the system @@ -2563,6 +2704,40 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) } /* + * 4.1.3 listen() - TCP Style Syntax + * + * Applications uses listen() to ready the SCTP endpoint for accepting + * inbound associations. + */ +SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) +{ + struct sctp_opt *sp = sctp_sk(sk); + sctp_endpoint_t *ep = sp->ep; + + if (sk->state == SCTP_SS_LISTENING) + return 0; + + /* + * If a bind() or sctp_bindx() is not called prior to a listen() + * call that allows new associations to be accepted, the system + * picks an ephemeral port and will choose an address set equivalent + * to binding with a wildcard address. + * + * This is not currently spelled out in the SCTP sockets + * extensions draft, but follows the practice as seen in TCP + * sockets. + */ + if (!ep->base.bind_addr.port) { + if (sctp_autobind(sk)) + return -EAGAIN; + } + sk->state = SCTP_SS_LISTENING; + sk->max_ack_backlog = backlog; + sctp_hash_endpoint(ep); + return 0; +} + +/* * Move a socket to LISTENING state. */ int sctp_inet_listen(struct socket *sock, int backlog) @@ -2579,10 +2754,9 @@ int sctp_inet_listen(struct socket *sock, int backlog) case SOCK_SEQPACKET: err = sctp_seqpacket_listen(sk, backlog); break; - case SOCK_STREAM: - /* FIXME for TCP-style sockets. */ - err = -EOPNOTSUPP; + err = sctp_stream_listen(sk, backlog); + break; default: goto out; @@ -2684,7 +2858,7 @@ static sctp_bind_bucket_t *sctp_bucket_create(sctp_bind_hashbucket_t *head, unsi /* FIXME: Commments! 
*/ static __inline__ void __sctp_put_port(struct sock *sk) { - sctp_protocol_t *sctp_proto = sctp_get_protocol(); + struct sctp_protocol *sctp_proto = sctp_get_protocol(); sctp_bind_hashbucket_t *head = &sctp_proto->port_hashtable[sctp_phashfn(inet_sk(sk)->num)]; sctp_bind_bucket_t *pp; @@ -2967,7 +3141,8 @@ no_packet: } /* Verify that this is a valid address. */ -static int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) +static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, + int len) { struct sctp_af *af; @@ -3213,7 +3388,7 @@ out: return err; do_error: - err = -ECONNABORTED; + err = -ECONNREFUSED; goto out; do_interrupted: @@ -3225,6 +3400,131 @@ do_nonblock: goto out; } +static int sctp_wait_for_accept(struct sock *sk, long timeo) +{ + struct sctp_endpoint *ep; + int err = 0; + DECLARE_WAITQUEUE(wait, current); + + ep = sctp_sk(sk)->ep; + + add_wait_queue_exclusive(sk->sleep, &wait); + + for (;;) { + __set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&ep->asocs)) { + sctp_release_sock(sk); + timeo = schedule_timeout(timeo); + sctp_lock_sock(sk); + } + + err = -EINVAL; + if (sk->state != SCTP_SS_LISTENING) + break; + + err = 0; + if (!list_empty(&ep->asocs)) + break; + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + + err = -EAGAIN; + if (!timeo) + break; + } + + remove_wait_queue(sk->sleep, &wait); + __set_current_state(TASK_RUNNING); + + return err; +} + +/* Populate the fields of the newsk from the oldsk and migrate the assoc + * and its messages to the newsk. + */ +void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, + struct sctp_association *assoc, sctp_socket_type_t type) +{ + struct sctp_opt *oldsp = sctp_sk(oldsk); + struct sctp_opt *newsp = sctp_sk(newsk); + sctp_endpoint_t *newep = newsp->ep; + struct sk_buff *skb, *tmp; + struct sctp_ulpevent *event; + + /* Migrate socket buffer sizes and all the socket level options to the + * new socket. + */ + newsk->sndbuf = oldsk->sndbuf; + newsk->rcvbuf = oldsk->rcvbuf; + *newsp = *oldsp; + + /* Restore the ep value that was overwritten with the above structure + * copy. + */ + newsp->ep = newep; + + /* Move any messages in the old socket's receive queue that are for the + * peeled off association to the new socket's receive queue. + */ + sctp_skb_for_each(skb, &oldsk->receive_queue, tmp) { + event = sctp_skb2event(skb); + if (event->asoc == assoc) { + __skb_unlink(skb, skb->list); + __skb_queue_tail(&newsk->receive_queue, skb); + } + } + + /* Clean up any messages pending delivery due to partial + * delivery. Three cases: + * 1) No partial deliver; no work. + * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby. + * 3) Peeling off non-partial delivery; move pd_lobby to recieve_queue. + */ + skb_queue_head_init(&newsp->pd_lobby); + sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode;; + + if (sctp_sk(oldsk)->pd_mode) { + struct sk_buff_head *queue; + + /* Decide which queue to move pd_lobby skbs to. */ + if (assoc->ulpq.pd_mode) { + queue = &newsp->pd_lobby; + } else + queue = &newsk->receive_queue; + + /* Walk through the pd_lobby, looking for skbs that + * need moved to the new socket. + */ + sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { + event = sctp_skb2event(skb); + if (event->asoc == assoc) { + __skb_unlink(skb, skb->list); + __skb_queue_tail(queue, skb); + } + } + + /* Clear up any skbs waiting for the partial + * delivery to finish. 
+ */ + if (assoc->ulpq.pd_mode) + sctp_clear_pd(oldsk); + + } + + /* Set the type of socket to indicate that it is peeled off from the + * original UDP-style socket or created with the accept() call on a + * TCP-style socket.. + */ + newsp->type = type; + + /* Migrate the association to the new socket. */ + sctp_assoc_migrate(assoc, newsk); + + newsk->state = SCTP_SS_ESTABLISHED; +} + /* This proto struct describes the ULP interface for SCTP. */ struct proto sctp_prot = { .name = "SCTP", diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index f2fcce00c6ed..1e54322277e6 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -42,7 +42,7 @@ #include <net/sctp/structs.h> #include <linux/sysctl.h> -extern sctp_protocol_t sctp_proto; +extern struct sctp_protocol sctp_proto; static ctl_table sctp_table[] = { { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index b9d68744a621..6ee6ca94aa6b 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -83,7 +83,7 @@ struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, const union sctp_addr *addr, int priority) { - sctp_protocol_t *proto = sctp_get_protocol(); + struct sctp_protocol *proto = sctp_get_protocol(); /* Copy in the address. */ peer->ipaddr = *addr; @@ -262,7 +262,7 @@ void sctp_transport_put(struct sctp_transport *transport) /* Update transport's RTO based on the newly calculated RTT. */ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) { - sctp_protocol_t *proto = sctp_get_protocol(); + struct sctp_protocol *proto = sctp_get_protocol(); /* Check for valid transport. */ SCTP_ASSERT(tp, "NULL transport", return); diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 8773a7ee3ead..310c7f0b8c1b 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -250,7 +250,7 @@ int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, /* The Gap Ack Block happens to end at the end of the * overflow map. */ - if (started & !ended) { + if (started && !ended) { ended++; _end = map->len + map->len - 1; } @@ -395,7 +395,7 @@ void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn) return; if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) return; - + /* Assert: TSN is in range. */ gap = tsn - map->base_tsn; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 125946bdec10..e367b0735824 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -220,7 +220,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (sctp_event2skb(event)->list) sctp_skb_list_tail(sctp_event2skb(event)->list, queue); else - skb_queue_tail(queue, sctp_event2skb(event)); + __skb_queue_tail(queue, sctp_event2skb(event)); /* Did we just complete partial delivery and need to get * rolling again? Move pending data to the receive @@ -230,7 +230,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->receive_queue) - wake_up_interruptible(sk->sleep); + sk->data_ready(sk, 0); return 1; out_free: @@ -247,14 +247,14 @@ out_free: static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { - struct sk_buff *pos, *tmp; + struct sk_buff *pos; struct sctp_ulpevent *cevent; __u32 tsn, ctsn; tsn = event->sndrcvinfo.sinfo_tsn; /* Find the right place in this list. We store them by TSN. 
*/ - sctp_skb_for_each(pos, &ulpq->reasm, tmp) { + skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->sndrcvinfo.sinfo_tsn; @@ -334,7 +334,7 @@ static inline struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff * */ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ulpq) { - struct sk_buff *pos, *tmp; + struct sk_buff *pos; struct sctp_ulpevent *cevent; struct sk_buff *first_frag = NULL; __u32 ctsn, next_tsn; @@ -355,7 +355,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u * fragment in order. If not, first_frag is reset to NULL and we * start the next pass when we find another first fragment. */ - sctp_skb_for_each(pos, &ulpq->reasm, tmp) { + skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->sndrcvinfo.sinfo_tsn; @@ -374,29 +374,26 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u case SCTP_DATA_LAST_FRAG: if (first_frag && (ctsn == next_tsn)) - retval = sctp_make_reassembled_event( - first_frag, pos); + goto found; else first_frag = NULL; break; }; - /* We have the reassembled event. There is no need to look - * further. - */ - if (retval) { - retval->msg_flags |= MSG_EOR; - break; - } } - +done: return retval; +found: + retval = sctp_make_reassembled_event(first_frag, pos); + if (retval) + retval->msg_flags |= MSG_EOR; + goto done; } /* Retrieve the next set of fragments of a partial message. */ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) { - struct sk_buff *pos, *tmp, *last_frag, *first_frag; + struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; int is_last; @@ -415,7 +412,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq next_tsn = 0; is_last = 0; - sctp_skb_for_each(pos, &ulpq->reasm, tmp) { + skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->sndrcvinfo.sinfo_tsn; @@ -448,7 +445,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq */ done: retval = sctp_make_reassembled_event(first_frag, last_frag); - if (is_last) + if (retval && is_last) retval->msg_flags |= MSG_EOR; return retval; @@ -490,7 +487,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, /* Retrieve the first part (sequential fragments) for partial delivery. */ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) { - struct sk_buff *pos, *tmp, *last_frag, *first_frag; + struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; struct sctp_ulpevent *retval; @@ -507,7 +504,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u retval = NULL; next_tsn = 0; - sctp_skb_for_each(pos, &ulpq->reasm, tmp) { + skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->sndrcvinfo.sinfo_tsn; @@ -590,7 +587,7 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { - struct sk_buff *pos, *tmp; + struct sk_buff *pos; struct sctp_ulpevent *cevent; __u16 sid, csid; __u16 ssn, cssn; @@ -601,7 +598,7 @@ static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, /* Find the right place in this list. We store them by * stream ID and then by SSN. 
*/ - sctp_skb_for_each(pos, &ulpq->lobby, tmp) { + skb_queue_walk(&ulpq->lobby, pos) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->sndrcvinfo.sinfo_stream; cssn = cevent->sndrcvinfo.sinfo_ssn; @@ -786,9 +783,9 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, int priority) SCTP_PARTIAL_DELIVERY_ABORTED, priority); if (ev) - skb_queue_tail(&sk->receive_queue, sctp_event2skb(ev)); + __skb_queue_tail(&sk->receive_queue, sctp_event2skb(ev)); /* If there is data waiting, send it up the socket now. */ if (sctp_ulpq_clear_pd(ulpq) || ev) - wake_up_interruptible(sk->sleep); + sk->data_ready(sk, 0); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e11215d37f43..1cc95eb5a702 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -859,10 +859,9 @@ static long unix_wait_for_peer(unix_socket *other, long timeo) { struct unix_sock *u = unix_sk(other); int sched; - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&u->peer_wait, &wait); + prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); sched = (!test_bit(SOCK_DEAD, &other->flags) && !(other->shutdown&RCV_SHUTDOWN) && @@ -873,8 +872,7 @@ static long unix_wait_for_peer(unix_socket *other, long timeo) if (sched) timeo = schedule_timeout(timeo); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&u->peer_wait, &wait); + finish_wait(&u->peer_wait, &wait); return timeo; } @@ -1542,14 +1540,12 @@ out: static long unix_stream_data_wait(unix_socket * sk, long timeo) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); unix_state_rlock(sk); - add_wait_queue(sk->sleep, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sleep, &wait, TASK_INTERRUPTIBLE); if (skb_queue_len(&sk->receive_queue) || sk->err || @@ -1565,8 +1561,7 @@ static long unix_stream_data_wait(unix_socket * sk, long timeo) clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags); } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sleep, &wait); + finish_wait(sk->sleep, &wait); unix_state_runlock(sk); return timeo; } |
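The wait_for_packet(), sctp_wait_for_accept(), unix_wait_for_peer() and unix_stream_data_wait() hunks all converge on the prepare_to_wait*/finish_wait helpers instead of open-coded DECLARE_WAITQUEUE / add_wait_queue / remove_wait_queue sequences. The sketch below is not part of the patch; it is a minimal illustration of the idiom those conversions target, using only calls that appear in the hunks above (the helper name my_wait_for_data() and the ready() predicate are invented for the example):

#include <linux/sched.h>
#include <linux/wait.h>

/* Hypothetical helper: sleep on @wq until ready(@arg) is true, a signal
 * is pending, or @timeo expires.  Returns the remaining timeout, as
 * unix_wait_for_peer() and unix_stream_data_wait() do after conversion.
 */
static long my_wait_for_data(wait_queue_head_t *wq, int (*ready)(void *),
			     void *arg, long timeo)
{
	DEFINE_WAIT(wait);	/* replaces DECLARE_WAITQUEUE(wait, current) */

	for (;;) {
		/* Queues @wait exclusively and sets TASK_INTERRUPTIBLE,
		 * folding the old add_wait_queue_exclusive() +
		 * __set_current_state() pair into one call.
		 */
		prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
		if (ready(arg))
			break;
		if (signal_pending(current) || !timeo)
			break;
		timeo = schedule_timeout(timeo);
	}
	/* Dequeues @wait and restores TASK_RUNNING, replacing the old
	 * remove_wait_queue() + __set_current_state(TASK_RUNNING) pair.
	 */
	finish_wait(wq, &wait);
	return timeo;
}

The ordering matters: the condition is only tested after prepare_to_wait_exclusive() has both queued the waiter and set the task state, which is the same lost-wakeup guard the open-coded version provided, just centralized in the helpers.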
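sctp_getsockopt_nodelay() above makes the SCTP_NODELAY flag readable through getsockopt(). What follows is a hedged userspace sketch, not part of the patch: it assumes SCTP_NODELAY is exposed by <netinet/sctp.h> (e.g. from lksctp-tools) and that IPPROTO_SCTP is accepted as the option level, and it reads the flag into a single byte to match the __u8 the handler copies out:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>	/* assumed to define SCTP_NODELAY */

int main(void)
{
	unsigned char nodelay = 0;
	socklen_t len = sizeof(nodelay);	/* handler rewrites this to 1 */
	int fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (getsockopt(fd, IPPROTO_SCTP, SCTP_NODELAY, &nodelay, &len) < 0) {
		perror("getsockopt(SCTP_NODELAY)");
		close(fd);
		return 1;
	}
	printf("SCTP_NODELAY is %s\n", nodelay ? "on" : "off");
	close(fd);
	return 0;
}

A buffer shorter than one byte gets -EINVAL from the handler, so passing at least sizeof(unsigned char) is the minimum the sketch relies on.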