From bbeface4f7cb1d117901695e35e2391073ce0ba3 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Fri, 4 Mar 2005 08:36:24 +0900 Subject: [IPV6] ROUTE: Add gc_min_interval_ms sysctl. Signed-off-by: Hideaki YOSHIFUJI --- include/linux/sysctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6b084680aaa1..3c533585a8a5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -455,7 +455,8 @@ enum { NET_IPV6_ROUTE_GC_INTERVAL=6, NET_IPV6_ROUTE_GC_ELASTICITY=7, NET_IPV6_ROUTE_MTU_EXPIRES=8, - NET_IPV6_ROUTE_MIN_ADVMSS=9 + NET_IPV6_ROUTE_MIN_ADVMSS=9, + NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10 }; enum { -- cgit v1.2.3 From 42ed4aad9878f50397b7d32adebd61b76308c451 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Fri, 4 Mar 2005 08:37:28 +0900 Subject: [NET] NEIGHBOUR: Add retrans_time_ms and reachable_time_ms sysctls. Signed-off-by: Hideaki YOSHIFUJI --- Documentation/filesystems/proc.txt | 21 +++++++++----- include/linux/sysctl.h | 5 +++- net/core/neighbour.c | 57 +++++++++++++++++++++++++++++--------- net/ipv6/ndisc.c | 17 ++++++++++-- 4 files changed, 77 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f3f69ad01a87..69a0e892d494 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1754,18 +1754,25 @@ settings contain additional options to set garbage collection parameters. In the interface directories you'll find the following entries: -base_reachable_time -------------------- +base_reachable_time, base_reachable_time_ms +------------------------------------------- A base value used for computing the random reachable time value as specified in RFC2461. -retrans_time ------------- +Expression of base_reachable_time is in seconds. +Expression of base_reachable_time_ms is in milliseconds. + +retrans_time, retrans_time_ms +----------------------------- + +The time between retransmitted Neighbor Solicitation messages. +Used for address resolution and to determine if a neighbor is +unreachable. -The time, expressed in jiffies (1/100 sec), between retransmitted Neighbor -Solicitation messages. Used for address resolution and to determine if a -neighbor is unreachable. +Expression of retrans_time is in 1/100 seconds (for IPv4) or in jiffies +(for IPv6). +Expression of retrans_time_ms is in milliseconds. unres_qlen ---------- diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3c533585a8a5..46a9fbd5b6cb 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -501,7 +501,10 @@ enum { NET_NEIGH_GC_INTERVAL=13, NET_NEIGH_GC_THRESH1=14, NET_NEIGH_GC_THRESH2=15, - NET_NEIGH_GC_THRESH3=16 + NET_NEIGH_GC_THRESH3=16, + NET_NEIGH_RETRANS_TIME_MS=17, + NET_NEIGH_REACHABLE_TIME_MS=18, + __NET_NEIGH_MAX }; /* /proc/sys/net/ipx */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1d8f707207f1..4553f388dcb2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2047,7 +2047,7 @@ static void neigh_app_notify(struct neighbour *n) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table neigh_vars[17]; + ctl_table neigh_vars[__NET_NEIGH_MAX]; ctl_table neigh_dev[2]; ctl_table neigh_neigh_dir[2]; ctl_table neigh_proto_dir[2]; @@ -2170,6 +2170,22 @@ static struct neigh_sysctl_table { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = NET_NEIGH_RETRANS_TIME_MS, + .procname = "retrans_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, + { + .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, + .procname = "base_reachable_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, }, .neigh_dev = { { @@ -2214,16 +2230,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; t->neigh_vars[3].data = &p->retrans_time; - if (handler || strategy) { - /* RetransTime */ - t->neigh_vars[3].proc_handler = handler; - t->neigh_vars[3].strategy = strategy; - t->neigh_vars[3].extra1 = dev; - /* ReachableTime */ - t->neigh_vars[4].proc_handler = handler; - t->neigh_vars[4].strategy = strategy; - t->neigh_vars[4].extra1 = dev; - } t->neigh_vars[4].data = &p->base_reachable_time; t->neigh_vars[5].data = &p->delay_probe_time; t->neigh_vars[6].data = &p->gc_staletime; @@ -2233,18 +2239,43 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[10].data = &p->proxy_delay; t->neigh_vars[11].data = &p->locktime; - dev_name_source = t->neigh_dev[0].procname; if (dev) { dev_name_source = dev->name; t->neigh_dev[0].ctl_name = dev->ifindex; - memset(&t->neigh_vars[12], 0, sizeof(ctl_table)); + t->neigh_vars[12].procname = NULL; + t->neigh_vars[13].procname = NULL; + t->neigh_vars[14].procname = NULL; + t->neigh_vars[15].procname = NULL; } else { + dev_name_source = t->neigh_dev[0].procname; t->neigh_vars[12].data = (int *)(p + 1); t->neigh_vars[13].data = (int *)(p + 1) + 1; t->neigh_vars[14].data = (int *)(p + 1) + 2; t->neigh_vars[15].data = (int *)(p + 1) + 3; } + t->neigh_vars[16].data = &p->retrans_time; + t->neigh_vars[17].data = &p->base_reachable_time; + + if (handler || strategy) { + /* RetransTime */ + t->neigh_vars[3].proc_handler = handler; + t->neigh_vars[3].strategy = strategy; + t->neigh_vars[3].extra1 = dev; + /* ReachableTime */ + t->neigh_vars[4].proc_handler = handler; + t->neigh_vars[4].strategy = strategy; + t->neigh_vars[4].extra1 = dev; + /* RetransTime (in milliseconds)*/ + t->neigh_vars[16].proc_handler = handler; + t->neigh_vars[16].strategy = strategy; + t->neigh_vars[16].extra1 = dev; + /* ReachableTime (in milliseconds) */ + t->neigh_vars[17].proc_handler = handler; + t->neigh_vars[17].strategy = strategy; + t->neigh_vars[17].extra1 = dev; + } + dev_name = net_sysctl_strdup(dev_name_source); if (!dev_name) { err = -ENOBUFS; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f3f2b02b41a9..75afd2ff32e6 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1550,12 +1550,18 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); break; + case NET_NEIGH_RETRANS_TIME_MS: + case NET_NEIGH_REACHABLE_TIME_MS: + ret = proc_dointvec_ms_jiffies(ctl, write, + filp, buffer, lenp, ppos); + break; default: ret = -1; } if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME) + if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME || + ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS) idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); @@ -1579,13 +1585,20 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, int nlen, oldval, oldlenp, newval, newlen, context); break; + case NET_NEIGH_RETRANS_TIME_MS: + case NET_NEIGH_REACHABLE_TIME_MS: + ret = sysctl_ms_jiffies(ctl, name, nlen, + oldval, oldlenp, newval, newlen, + context); + break; default: ret = 0; } if (newval && newlen && ret > 0 && dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME) + if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME || + ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS) idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); -- cgit v1.2.3 From 8b123d00b713465bb5d262cdfa9a05d3e34f612d Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Fri, 4 Mar 2005 08:38:31 +0900 Subject: [IPV6] Always add a fragment header after receiving TOO BIG w/ pmtu < 1280. According to RFC2460, PMTU is set to the IPv6 Minimum Link MTU (1280) and a fragment header should always be included after a node receiving Too Big message reporting PMTU is less than the IPv6 Minimum Link MTU (1280). Signed-off-by: Hideaki YOSHIFUJI --- include/linux/ip.h | 1 + include/linux/rtnetlink.h | 1 + include/net/dst.h | 9 +++++++++ net/ipv6/ip6_output.c | 10 +++++++--- net/ipv6/route.c | 34 +++++++++++++++++++++------------- 5 files changed, 39 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 487152a404f8..7d7633632d28 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -152,6 +152,7 @@ struct inet_sock { }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ static inline struct inet_sock *inet_sk(const struct sock *sk) { diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c3d5d043a5ca..5f6310f0d880 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -346,6 +346,7 @@ enum #define RTAX_FEATURE_ECN 0x00000001 #define RTAX_FEATURE_SACK 0x00000002 #define RTAX_FEATURE_TIMESTAMP 0x00000004 +#define RTAX_FEATURE_ALLFRAG 0x00000008 struct rta_session { diff --git a/include/net/dst.h b/include/net/dst.h index b7e47a7ad105..785c4a185ff5 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -124,6 +124,15 @@ dst_pmtu(const struct dst_entry *dst) return mtu; } +static inline u32 +dst_allfrag(const struct dst_entry *dst) +{ + int ret = dst_path_metric(dst, RTAX_FEATURES) & RTAX_FEATURE_ALLFRAG; + /* Yes, _exactly_. This is paranoia. */ + barrier(); + return ret; +} + static inline int dst_metric_locked(struct dst_entry *dst, int metric) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fce2a87e9e4a..d19cafe5d1e2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { - if (skb->len > dst_pmtu(skb->dst)) + if (skb->len > dst_pmtu(skb->dst) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else return ip6_output2(skb); @@ -848,6 +848,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse inet->cork.fl = *fl; np->cork.hop_limit = hlimit; inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst); + if (dst_allfrag(&rt->u.dst)) + inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; @@ -899,7 +901,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse while (length > 0) { /* Check if the remaining data fits into current packet. */ - copy = mtu - skb->len; + copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; @@ -924,7 +926,7 @@ alloc_new_skb: * we know we need more fragment(s). */ datalen = length + fraggap; - if (datalen > mtu - fragheaderlen) + if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; @@ -1158,6 +1160,7 @@ out: if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL; + inet->cork.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); return err; @@ -1185,6 +1188,7 @@ void ip6_flush_pending_frames(struct sock *sk) if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL; + inet->cork.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c696043a8e47..e5bc02ccc0ea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -628,8 +628,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) + if (mtu < IPV6_MIN_MTU) { mtu = IPV6_MIN_MTU; + dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + } dst->metrics[RTAX_MTU-1] = mtu; } } @@ -1164,26 +1166,26 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; - - if (pmtu < IPV6_MIN_MTU) { - if (net_ratelimit()) - printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n", - pmtu); - /* According to RFC1981, the PMTU is set to the IPv6 minimum - link MTU if the node receives a Packet Too Big message - reporting next-hop MTU that is less than the IPv6 minimum MTU. - */ - pmtu = IPV6_MIN_MTU; - } + int allfrag = 0; rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); - if (rt == NULL) return; if (pmtu >= dst_pmtu(&rt->u.dst)) goto out; + if (pmtu < IPV6_MIN_MTU) { + /* + * According to RFC2460, PMTU is set to the IPv6 Minimum Link + * MTU (1280) and a fragment header should always be included + * after a node receiving Too Big message reporting PMTU is + * less than the IPv6 Minimum Link MTU. + */ + pmtu = IPV6_MIN_MTU; + allfrag = 1; + } + /* New mtu received -> path was valid. They are sent only in response to data packets, so that this nexthop apparently is reachable. --ANK @@ -1197,6 +1199,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, */ if (rt->rt6i_flags & RTF_CACHE) { rt->u.dst.metrics[RTAX_MTU-1] = pmtu; + if (allfrag) + rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; @@ -1211,6 +1215,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_cow(rt, daddr, saddr); if (!nrt->u.dst.error) { nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + if (allfrag) + nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; /* According to RFC 1981, detecting PMTU increase shouldn't be happened within 5 mins, the recommended timer is 10 mins. Here this route expiration time is set to ip6_rt_mtu_expires @@ -1232,6 +1238,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + if (allfrag) + nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; ip6_ins_rt(nrt, NULL, NULL); } -- cgit v1.2.3 From 04db04202b006675c18c356aa69810f57e98a639 Mon Sep 17 00:00:00 2001 From: Hideaki Yoshifuji Date: Fri, 4 Mar 2005 08:39:35 +0900 Subject: [NET] Don't use magic number for sysctl table definition. Signed-off-by: Hideaki YOSHIFUJI --- include/linux/sysctl.h | 4 +++- net/ipv4/devinet.c | 2 +- net/ipv6/addrconf.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 46a9fbd5b6cb..f418afb61d37 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -398,6 +398,7 @@ enum NET_IPV4_CONF_FORCE_IGMP_VERSION=17, NET_IPV4_CONF_ARP_ANNOUNCE=18, NET_IPV4_CONF_ARP_IGNORE=19, + __NET_IPV4_CONF_MAX }; /* /proc/sys/net/ipv4/netfilter */ @@ -476,7 +477,8 @@ enum { NET_IPV6_REGEN_MAX_RETRY=14, NET_IPV6_MAX_DESYNC_FACTOR=15, NET_IPV6_MAX_ADDRESSES=16, - NET_IPV6_FORCE_MLD_VERSION=17 + NET_IPV6_FORCE_MLD_VERSION=17, + __NET_IPV6_MAX }; /* /proc/sys/net/ipv6/icmp */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 082c5cc9570d..29fc6288ef2c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1212,7 +1212,7 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table devinet_vars[20]; + ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; ctl_table devinet_dev[2]; ctl_table devinet_conf_dir[2]; ctl_table devinet_proto_dir[2]; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9d7f26d37688..63c0b700a0cd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3212,7 +3212,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[18]; + ctl_table addrconf_vars[__NET_IPV6_MAX]; ctl_table addrconf_dev[2]; ctl_table addrconf_conf_dir[2]; ctl_table addrconf_proto_dir[2]; -- cgit v1.2.3