From c3b2adbfeb9e3bbf898cf824df21ac822a95348a Mon Sep 17 00:00:00 2001
From: Hideaki Yoshifuji <yoshfuji@linux-ipv6.org>
Date: Fri, 18 Jun 2004 21:55:24 -0700
Subject: [NET]: Fix some userland header bustage.

---
 include/linux/netfilter.h                 | 1 +
 include/linux/netfilter_arp/arp_tables.h  | 2 +-
 include/linux/netfilter_ipv4/ip_tables.h  | 3 ++-
 include/linux/netfilter_ipv6/ip6_tables.h | 3 +++
 4 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index b20c79258825..83f9668653ca 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -10,6 +10,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #endif
+#include <linux/compiler.h>
 
 /* Responses from hook functions. */
 #define NF_DROP 0
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 78c4f7142267..d6a7188b525c 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -16,7 +16,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #endif
-
+#include <linux/compiler.h>
 #include <linux/netfilter_arp.h>
 
 #define ARPT_FUNCTION_MAXNAMELEN 30
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index f43f1dddebc4..dccc68052c88 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -22,6 +22,7 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #endif
+#include <linux/compiler.h>
 #include <linux/netfilter_ipv4.h>
 
 #define IPT_FUNCTION_MAXNAMELEN 30
@@ -336,8 +337,8 @@ ipt_get_target(struct ipt_entry *e)
 /*
  *	Main firewall chains definitions and global var's definitions.
  */
-static DECLARE_MUTEX(ipt_mutex);
 #ifdef __KERNEL__
+static DECLARE_MUTEX(ipt_mutex);
 
 #include <linux/init.h>
 extern void ipt_init(void) __init;
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index a0995271934a..f9983d16cc1c 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -22,6 +22,7 @@
 #include <linux/ipv6.h>
 #include <linux/skbuff.h>
 #endif
+#include <linux/compiler.h>
 #include <linux/netfilter_ipv6.h>
 
 #define IP6T_FUNCTION_MAXNAMELEN 30
@@ -106,7 +107,9 @@ struct ip6t_counters
 	u_int64_t pcnt, bcnt;			/* Packet and byte counters */
 };
 
+#ifdef __KERNEL__
 static DECLARE_MUTEX(ip6t_mutex);
+#endif
 
 /* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */
 #define IP6T_F_PROTO		0x01	/* Set if rule cares about upper 
-- 
cgit v1.2.3


From 56a6f4d0679764b124298f80f113b67db22e6ff4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@toy.kerneljanitors.org>
Date: Sat, 19 Jun 2004 21:03:06 -0300
Subject: [NET] generalise tcp_add_data, skb_split and tcp_copy_to_page

Signed-off-by: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
---
 include/linux/errqueue.h                     |  4 ++
 include/linux/skbuff.h                       | 24 +++++++
 include/net/checksum.h                       | 70 +--------------------
 include/net/ip.h                             |  2 +-
 include/net/ip6_checksum.h                   | 94 ++++++++++++++++++++++++++++
 include/net/sock.h                           | 24 +++++++
 include/net/tcp.h                            |  1 +
 net/core/skbuff.c                            | 76 ++++++++++++++++++++++
 net/ipv4/netfilter/ip_conntrack_core.c       |  1 +
 net/ipv4/netfilter/ip_conntrack_standalone.c |  1 +
 net/ipv4/netfilter/ip_fw_compat_redir.c      |  1 +
 net/ipv4/netfilter/ip_nat_standalone.c       |  1 +
 net/ipv4/netfilter/ipt_MASQUERADE.c          |  1 +
 net/ipv4/tcp.c                               | 49 +--------------
 net/ipv4/tcp_output.c                        | 64 -------------------
 net/ipv6/icmp.c                              |  2 +-
 net/ipv6/mcast.c                             |  2 +-
 net/ipv6/ndisc.c                             |  2 +-
 net/ipv6/raw.c                               |  2 +
 net/ipv6/tcp_ipv6.c                          |  1 +
 net/ipv6/udp.c                               |  2 +-
 21 files changed, 238 insertions(+), 186 deletions(-)
 create mode 100644 include/net/ip6_checksum.h

(limited to 'include/linux')

diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index 0d87e62ec9e7..174582fedb8b 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -22,6 +22,10 @@ struct sock_extended_err
 #ifdef __KERNEL__
 
 #include <linux/config.h>
+#include <net/ip.h>
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#include <linux/ipv6.h>
+#endif
 
 #define SKB_EXT_ERR(skb) ((struct sock_exterr_skb *) ((skb)->cb))
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7bf6501a9024..f777878608ab 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -27,6 +27,7 @@
 #include <linux/highmem.h>
 #include <linux/poll.h>
 #include <linux/net.h>
+#include <net/checksum.h>
 
 #define HAVE_ALLOC_SKB		/* For the drivers to know */
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
@@ -971,6 +972,27 @@ static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
 	return skb_pad(skb, len-size);
 }
 
+static inline int skb_add_data(struct sk_buff *skb,
+			       char __user *from, int copy)
+{
+	const int off = skb->len;
+
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		int err = 0;
+		unsigned int csum = csum_and_copy_from_user(from,
+							    skb_put(skb, copy),
+							    copy, 0, &err);
+		if (!err) {
+			skb->csum = csum_block_add(skb->csum, csum, off);
+			return 0;
+		}
+	} else if (!copy_from_user(skb_put(skb, copy), from, copy))
+		return 0;
+
+	__skb_trim(skb, off);
+	return -EFAULT;
+}
+
 /**
  *	skb_linearize - convert paged skb to linear one
  *	@skb: buffer to linarize
@@ -1034,6 +1056,8 @@ extern unsigned int    skb_copy_and_csum_bits(const struct sk_buff *skb,
 					      int offset, u8 *to, int len,
 					      unsigned int csum);
 extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+extern void	       skb_split(struct sk_buff *skb,
+				 struct sk_buff *skb1, const u32 len);
 
 extern void skb_init(void);
 extern void skb_add_mtu(int mtu);
diff --git a/include/net/checksum.h b/include/net/checksum.h
index cd3c52a594e4..43f40235114e 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -16,83 +16,15 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-/*
- *	Fixes:
- *
- *	Ralf Baechle			:	generic ipv6 checksum
- *	<ralf@waldorf-gmbh.de>
- */
-
 #ifndef _CHECKSUM_H
 #define _CHECKSUM_H
 
+#include <linux/errno.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
-#include <net/ip.h>
-#include <linux/in6.h>
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
 
-#ifndef _HAVE_ARCH_IPV6_CSUM
-
-static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
-						     struct in6_addr *daddr,
-						     __u16 len,
-						     unsigned short proto,
-						     unsigned int csum) 
-{
-
-	int carry;
-	__u32 ulen;
-	__u32 uproto;
-
-	csum += saddr->s6_addr32[0];
-	carry = (csum < saddr->s6_addr32[0]);
-	csum += carry;
-
-	csum += saddr->s6_addr32[1];
-	carry = (csum < saddr->s6_addr32[1]);
-	csum += carry;
-
-	csum += saddr->s6_addr32[2];
-	carry = (csum < saddr->s6_addr32[2]);
-	csum += carry;
-
-	csum += saddr->s6_addr32[3];
-	carry = (csum < saddr->s6_addr32[3]);
-	csum += carry;
-
-	csum += daddr->s6_addr32[0];
-	carry = (csum < daddr->s6_addr32[0]);
-	csum += carry;
-
-	csum += daddr->s6_addr32[1];
-	carry = (csum < daddr->s6_addr32[1]);
-	csum += carry;
-
-	csum += daddr->s6_addr32[2];
-	carry = (csum < daddr->s6_addr32[2]);
-	csum += carry;
-
-	csum += daddr->s6_addr32[3];
-	carry = (csum < daddr->s6_addr32[3]);
-	csum += carry;
-
-	ulen = htonl((__u32) len);
-	csum += ulen;
-	carry = (csum < ulen);
-	csum += carry;
-
-	uproto = htonl(proto);
-	csum += uproto;
-	carry = (csum < uproto);
-	csum += carry;
-
-	return csum_fold(csum);
-}
-
-#endif
-
 #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 static inline
 unsigned int csum_and_copy_from_user (const char __user *src, char *dst,
diff --git a/include/net/ip.h b/include/net/ip.h
index 5a683ccd4cb0..d36a3b230819 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -37,7 +37,7 @@
 #include <net/snmp.h>
 #endif
 
-#include <net/sock.h>	/* struct sock */
+struct sock;
 
 struct inet_skb_parm
 {
diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
new file mode 100644
index 000000000000..3dfc885bdf25
--- /dev/null
+++ b/include/net/ip6_checksum.h
@@ -0,0 +1,94 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Checksumming functions for IPv6
+ *
+ * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Borrows very liberally from tcp.c and ip.c, see those
+ *		files for more names.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	Fixes:
+ *
+ *	Ralf Baechle			:	generic ipv6 checksum
+ *	<ralf@waldorf-gmbh.de>
+ */
+
+#ifndef _CHECKSUM_IPV6_H
+#define _CHECKSUM_IPV6_H
+
+#include <asm/types.h>
+#include <asm/byteorder.h>
+#include <net/ip.h>
+#include <asm/checksum.h>
+#include <linux/in6.h>
+
+#ifndef _HAVE_ARCH_IPV6_CSUM
+
+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+						     struct in6_addr *daddr,
+						     __u16 len,
+						     unsigned short proto,
+						     unsigned int csum) 
+{
+
+	int carry;
+	__u32 ulen;
+	__u32 uproto;
+
+	csum += saddr->s6_addr32[0];
+	carry = (csum < saddr->s6_addr32[0]);
+	csum += carry;
+
+	csum += saddr->s6_addr32[1];
+	carry = (csum < saddr->s6_addr32[1]);
+	csum += carry;
+
+	csum += saddr->s6_addr32[2];
+	carry = (csum < saddr->s6_addr32[2]);
+	csum += carry;
+
+	csum += saddr->s6_addr32[3];
+	carry = (csum < saddr->s6_addr32[3]);
+	csum += carry;
+
+	csum += daddr->s6_addr32[0];
+	carry = (csum < daddr->s6_addr32[0]);
+	csum += carry;
+
+	csum += daddr->s6_addr32[1];
+	carry = (csum < daddr->s6_addr32[1]);
+	csum += carry;
+
+	csum += daddr->s6_addr32[2];
+	carry = (csum < daddr->s6_addr32[2]);
+	csum += carry;
+
+	csum += daddr->s6_addr32[3];
+	carry = (csum < daddr->s6_addr32[3]);
+	csum += carry;
+
+	ulen = htonl((__u32) len);
+	csum += ulen;
+	carry = (csum < ulen);
+	csum += carry;
+
+	uproto = htonl(proto);
+	csum += uproto;
+	carry = (csum < uproto);
+	csum += carry;
+
+	return csum_fold(csum);
+}
+
+#endif
+#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 38b90c8ab25f..5624b084742f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -53,6 +53,7 @@
 
 #include <asm/atomic.h>
 #include <net/dst.h>
+#include <net/checksum.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -923,6 +924,29 @@ static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
 	sk->sk_forward_alloc -= skb->truesize;
 }
 
+static inline int skb_copy_to_page(struct sock *sk, char __user *from,
+				   struct sk_buff *skb, struct page *page,
+				   int off, int copy)
+{
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		int err = 0;
+		unsigned int csum = csum_and_copy_from_user(from,
+						     page_address(page) + off,
+							    copy, 0, &err);
+		if (err)
+			return err;
+		skb->csum = csum_block_add(skb->csum, csum, skb->len);
+	} else if (copy_from_user(page_address(page) + off, from, copy))
+		return -EFAULT;
+
+	skb->len	     += copy;
+	skb->data_len	     += copy;
+	skb->truesize	     += copy;
+	sk->sk_wmem_queued   += copy;
+	sk->sk_forward_alloc -= copy;
+	return 0;
+}
+
 /*
  * 	Queue a received datagram if it will fit. Stream and sequenced
  *	protocols can't normally use this as they need to fit buffers in
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 52f27edef69e..3a323cd1e79f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -33,6 +33,7 @@
 #include <net/checksum.h>
 #include <net/sock.h>
 #include <net/snmp.h>
+#include <net/ip.h>
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 #include <linux/ipv6.h>
 #endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7d72cdb49a57..f191cae7462e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1263,6 +1263,81 @@ void skb_add_mtu(int mtu)
 }
 #endif
 
+static inline void skb_split_inside_header(struct sk_buff *skb,
+					   struct sk_buff *skb1,
+					   const u32 len, const int pos)
+{
+	int i;
+
+	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
+
+	/* Move every page fragment descriptor over to skb1 unchanged. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
+
+	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+	skb_shinfo(skb)->nr_frags  = 0;
+	skb1->data_len		   = skb->data_len;
+	skb1->len		   += skb1->data_len;
+	skb->data_len		   = 0;
+	skb->len		   = len;
+	skb->tail		   = skb->data + len;
+}
+
+static inline void skb_split_no_header(struct sk_buff *skb,
+				       struct sk_buff *skb1,
+				       const u32 len, int pos)
+{
+	int i, k = 0;
+	const int nfrags = skb_shinfo(skb)->nr_frags;
+
+	skb_shinfo(skb)->nr_frags = 0;
+	skb1->len		  = skb1->data_len = skb->len - len;
+	skb->len		  = len;
+	skb->data_len		  = len - pos;
+
+	for (i = 0; i < nfrags; i++) {
+		int size = skb_shinfo(skb)->frags[i].size;
+
+		if (pos + size > len) {
+			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
+
+			if (pos < len) {
+				/* Split frag.
+				 * We have two variants in this case:
+				 * 1. Move all the frag to the second
+				 *    part, if it is possible. F.e.
+				 *    this approach is mandatory for TUX,
+				 *    where splitting is expensive.
+				 * 2. Split it accurately. We do this.
+				 */
+				get_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+				skb_shinfo(skb1)->frags[0].size -= len - pos;
+				skb_shinfo(skb)->frags[i].size	= len - pos;
+				skb_shinfo(skb)->nr_frags++;
+			}
+			k++;
+		} else
+			skb_shinfo(skb)->nr_frags++;
+		pos += size;
+	}
+	skb_shinfo(skb1)->nr_frags = k;
+}
+
+/**
+ * skb_split - Split fragmented skb into two parts at length len.
+ */
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
+{
+	int pos = skb_headlen(skb);
+
+	if (len < pos)	/* Split line is inside header. */
+		skb_split_inside_header(skb, skb1, len, pos);
+	else		/* Second chunk has no header, nothing to copy. */
+		skb_split_no_header(skb, skb1, len, pos);
+}
+
 void __init skb_init(void)
 {
 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1300,3 +1375,4 @@ EXPORT_SYMBOL(skb_queue_head);
 EXPORT_SYMBOL(skb_queue_tail);
 EXPORT_SYMBOL(skb_unlink);
 EXPORT_SYMBOL(skb_append);
+EXPORT_SYMBOL(skb_split);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 50a467898671..47686afd71a6 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -28,6 +28,7 @@
 #include <linux/proc_fs.h>
 #include <linux/vmalloc.h>
 #include <net/checksum.h>
+#include <net/ip.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 4d378394aac4..fbe26d1ca0ad 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -24,6 +24,7 @@
 #include <linux/sysctl.h>
 #endif
 #include <net/checksum.h>
+#include <net/ip.h>
 
 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
diff --git a/net/ipv4/netfilter/ip_fw_compat_redir.c b/net/ipv4/netfilter/ip_fw_compat_redir.c
index 6a24a5cb17e0..7f68c1ed5a87 100644
--- a/net/ipv4/netfilter/ip_fw_compat_redir.c
+++ b/net/ipv4/netfilter/ip_fw_compat_redir.c
@@ -22,6 +22,7 @@
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <net/checksum.h>
+#include <net/ip.h>
 #include <linux/timer.h>
 #include <linux/netdevice.h>
 #include <linux/if.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index a84453f899fe..9765fd2d5cf1 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
+#include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/spinlock.h>
 
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index fb499905f05d..54bc4684cc9d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/netfilter.h>
 #include <net/protocol.h>
+#include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d43b43938972..0b29f833d29b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -948,53 +948,6 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 #define TCP_PAGE(sk)	(inet_sk(sk)->sndmsg_page)
 #define TCP_OFF(sk)	(inet_sk(sk)->sndmsg_off)
 
-static inline int tcp_copy_to_page(struct sock *sk, char __user *from,
-				   struct sk_buff *skb, struct page *page,
-				   int off, int copy)
-{
-	int err = 0;
-	unsigned int csum;
-
-	if (skb->ip_summed == CHECKSUM_NONE) {
-		csum = csum_and_copy_from_user(from, page_address(page) + off,
-				       copy, 0, &err);
-		if (err) return err;
-		skb->csum = csum_block_add(skb->csum, csum, skb->len);
-	} else {
-		if (copy_from_user(page_address(page) + off, from, copy))
-			return -EFAULT;
-	}
-
-	skb->len += copy;
-	skb->data_len += copy;
-	skb->truesize += copy;
-	sk->sk_wmem_queued += copy;
-	sk->sk_forward_alloc -= copy;
-	return 0;
-}
-
-static inline int skb_add_data(struct sk_buff *skb, char __user *from, int copy)
-{
-	int err = 0;
-	unsigned int csum;
-	int off = skb->len;
-
-	if (skb->ip_summed == CHECKSUM_NONE) {
-		csum = csum_and_copy_from_user(from, skb_put(skb, copy),
-				       copy, 0, &err);
-		if (!err) {
-			skb->csum = csum_block_add(skb->csum, csum, off);
-			return 0;
-		}
-	} else {
-		if (!copy_from_user(skb_put(skb, copy), from, copy))
-			return 0;
-	}
-
-	__skb_trim(skb, off);
-	return -EFAULT;
-}
-
 static inline int select_size(struct sock *sk, struct tcp_opt *tp)
 {
 	int tmp = tp->mss_cache_std;
@@ -1138,7 +1091,7 @@ new_segment:
 
 				/* Time to copy data. We are close to
 				 * the end! */
-				err = tcp_copy_to_page(sk, from, skb, page,
+				err = skb_copy_to_page(sk, from, skb, page,
 						       off, copy);
 				if (err) {
 					/* If this page was new, give it to the
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ac6c55259e1a..ad0c2ac89a1a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -354,70 +354,6 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss)
 	}
 }
 
-/* Split fragmented skb to two parts at length len. */
-
-static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len)
-{
-	int i;
-	int pos = skb_headlen(skb);
-
-	if (len < pos) {
-		/* Split line is inside header. */
-		memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len);
-
-		/* And move data appendix as is. */
-		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-			skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
-
-		skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
-		skb_shinfo(skb)->nr_frags = 0;
-
-		skb1->data_len = skb->data_len;
-		skb1->len += skb1->data_len;
-		skb->data_len = 0;
-		skb->len = len;
-		skb->tail = skb->data+len;
-	} else {
-		int k = 0;
-		int nfrags = skb_shinfo(skb)->nr_frags;
-
-		/* Second chunk has no header, nothing to copy. */
-
-		skb_shinfo(skb)->nr_frags = 0;
-		skb1->len = skb1->data_len = skb->len - len;
-		skb->len = len;
-		skb->data_len = len - pos;
-
-		for (i=0; i<nfrags; i++) {
-			int size = skb_shinfo(skb)->frags[i].size;
-			if (pos + size > len) {
-				skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
-
-				if (pos < len) {
-					/* Split frag.
-					 * We have to variants in this case:
-					 * 1. Move all the frag to the second
-					 *    part, if it is possible. F.e.
-					 *    this approach is mandatory for TUX,
-					 *    where splitting is expensive.
-					 * 2. Split is accurately. We make this.
-					 */
-					get_page(skb_shinfo(skb)->frags[i].page);
-					skb_shinfo(skb1)->frags[0].page_offset += (len-pos);
-					skb_shinfo(skb1)->frags[0].size -= (len-pos);
-					skb_shinfo(skb)->frags[i].size = len-pos;
-					skb_shinfo(skb)->nr_frags++;
-				}
-				k++;
-			} else {
-				skb_shinfo(skb)->nr_frags++;
-			}
-			pos += size;
-		}
-		skb_shinfo(skb1)->nr_frags = k;
-	}
-}
-
 /* Function to create two new TCP segments.  Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list.  This won't be called frequently, I hope. 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index cfecd15d9b5c..6bc7ebcc32fd 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -55,7 +55,7 @@
 #include <net/sock.h>
 
 #include <net/ipv6.h>
-#include <net/checksum.h>
+#include <net/ip6_checksum.h>
 #include <net/protocol.h>
 #include <net/raw.h>
 #include <net/rawv6.h>
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b351145770d2..0bd071691ba4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -60,7 +60,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 
-#include <net/checksum.h>
+#include <net/ip6_checksum.h>
 
 /* Set to 3 to get tracing... */
 #define MCAST_DEBUG 2
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index aa6b74d9b08a..6330513fffaf 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -77,7 +77,7 @@
 #include <net/icmp.h>
 
 #include <net/flow.h>
-#include <net/checksum.h>
+#include <net/ip6_checksum.h>
 #include <linux/proc_fs.h>
 
 #include <linux/netfilter.h>
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 3d2961c48955..ebd24bb8f7e2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -35,6 +35,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
+#include <net/ip.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 
@@ -42,6 +43,7 @@
 #include <net/ndisc.h>
 #include <net/protocol.h>
 #include <net/ip6_route.h>
+#include <net/ip6_checksum.h>
 #include <net/addrconf.h>
 #include <net/transp_v6.h>
 #include <net/udp.h>
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f410d8c40d78..c41ddd55f958 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -51,6 +51,7 @@
 #include <net/transp_v6.h>
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
+#include <net/ip6_checksum.h>
 #include <net/inet_ecn.h>
 #include <net/protocol.h>
 #include <net/xfrm.h>
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 63ef59317f78..3ddbfa88ca2a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -51,7 +51,7 @@
 #include <net/udp.h>
 #include <net/inet_common.h>
 
-#include <net/checksum.h>
+#include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 
 #include <linux/proc_fs.h>
-- 
cgit v1.2.3


From f51dc7a2a47aff78a05d65b6affccebd35fb68de Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sat, 19 Jun 2004 20:38:35 -0700
Subject: [PATCH] affs remount fixes

AFFS: Fix oops on write after remount (from Roman Zippel):
  - Allocate/free bitmap as necessary
  - Remove last uses of SF_READONLY

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/affs/amigaffs.c         |  1 -
 fs/affs/bitmap.c           | 29 +++++++++++++++++++++--------
 fs/affs/super.c            | 26 ++++++++++++--------------
 include/linux/affs_fs.h    |  3 ++-
 include/linux/affs_fs_sb.h |  1 -
 5 files changed, 35 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index b5d1aabf1c05..ddd53ec12a92 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -458,7 +458,6 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
 	if (!(sb->s_flags & MS_RDONLY))
 		printk(KERN_WARNING "AFFS: Remounting filesystem read-only\n");
 	sb->s_flags |= MS_RDONLY;
-	AFFS_SB(sb)->s_flags |= SF_READONLY;	/* Don't allow to remount rw */
 }
 
 void
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 095ef77a407e..93d32d18e97f 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -272,8 +272,7 @@ err_full:
 	return 0;
 }
 
-int
-affs_init_bitmap(struct super_block *sb)
+int affs_init_bitmap(struct super_block *sb, int *flags)
 {
 	struct affs_bm_info *bm;
 	struct buffer_head *bmap_bh = NULL, *bh = NULL;
@@ -282,13 +281,13 @@ affs_init_bitmap(struct super_block *sb)
 	int i, res = 0;
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 
-	if (sb->s_flags & MS_RDONLY)
+	if (*flags & MS_RDONLY)
 		return 0;
 
 	if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) {
 		printk(KERN_NOTICE "AFFS: Bitmap invalid - mounting %s read only\n",
 			sb->s_id);
-		sb->s_flags |= MS_RDONLY;
+		*flags |= MS_RDONLY;
 		return 0;
 	}
 
@@ -301,7 +300,7 @@ affs_init_bitmap(struct super_block *sb)
 	bm = sbi->s_bitmap = kmalloc(size, GFP_KERNEL);
 	if (!sbi->s_bitmap) {
 		printk(KERN_ERR "AFFS: Bitmap allocation failed\n");
-		return 1;
+		return -ENOMEM;
 	}
 	memset(sbi->s_bitmap, 0, size);
 
@@ -316,13 +315,13 @@ affs_init_bitmap(struct super_block *sb)
 		bh = affs_bread(sb, bm->bm_key);
 		if (!bh) {
 			printk(KERN_ERR "AFFS: Cannot read bitmap\n");
-			res = 1;
+			res = -EIO;
 			goto out;
 		}
 		if (affs_checksum_block(sb, bh)) {
 			printk(KERN_WARNING "AFFS: Bitmap %u invalid - mounting %s read only.\n",
 			       bm->bm_key, sb->s_id);
-			sb->s_flags |= MS_RDONLY;
+			*flags |= MS_RDONLY;
 			goto out;
 		}
 		pr_debug("AFFS: read bitmap block %d: %d\n", blk, bm->bm_key);
@@ -338,7 +337,7 @@ affs_init_bitmap(struct super_block *sb)
 		bmap_bh = affs_bread(sb, be32_to_cpu(bmap_blk[blk]));
 		if (!bmap_bh) {
 			printk(KERN_ERR "AFFS: Cannot read bitmap extension\n");
-			res = 1;
+			res = -EIO;
 			goto out;
 		}
 		bmap_blk = (u32 *)bmap_bh->b_data;
@@ -383,3 +382,17 @@ out:
 	affs_brelse(bmap_bh);
 	return res;
 }
+
+void affs_free_bitmap(struct super_block *sb)
+{
+	struct affs_sb_info *sbi = AFFS_SB(sb);
+
+	if (!sbi->s_bitmap)
+		return;
+
+	affs_brelse(sbi->s_bmap_bh);
+	sbi->s_bmap_bh = NULL;
+	sbi->s_last_bmap = ~0;
+	kfree(sbi->s_bitmap);
+	sbi->s_bitmap = NULL;
+}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index ad83ec3a9a45..f4ebbd27ef1b 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -51,10 +51,9 @@ affs_put_super(struct super_block *sb)
 		mark_buffer_dirty(sbi->s_root_bh);
 	}
 
-	affs_brelse(sbi->s_bmap_bh);
 	if (sbi->s_prefix)
 		kfree(sbi->s_prefix);
-	kfree(sbi->s_bitmap);
+	affs_free_bitmap(sb);
 	affs_brelse(sbi->s_root_bh);
 	kfree(sbi);
 	sb->s_fs_info = NULL;
@@ -288,6 +287,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 	gid_t			 gid;
 	int			 reserved;
 	unsigned long		 mount_flags;
+	int			 tmp_flags;	/* fix remount prototype... */
 
 	pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options");
 
@@ -399,7 +399,6 @@ got_root:
 		printk(KERN_NOTICE "AFFS: Dircache FS - mounting %s read only\n",
 			sb->s_id);
 		sb->s_flags |= MS_RDONLY;
-		sbi->s_flags |= SF_READONLY;
 	}
 	switch (chksum) {
 		case MUFS_FS:
@@ -455,8 +454,10 @@ got_root:
 	sbi->s_root_bh = root_bh;
 	/* N.B. after this point s_root_bh must be released */
 
-	if (affs_init_bitmap(sb))
+	tmp_flags = sb->s_flags;
+	if (affs_init_bitmap(sb, &tmp_flags))
 		goto out_error;
+	sb->s_flags = tmp_flags;
 
 	/* set up enough so that it can read an inode */
 
@@ -498,7 +499,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	int			 reserved;
 	int			 root_block;
 	unsigned long		 mount_flags;
-	unsigned long		 read_only = sbi->s_flags & SF_READONLY;
+	int			 res = 0;
 
 	pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
 
@@ -507,7 +508,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	if (!parse_options(data,&uid,&gid,&mode,&reserved,&root_block,
 	    &blocksize,&sbi->s_prefix,sbi->s_volume,&mount_flags))
 		return -EINVAL;
-	sbi->s_flags = mount_flags | read_only;
+	sbi->s_flags = mount_flags;
 	sbi->s_mode  = mode;
 	sbi->s_uid   = uid;
 	sbi->s_gid   = gid;
@@ -518,14 +519,11 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		sb->s_dirt = 1;
 		while (sb->s_dirt)
 			affs_write_super(sb);
-		sb->s_flags |= MS_RDONLY;
-	} else if (!(sbi->s_flags & SF_READONLY)) {
-		sb->s_flags &= ~MS_RDONLY;
-	} else {
-		affs_warning(sb,"remount","Cannot remount fs read/write because of errors");
-		return -EINVAL;
-	}
-	return 0;
+		affs_free_bitmap(sb);
+	} else
+		res = affs_init_bitmap(sb, flags);
+
+	return res;
 }
 
 static int
diff --git a/include/linux/affs_fs.h b/include/linux/affs_fs.h
index c849309b1131..5ba9d6205dc0 100644
--- a/include/linux/affs_fs.h
+++ b/include/linux/affs_fs.h
@@ -36,7 +36,8 @@ extern u32	affs_count_free_bits(u32 blocksize, const void *data);
 extern u32	affs_count_free_blocks(struct super_block *s);
 extern void	affs_free_block(struct super_block *sb, u32 block);
 extern u32	affs_alloc_block(struct inode *inode, u32 goal);
-extern int	affs_init_bitmap(struct super_block *sb);
+extern int	affs_init_bitmap(struct super_block *sb, int *flags);
+extern void	affs_free_bitmap(struct super_block *sb);
 
 /* namei.c */
 
diff --git a/include/linux/affs_fs_sb.h b/include/linux/affs_fs_sb.h
index d2f86715cf1c..d722befe1ced 100644
--- a/include/linux/affs_fs_sb.h
+++ b/include/linux/affs_fs_sb.h
@@ -47,7 +47,6 @@ struct affs_sb_info {
 #define SF_OFS		0x0200		/* Old filesystem */
 #define SF_PREFIX	0x0400		/* Buffer for prefix is allocated */
 #define SF_VERBOSE	0x0800		/* Talk about fs when mounting */
-#define SF_READONLY	0x1000		/* Don't allow to remount rw */
 
 /* short cut to get to the affs specific sb data */
 static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
-- 
cgit v1.2.3


From 86adf644850fbbc3da6448e8e1a0e39fe755e8e9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
Date: Sun, 20 Jun 2004 13:45:03 -0300
Subject: [NET] move skb_can_coalesce to skbuff.h

This one also removes the duplicate can_coalesce in tcp.c and makes it
use skb_can_coalesce.

Signed-off-by: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
---
 include/linux/skbuff.h | 12 ++++++++++++
 net/ipv4/ip_output.c   | 11 -----------
 net/ipv4/tcp.c         | 15 ++-------------
 3 files changed, 14 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f777878608ab..cc44268a6e83 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -993,6 +993,18 @@ static inline int skb_add_data(struct sk_buff *skb,
 	return -EFAULT;
 }
 
+static inline int skb_can_coalesce(struct sk_buff *skb, int i,
+				   struct page *page, int off)
+{
+	if (i) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+		return page == frag->page &&
+		       off == frag->page_offset + frag->size;
+	}
+	return 0;
+}
+
 /**
  *	skb_linearize - convert paged skb to linear one
  *	@skb: buffer to linarize
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 60fc5091c0ef..34a4a3feccb7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -702,17 +702,6 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
 	return 0;
 }
 
-static inline int
-skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
-{
-	if (i) {
-		skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-		return page == frag->page &&
-			off == frag->page_offset+frag->size;
-	}
-	return 0;
-}
-
 static inline unsigned int
 csum_page(struct page *page, int offset, int copy)
 {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0b29f833d29b..23532b96e85b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -737,17 +737,6 @@ do_interrupted:
 	goto out;
 }
 
-static inline int can_coalesce(struct sk_buff *skb, int i, struct page *page,
-			       int off)
-{
-	if (i) {
-		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
-		return page == frag->page &&
-		       off == frag->page_offset + frag->size;
-	}
-	return 0;
-}
-
 static inline void fill_page_desc(struct sk_buff *skb, int i,
 				  struct page *page, int off, int size)
 {
@@ -865,7 +854,7 @@ new_segment:
 			copy = size;
 
 		i = skb_shinfo(skb)->nr_frags;
-		if (can_coalesce(skb, i, page, offset)) {
+		if (skb_can_coalesce(skb, i, page, offset)) {
 			skb_shinfo(skb)->frags[i - 1].size += copy;
 		} else if (i < MAX_SKB_FRAGS) {
 			get_page(page);
@@ -1053,7 +1042,7 @@ new_segment:
 				struct page *page = TCP_PAGE(sk);
 				int off = TCP_OFF(sk);
 
-				if (can_coalesce(skb, i, page, off) &&
+				if (skb_can_coalesce(skb, i, page, off) &&
 				    off != PAGE_SIZE) {
 					/* We can extend the last page
 					 * fragment. */
-- 
cgit v1.2.3


From 567f48727b6cbd928c1d8531717ea981da164730 Mon Sep 17 00:00:00 2001
From: Arthur Kepner <akepner@sgi.com>
Date: Sun, 20 Jun 2004 03:36:21 -0700
Subject: [NET]: Lockless loopback patch (version 2).

---
 drivers/net/loopback.c    | 41 ++++++++++++++++++++++++++++-------
 include/linux/netdevice.h |  1 +
 include/net/pkt_sched.h   |  2 ++
 net/core/dev.c            | 55 ++++++++++++++++++++++++++++++++++-------------
 net/sched/sch_api.c       |  3 +++
 net/sched/sch_generic.c   | 43 ++++++++++++++++++++++++------------
 6 files changed, 108 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 1885bfe3c959..53dc879651e4 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -56,6 +56,7 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 
+static struct net_device_stats *loopback_stats;
 
 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
 
@@ -123,7 +124,6 @@ static void emulate_large_send_offload(struct sk_buff *skb)
  */
 static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device_stats *stats = dev->priv;
 
 	skb_orphan(skb);
 
@@ -142,11 +142,16 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	dev->last_rx = jiffies;
-	if (likely(stats)) {
-		stats->rx_bytes+=skb->len;
-		stats->tx_bytes+=skb->len;
-		stats->rx_packets++;
-		stats->tx_packets++;
+	if (likely(loopback_stats)) {
+		/* Each get_cpu_ptr() disables preemption, so fetch the
+		 * pointer once and balance it with a single put_cpu_ptr(). */
+		struct net_device_stats *lb_stats = get_cpu_ptr(loopback_stats);
+
+		lb_stats->rx_bytes += skb->len;
+		lb_stats->tx_bytes += skb->len;
+		lb_stats->rx_packets++;
+		lb_stats->tx_packets++;
+		put_cpu_ptr(loopback_stats);
 	}
 
 	netif_rx(skb);
@@ -156,7 +157,28 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static struct net_device_stats *get_stats(struct net_device *dev)
 {
-	return (struct net_device_stats *)dev->priv;
+	struct net_device_stats *stats = dev->priv;
+	int i;
+
+	if (!stats) {
+		return NULL;
+	}
+
+	memset(stats, 0, sizeof(struct net_device_stats));
+	if (!loopback_stats) {
+		return stats;
+	}
+
+	for (i=0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i)) 
+			continue;
+		stats->rx_bytes   += per_cpu_ptr(loopback_stats, i)->rx_bytes;
+		stats->tx_bytes   += per_cpu_ptr(loopback_stats, i)->tx_bytes;
+		stats->rx_packets += per_cpu_ptr(loopback_stats, i)->rx_packets;
+		stats->tx_packets += per_cpu_ptr(loopback_stats, i)->tx_packets;
+	}
+				
+	return stats;
 }
 
 struct net_device loopback_dev = {
@@ -173,7 +195,8 @@ struct net_device loopback_dev = {
 	.rebuild_header		= eth_rebuild_header,
 	.flags			= IFF_LOOPBACK,
 	.features 		= NETIF_F_SG|NETIF_F_FRAGLIST
-				  |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA,
+				  |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
+				  |NETIF_F_LLTX,
 };
 
 /* Setup and register the of the LOOPBACK device. */
@@ -188,6 +211,8 @@ int __init loopback_init(void)
 		loopback_dev.priv = stats;
 		loopback_dev.get_stats = &get_stats;
 	}
+
+	loopback_stats = alloc_percpu(struct net_device_stats);
 	
 	return register_netdev(&loopback_dev);
 };
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a55f97a34035..97758cd8f50e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -405,6 +405,7 @@ struct net_device
 #define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_TSO		2048	/* Can offload TCP/IP segmentation */
+#define NETIF_F_LLTX		4096	/* LockLess TX */
 
 	/* Called after device is detached from network. */
 	void			(*uninit)(struct net_device *dev);
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 80661d855fd8..d8960a01df42 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -11,6 +11,7 @@
 #include <linux/netdevice.h>
 #include <linux/types.h>
 #include <linux/pkt_sched.h>
+#include <linux/rcupdate.h>
 #include <net/pkt_cls.h>
 
 #ifdef CONFIG_X86_TSC
@@ -92,6 +93,7 @@ struct Qdisc
 	struct net_device	*dev;
 
 	struct tc_stats		stats;
+	struct rcu_head 	q_rcu;
 	int			(*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);
 
 	/* This field is deprecated, but it is still used by CBQ
diff --git a/net/core/dev.c b/net/core/dev.c
index 27cbb530b95b..9729210ca4cb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -107,6 +107,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/netpoll.h>
+#include <linux/rcupdate.h>
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
 #include <net/iw_handler.h>
@@ -1305,6 +1306,20 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
 	return 0;
 }
 
+#define HARD_TX_LOCK_BH(dev, cpu) do {			\
+	if (((dev)->features & NETIF_F_LLTX) == 0) {	\
+		spin_lock_bh(&(dev)->xmit_lock);	\
+		(dev)->xmit_lock_owner = (cpu);		\
+	}						\
+} while (0)
+
+#define HARD_TX_UNLOCK_BH(dev) do {			\
+	if (((dev)->features & NETIF_F_LLTX) == 0) {	\
+		(dev)->xmit_lock_owner = -1;		\
+		spin_unlock_bh(&(dev)->xmit_lock);	\
+	}						\
+} while (0)
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1348,18 +1363,35 @@ int dev_queue_xmit(struct sk_buff *skb)
 	      	if (skb_checksum_help(&skb, 0))
 	      		goto out_kfree_skb;
 
-	/* Grab device queue */
-	spin_lock_bh(&dev->queue_lock);
+	rcu_read_lock();
+	/* Updates of qdisc are serialized by queue_lock. 
+	 * The struct Qdisc which is pointed to by qdisc is now a 
+	 * rcu structure - it may be accessed without acquiring 
+	 * a lock (but the structure may be stale.) The freeing of the
+	 * qdisc will be deferred until it's known that there are no 
+	 * more references to it.
+	 * 
+	 * If the qdisc has an enqueue function, we still need to 
+	 * hold the queue_lock before calling it, since queue_lock
+	 * also serializes access to the device queue.
+	 */
+
 	q = dev->qdisc;
+	smp_read_barrier_depends();
 	if (q->enqueue) {
+		/* Grab device queue */
+		spin_lock_bh(&dev->queue_lock);
+
 		rc = q->enqueue(skb, q);
 
 		qdisc_run(dev);
 
 		spin_unlock_bh(&dev->queue_lock);
+		rcu_read_unlock();
 		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
 		goto out;
 	}
+	rcu_read_unlock();
 
 	/* The device has no queue. Common case for software devices:
 	   loopback, all the sorts of tunnels...
@@ -1374,17 +1406,12 @@ int dev_queue_xmit(struct sk_buff *skb)
 	   Either shot noqueue qdisc, it is even simpler 8)
 	 */
 	if (dev->flags & IFF_UP) {
+		preempt_disable();
 		int cpu = smp_processor_id();
 
 		if (dev->xmit_lock_owner != cpu) {
-			/*
-			 * The spin_lock effectivly does a preempt lock, but 
-			 * we are about to drop that...
-			 */
-			preempt_disable();
-			spin_unlock(&dev->queue_lock);
-			spin_lock(&dev->xmit_lock);
-			dev->xmit_lock_owner = cpu;
+
+			HARD_TX_LOCK_BH(dev, cpu);
 			preempt_enable();
 
 			if (!netif_queue_stopped(dev)) {
@@ -1393,18 +1420,17 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 				rc = 0;
 				if (!dev->hard_start_xmit(skb, dev)) {
-					dev->xmit_lock_owner = -1;
-					spin_unlock_bh(&dev->xmit_lock);
+					HARD_TX_UNLOCK_BH(dev);
 					goto out;
 				}
 			}
-			dev->xmit_lock_owner = -1;
-			spin_unlock_bh(&dev->xmit_lock);
+			HARD_TX_UNLOCK_BH(dev);
 			if (net_ratelimit())
 				printk(KERN_CRIT "Virtual device %s asks to "
 				       "queue packet!\n", dev->name);
 			goto out_enetdown;
 		} else {
+			preempt_enable();
 			/* Recursion is detected! It is possible,
 			 * unfortunately */
 			if (net_ratelimit())
@@ -1412,7 +1438,6 @@ int dev_queue_xmit(struct sk_buff *skb)
 				       "%s, fix it urgently!\n", dev->name);
 		}
 	}
-	spin_unlock_bh(&dev->queue_lock);
 out_enetdown:
 	rc = -ENETDOWN;
 out_kfree_skb:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 432531dca1c8..85ed105d1a12 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -450,6 +450,9 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 	if (!try_module_get(ops->owner))
 		goto err_out;
 
+	/* enqueue is accessed locklessly - make sure it's visible
+	 * before we set a netdevice's qdisc pointer to sch */
+	smp_wmb();
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
 		write_lock(&qdisc_tree_lock);
 		sch->next = dev->qdisc_list;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 97b56255c7be..111dad476d2b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -30,6 +30,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
+#include <linux/rcupdate.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
@@ -387,6 +388,9 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
 	sch->dev = dev;
 	sch->stats.lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);
+	/* enqueue is accessed locklessly - make sure it's visible
+	 * before we set a netdevice's qdisc pointer to sch */
+	smp_wmb();
 	if (!ops->init || ops->init(sch, NULL) == 0)
 		return sch;
 
@@ -404,18 +408,36 @@ void qdisc_reset(struct Qdisc *qdisc)
 		ops->reset(qdisc);
 }
 
+/* this is the rcu callback function to clean up a qdisc when there 
+ * are no further references to it */
+
+static void __qdisc_destroy (void * arg) 
+{
+	struct Qdisc    *qdisc = (struct Qdisc *) arg;
+	struct Qdisc_ops  *ops = qdisc->ops;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	qdisc_kill_estimator(&qdisc->stats);
+#endif
+	if (ops->reset)
+		ops->reset(qdisc);
+	if (ops->destroy)
+		ops->destroy(qdisc);
+	module_put(ops->owner);
+
+	if (!(qdisc->flags&TCQ_F_BUILTIN))
+		kfree(qdisc);
+}
+
 /* Under dev->queue_lock and BH! */
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
-	struct Qdisc_ops *ops = qdisc->ops;
-	struct net_device *dev;
+	struct net_device *dev = qdisc->dev;
 
 	if (!atomic_dec_and_test(&qdisc->refcnt))
 		return;
 
-	dev = qdisc->dev;
-
 	if (dev) {
 		struct Qdisc *q, **qp;
 		for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
@@ -425,16 +447,9 @@ void qdisc_destroy(struct Qdisc *qdisc)
 			}
 		}
 	}
-#ifdef CONFIG_NET_ESTIMATOR
-	qdisc_kill_estimator(&qdisc->stats);
-#endif
-	if (ops->reset)
-		ops->reset(qdisc);
-	if (ops->destroy)
-		ops->destroy(qdisc);
-	module_put(ops->owner);
-	if (!(qdisc->flags&TCQ_F_BUILTIN))
-		kfree(qdisc);
+
+	call_rcu(&qdisc->q_rcu, __qdisc_destroy, qdisc);
+
 }
 
 
-- 
cgit v1.2.3


From efd3d374a20b019c0c89b2050123f9bcfcdeb794 Mon Sep 17 00:00:00 2001
From: Andrey Panin <pazke@donpac.ru>
Date: Sun, 20 Jun 2004 04:52:25 -0700
Subject: [PATCH] export DMI check functions

This patch creates and exports 2 functions which can be used by the rest of
kernel code to perform DMI data checks:

- dmi_check_system() function checks system DMI data against given blacklist
  table and on each match runs corresponding callback function;

- dmi_get_system_info() function returns DMI data value.  Useful for people
  wanting more complex DMI data check than simple string match.

Also, filling unused match entries with NO_MATCH is now optional, but existing
NO_MATCH occurrences are left intact, so people are free to continue patching
dmi_scan.c without massive reject problems.


Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/dmi_scan.c | 111 ++++++++++++++++++++++----------------------
 include/linux/dmi.h         |  47 +++++++++++++++++++
 2 files changed, 102 insertions(+), 56 deletions(-)
 create mode 100644 include/linux/dmi.h

(limited to 'include/linux')

diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index aafc78898f93..d3909ea20c5a 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -10,6 +10,7 @@
 #include <asm/io.h>
 #include <linux/pm.h>
 #include <asm/system.h>
+#include <linux/dmi.h>
 #include <linux/bootmem.h>
 
 unsigned long dmi_broken;
@@ -139,21 +140,6 @@ static int __init dmi_iterate(void (*decode)(struct dmi_header *))
 	return -1;
 }
 
-
-enum
-{
-	DMI_BIOS_VENDOR,
-	DMI_BIOS_VERSION,
-	DMI_BIOS_DATE,
-	DMI_SYS_VENDOR,
-	DMI_PRODUCT_NAME,
-	DMI_PRODUCT_VERSION,
-	DMI_BOARD_VENDOR,
-	DMI_BOARD_NAME,
-	DMI_BOARD_VERSION,
-	DMI_STRING_MAX
-};
-
 static char *dmi_ident[DMI_STRING_MAX];
 
 /*
@@ -176,26 +162,11 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
 }
 
 /*
- *	DMI callbacks for problem boards
+ * Ugly compatibility crap.
  */
-
-struct dmi_strmatch
-{
-	u8 slot;
-	char *substr;
-};
-
-#define NONE	255
-
-struct dmi_blacklist
-{
-	int (*callback)(struct dmi_blacklist *);
-	char *ident;
-	struct dmi_strmatch matches[4];
-};
-
-#define NO_MATCH	{ NONE, NULL}
-#define MATCH(a,b)	{ a, b }
+#define dmi_blacklist	dmi_system_id
+#define NO_MATCH	{ DMI_NONE, NULL}
+#define MATCH		DMI_MATCH
 
 /* 
  * Reboot options and system auto-detection code provided by
@@ -1054,9 +1025,6 @@ static __initdata struct dmi_blacklist dmi_blacklist[]={
 
 static __init void dmi_check_blacklist(void)
 {
-	struct dmi_blacklist *d;
-	int i;
-		
 #ifdef	CONFIG_ACPI_BOOT
 #define	ACPI_BLACKLIST_CUTOFF_YEAR	2001
 
@@ -1078,25 +1046,7 @@ static __init void dmi_check_blacklist(void)
 		}
 	}
 #endif
-
-	d=&dmi_blacklist[0];
-	while(d->callback)
-	{
-		for(i=0;i<4;i++)
-		{
-			int s = d->matches[i].slot;
-			if(s==NONE)
-				continue;
-			if(dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr))
-				continue;
-			/* No match */
-			goto fail;
-		}
-		if(d->callback(d))
-			return;
-fail:			
-		d++;
-	}
+ 	dmi_check_system(dmi_blacklist);
 }
 
 	
@@ -1163,3 +1113,52 @@ void __init dmi_scan_machine(void)
 }
 
 EXPORT_SYMBOL(is_unsafe_smbus);
+
+
+/**
+ *	dmi_check_system - check system DMI data
+ *	@list: array of dmi_system_id structures to match against
+ *
+ *	Walk the table, calling the callback (if any) of every entry whose
+ *	match strings are all found, until a callback returns non-zero or an
+ *	entry with a NULL ident ends the table. Returns the number of matches.
+ */
+int dmi_check_system(struct dmi_system_id *list)
+{
+	int i, count = 0;
+	struct dmi_system_id *d = list;
+
+	while (d->ident) {
+		for (i = 0; i < ARRAY_SIZE(d->matches); i++) {
+			int s = d->matches[i].slot;
+			if (s == DMI_NONE)
+				continue;
+			if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr))
+				continue;
+			/* No match */
+			goto fail;
+		}
+		count++;	/* count the match even if its callback stops the walk */
+		if (d->callback && d->callback(d))
+			break;
+fail:		d++;
+	}
+
+	return count;
+}
+
+EXPORT_SYMBOL(dmi_check_system);
+
+/**
+ *	dmi_get_system_info - return DMI data value
+ *	@field: data index (see enum dmi_field)
+ *
+ *	Returns one DMI data value, can be used to perform
+ *	complex DMI data checks.
+ */
+char * dmi_get_system_info(int field)
+{
+	return dmi_ident[field];
+}
+
+EXPORT_SYMBOL(dmi_get_system_info);
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
new file mode 100644
index 000000000000..d2bcf556088b
--- /dev/null
+++ b/include/linux/dmi.h
@@ -0,0 +1,47 @@
+#ifndef __DMI_H__
+#define __DMI_H__
+
+enum dmi_field {
+	DMI_NONE,
+	DMI_BIOS_VENDOR,
+	DMI_BIOS_VERSION,
+	DMI_BIOS_DATE,
+	DMI_SYS_VENDOR,
+	DMI_PRODUCT_NAME,
+	DMI_PRODUCT_VERSION,
+	DMI_BOARD_VENDOR,
+	DMI_BOARD_NAME,
+	DMI_BOARD_VERSION,
+	DMI_STRING_MAX,
+};
+
+/*
+ *	DMI callbacks for problem boards
+ */
+struct dmi_strmatch {
+	u8 slot;
+	char *substr;
+};
+
+struct dmi_system_id {
+	int (*callback)(struct dmi_system_id *);
+	char *ident;
+	struct dmi_strmatch matches[4];
+	void *driver_data;
+};
+
+#define DMI_MATCH(a,b)	{ a, b }
+
+#if defined(CONFIG_X86) && !defined(CONFIG_X86_64)
+
+extern int dmi_check_system(struct dmi_system_id *list);
+extern char * dmi_get_system_info(int field);
+
+#else
+
+static inline int dmi_check_system(struct dmi_system_id *list) { return 0; }
+static inline char * dmi_get_system_info(int field) { return NULL; }
+
+#endif
+
+#endif	/* __DMI_H__ */
-- 
cgit v1.2.3


From 17e14befcea27d734fd4f2247f2186683ee35ae0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 20 Jun 2004 04:52:59 -0700
Subject: [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER
 size

Here's a patch to allocate memory for big system hash tables with the
bootmem allocator rather than with main page allocator.

It is needed for three reasons:

(1) So that the size can be bigger than MAX_ORDER.  IBM have done some
    testing on their big PPC64 systems (64GB of RAM) with linux-2.4 and found
    that they get better performance if the sizes of the inode cache hash,
    dentry cache hash, buffer head hash and page cache hash are increased
    beyond MAX_ORDER (order 11).

     Now the main allocator can't allocate anything larger than MAX_ORDER, but
     the bootmem allocator can.

     In 2.6 it appears that only the inode and dentry hashes remain of those
     four, but there are other hash tables that could use this service.

(2) Changing MAX_ORDER appears to have a number of effects beyond just
    limiting the maximum size that can be allocated in one go.

(3) Should someone want a hash table in which each bucket isn't a power of
    two in size, memory will be wasted as the chunk of memory allocated will
    be a power of two in size (to hold a power of two number of buckets).

    On the other hand, using the bootmem allocator means the allocation
    will only take up sufficient pages to hold it, rather than the next power
    of two up.

    Admittedly, this point doesn't apply to the dentry and inode hashes,
    but it might to another hash table that might want to use this service.


I've coalesced the meat of the inode and dentry allocation routines into
one such routine in mm/page_alloc.c that the respective initialisation
functions now call before mem_init() is called.

This routine gets its approximation of memory size by counting up the
ZONE_NORMAL and ZONE_DMA pages (and ZONE_HIGHMEM if requested) in all the
nodes passed to the main allocator by paging_init() (or wherever the arch
does it).  It does not use max_low_pfn as that doesn't seem to be available
on all archs, and it doesn't use num_physpages since that includes highmem
pages not available to the kernel for allocating data structures upon -
which may not be appropriate when calculating hash table size.

On the off chance that the size of each hash bucket may not be exactly a
power of two, the routine will only allocate as many pages as is necessary
to ensure that the number of buckets is exactly a power of two, rather than
allocating the smallest power-of-two sized chunk of memory that will hold
the same array of buckets.

The maximum size of any single hash table is given by
MAX_SYS_HASH_TABLE_ORDER, as is now defined in linux/mmzone.h.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c             | 68 ++++++++++++++++-----------------------------
 fs/inode.c              | 60 ++++++++++++----------------------------
 include/linux/bootmem.h |  8 ++++++
 include/linux/fs.h      | 11 +++++---
 include/linux/kernel.h  |  9 ++++++
 include/linux/mmzone.h  | 12 ++++++++
 init/main.c             |  1 +
 mm/page_alloc.c         | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 152 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 613ff66dacde..4c632e1261dc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -30,6 +30,7 @@
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
+#include <linux/bootmem.h>
 
 #define DCACHE_PARANOIA 1
 /* #define DCACHE_DEBUG 1 */
@@ -1561,13 +1562,25 @@ static int __init set_dhash_entries(char *str)
 }
 __setup("dhash_entries=", set_dhash_entries);
 
-static void __init dcache_init(unsigned long mempages)
+static void __init dcache_init_early(void)
 {
-	struct hlist_head *d;
-	unsigned long order;
-	unsigned int nr_hash;
-	int i;
+	int loop;
+
+	dentry_hashtable =
+		alloc_large_system_hash("Dentry cache",
+					sizeof(struct hlist_head),
+					dhash_entries,
+					13,
+					0,
+					&d_hash_shift,
+					&d_hash_mask);
+
+	for (loop = 0; loop < (1 << d_hash_shift); loop++)
+		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+}
 
+static void __init dcache_init(unsigned long mempages)
+{
 	/* 
 	 * A constructor could be added for stable state like the lists,
 	 * but it is probably not worth it because of the cache nature
@@ -1580,45 +1593,6 @@ static void __init dcache_init(unsigned long mempages)
 					 NULL, NULL);
 	
 	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
-
-	if (!dhash_entries)
-		dhash_entries = PAGE_SHIFT < 13 ?
-				mempages >> (13 - PAGE_SHIFT) :
-				mempages << (PAGE_SHIFT - 13);
-
-	dhash_entries *= sizeof(struct hlist_head);
-	for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++)
-		;
-
-	do {
-		unsigned long tmp;
-
-		nr_hash = (1UL << order) * PAGE_SIZE /
-			sizeof(struct hlist_head);
-		d_hash_mask = (nr_hash - 1);
-
-		tmp = nr_hash;
-		d_hash_shift = 0;
-		while ((tmp >>= 1UL) != 0UL)
-			d_hash_shift++;
-
-		dentry_hashtable = (struct hlist_head *)
-			__get_free_pages(GFP_ATOMIC, order);
-	} while (dentry_hashtable == NULL && --order >= 0);
-
-	printk(KERN_INFO "Dentry cache hash table entries: %d (order: %ld, %ld bytes)\n",
-			nr_hash, order, (PAGE_SIZE << order));
-
-	if (!dentry_hashtable)
-		panic("Failed to allocate dcache hash table\n");
-
-	d = dentry_hashtable;
-	i = nr_hash;
-	do {
-		INIT_HLIST_HEAD(d);
-		d++;
-		i--;
-	} while (i);
 }
 
 /* SLAB cache for __getname() consumers */
@@ -1632,6 +1606,12 @@ EXPORT_SYMBOL(d_genocide);
 extern void bdev_cache_init(void);
 extern void chrdev_init(void);
 
+void __init vfs_caches_init_early(void)
+{
+	dcache_init_early();
+	inode_init_early();
+}
+
 void __init vfs_caches_init(unsigned long mempages)
 {
 	unsigned long reserve;
diff --git a/fs/inode.c b/fs/inode.c
index 042e3bb454dd..e802a3c35fd2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -20,6 +20,7 @@
 #include <linux/security.h>
 #include <linux/pagemap.h>
 #include <linux/cdev.h>
+#include <linux/bootmem.h>
 
 /*
  * This is needed for the following functions:
@@ -1345,55 +1346,30 @@ __setup("ihash_entries=", set_ihash_entries);
 /*
  * Initialize the waitqueues and inode hash table.
  */
+void __init inode_init_early(void)
+{
+	int loop;
+
+	inode_hashtable =
+		alloc_large_system_hash("Inode-cache",
+					sizeof(struct hlist_head),
+					ihash_entries,
+					14,
+					0,
+					&i_hash_shift,
+					&i_hash_mask);
+
+	for (loop = 0; loop < (1 << i_hash_shift); loop++)
+		INIT_HLIST_HEAD(&inode_hashtable[loop]);
+}
+
 void __init inode_init(unsigned long mempages)
 {
-	struct hlist_head *head;
-	unsigned long order;
-	unsigned int nr_hash;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
 		init_waitqueue_head(&i_wait_queue_heads[i].wqh);
 
-	if (!ihash_entries)
-		ihash_entries = PAGE_SHIFT < 14 ?
-				mempages >> (14 - PAGE_SHIFT) :
-				mempages << (PAGE_SHIFT - 14);
-
-	ihash_entries *= sizeof(struct hlist_head);
-	for (order = 0; ((1UL << order) << PAGE_SHIFT) < ihash_entries; order++)
-		;
-
-	do {
-		unsigned long tmp;
-
-		nr_hash = (1UL << order) * PAGE_SIZE /
-			sizeof(struct hlist_head);
-		i_hash_mask = (nr_hash - 1);
-
-		tmp = nr_hash;
-		i_hash_shift = 0;
-		while ((tmp >>= 1UL) != 0UL)
-			i_hash_shift++;
-
-		inode_hashtable = (struct hlist_head *)
-			__get_free_pages(GFP_ATOMIC, order);
-	} while (inode_hashtable == NULL && --order >= 0);
-
-	printk("Inode-cache hash table entries: %d (order: %ld, %ld bytes)\n",
-			nr_hash, order, (PAGE_SIZE << order));
-
-	if (!inode_hashtable)
-		panic("Failed to allocate inode hash table\n");
-
-	head = inode_hashtable;
-	i = nr_hash;
-	do {
-		INIT_HLIST_HEAD(head);
-		head++;
-		i--;
-	} while (i);
-
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
 				0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once,
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 6902724691d2..e038f9a3d0ef 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -67,4 +67,12 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+extern void *__init alloc_large_system_hash(const char *tablename,
+					    unsigned long bucketsize,
+					    unsigned long numentries,
+					    int scale,
+					    int consider_highmem,
+					    unsigned int *_hash_shift,
+					    unsigned int *_hash_mask);
+
 #endif /* _LINUX_BOOTMEM_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 10a69a544045..504ca447700d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -214,15 +214,17 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
 #include <linux/list.h>
 #include <linux/radix-tree.h>
 #include <linux/audit.h>
+#include <linux/init.h>
 #include <asm/semaphore.h>
 #include <asm/byteorder.h>
 
 /* Used to be a macro which just called the function, now just a function */
 extern void update_atime (struct inode *);
 
-extern void inode_init(unsigned long);
-extern void mnt_init(unsigned long);
-extern void files_init(unsigned long);
+extern void __init inode_init(unsigned long);
+extern void __init inode_init_early(void);
+extern void __init mnt_init(unsigned long);
+extern void __init files_init(unsigned long);
 
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -1199,7 +1201,8 @@ extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 
 /* fs/dcache.c */
-extern void vfs_caches_init(unsigned long);
+extern void __init vfs_caches_init_early(void);
+extern void __init vfs_caches_init(unsigned long);
 
 #define __getname()	kmem_cache_alloc(names_cachep, SLAB_KERNEL)
 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 67da15c2e1e3..9eb023020b44 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -92,6 +92,15 @@ asmlinkage int printk(const char * fmt, ...)
 
 unsigned long int_sqrt(unsigned long);
 
+static inline int __attribute_pure__ long_log2(unsigned long x)
+{
+	int r = 0;
+	for (x >>= 1; x > 0; x >>= 1)
+		r++;
+	return r;
+}
+
+
 extern int printk_ratelimit(void);
 extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f1e1a727acb2..373a13ba6f3f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -20,6 +20,18 @@
 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
 #endif
 
+/*
+ * system hash table size limits
+ * - on large memory machines, we may want to allocate a bigger hash than that
+ *   permitted by MAX_ORDER, so we allocate with the bootmem allocator, and are
+ *   limited to this size
+ */
+#if MAX_ORDER > 14
+#define MAX_SYS_HASH_TABLE_ORDER MAX_ORDER
+#else
+#define MAX_SYS_HASH_TABLE_ORDER 14
+#endif
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		*map;
diff --git a/init/main.c b/init/main.c
index 027896bba166..1e58b66b2144 100644
--- a/init/main.c
+++ b/init/main.c
@@ -458,6 +458,7 @@ asmlinkage void __init start_kernel(void)
 		initrd_start = 0;
 	}
 #endif
+	vfs_caches_init_early();
 	mem_init();
 	kmem_cache_init();
 	numa_policy_init();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 444bb534dbd8..16d5c2af94ee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,6 +55,9 @@ EXPORT_SYMBOL(zone_table);
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
+static unsigned long __initdata nr_kernel_pages;
+static unsigned long __initdata nr_all_pages;
+
 /*
  * Temporary debugging check for pages not lying within a given zone.
  */
@@ -1430,6 +1433,10 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		if (zholes_size)
 			realsize -= zholes_size[j];
 
+		if (j == ZONE_DMA || j == ZONE_NORMAL)
+			nr_kernel_pages += realsize;
+		nr_all_pages += realsize;
+
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 		zone->name = zone_names[j];
@@ -1970,3 +1977,69 @@ int lower_zone_protection_sysctl_handler(ctl_table *table, int write,
 	setup_per_zone_protection();
 	return 0;
 }
+
+/*
+ * allocate a large system hash table from bootmem
+ * - it is assumed that the hash table must contain an exact power-of-2
+ *   quantity of entries
+ */
+void *__init alloc_large_system_hash(const char *tablename,
+				     unsigned long bucketsize,
+				     unsigned long numentries,
+				     int scale,
+				     int consider_highmem,
+				     unsigned int *_hash_shift,
+				     unsigned int *_hash_mask)
+{
+	unsigned long mem, max, log2qty, size;
+	void *table;
+
+	/* round applicable memory size up to nearest megabyte */
+	mem = consider_highmem ? nr_all_pages : nr_kernel_pages;
+	mem += (1UL << (20 - PAGE_SHIFT)) - 1;
+	mem >>= 20 - PAGE_SHIFT;
+	mem <<= 20 - PAGE_SHIFT;
+
+	/* limit to 1 bucket per 2^scale bytes of low memory (rounded up to
+	 * nearest power of 2 in size) */
+	if (scale > PAGE_SHIFT)
+		mem >>= (scale - PAGE_SHIFT);
+	else
+		mem <<= (PAGE_SHIFT - scale);
+
+	mem = 1UL << (long_log2(mem) + 1);
+
+	/* limit allocation size */
+	max = (1UL << (PAGE_SHIFT + MAX_SYS_HASH_TABLE_ORDER)) / bucketsize;
+	if (max > mem)
+		max = mem;
+
+	/* allow the kernel cmdline to have a say */
+	if (!numentries || numentries > max)
+		numentries = max;
+
+	log2qty = long_log2(numentries);
+
+	do {
+		size = bucketsize << log2qty;
+
+		table = (void *) alloc_bootmem(size);
+		/* on failure, halve the bucket count and try again */
+	} while (!table && size > PAGE_SIZE && --log2qty);
+
+	if (!table)
+		panic("Failed to allocate %s hash table\n", tablename);
+
+	printk("%s hash table entries: %d (order: %d, %lu bytes)\n",
+	       tablename,
+	       (1U << log2qty),
+	       long_log2(size) - PAGE_SHIFT,
+	       size);
+
+	if (_hash_shift)
+		*_hash_shift = log2qty;
+	if (_hash_mask)
+		*_hash_mask = (1 << log2qty) - 1;
+
+	return table;
+}
-- 
cgit v1.2.3


From 5a86174c17ab9d62fb7aced585cc5b6cb552411c Mon Sep 17 00:00:00 2001
From: Cesar Eduardo Barros <cesarb@nitnet.com.br>
Date: Sun, 20 Jun 2004 04:53:11 -0700
Subject: [PATCH] O_NOATIME support

This patch adds support for the O_NOATIME open flag (GNU extension):

int O_NOATIME  	Macro
  If this bit is set, read will not update the access time of the file.
  See File Times. This is used by programs that do backups, so that
  backing a file up does not count as reading it. Only the owner of the
  file or the superuser may use this bit.

It is useful if you want to do something with the file atime (for instance,
moving files that have not been accessed in a while to somewhere else, or
something like Debian's popularity-contest) but you also want to read all
files periodically (for instance, tripwire or debsums).

Currently, the program that reads all files periodically has to use utimes,
which can race with the atime update:

    A               B
  open
  fstat
  read
                  open
                  read
                  close
  close
  utimes

And the file still has the old atime, instead of the new one from when B
did the read from it.  This problem does not happen if A uses O_NOATIME
instead of utimes to preserve the atime.

This patch adds the O_NOATIME constant for all architectures, but it would
also be possible to add it one architecture at a time by defining it to 0
when not defined in asm-*.

Based on patch by Marek Michalkiewicz <marekm@i17linuxb.ists.pwr.wroc.pl> at
http://www.uwsg.iu.edu/hypermail/linux/kernel/9811.2/0118.html

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fcntl.c                  | 7 ++++++-
 fs/namei.c                  | 5 +++++
 include/asm-alpha/fcntl.h   | 1 +
 include/asm-arm/fcntl.h     | 1 +
 include/asm-arm26/fcntl.h   | 1 +
 include/asm-cris/fcntl.h    | 1 +
 include/asm-h8300/fcntl.h   | 1 +
 include/asm-i386/fcntl.h    | 1 +
 include/asm-ia64/fcntl.h    | 1 +
 include/asm-m68k/fcntl.h    | 1 +
 include/asm-mips/fcntl.h    | 1 +
 include/asm-parisc/fcntl.h  | 1 +
 include/asm-ppc/fcntl.h     | 1 +
 include/asm-ppc64/fcntl.h   | 1 +
 include/asm-s390/fcntl.h    | 1 +
 include/asm-sh/fcntl.h      | 1 +
 include/asm-sparc/fcntl.h   | 1 +
 include/asm-sparc64/fcntl.h | 1 +
 include/asm-v850/fcntl.h    | 1 +
 include/asm-x86_64/fcntl.h  | 1 +
 include/linux/fs.h          | 3 ++-
 21 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 13d351cba2e3..77cec9debe17 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -212,7 +212,7 @@ asmlinkage long sys_dup(unsigned int fildes)
 	return ret;
 }
 
-#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT)
+#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
 
 static int setfl(int fd, struct file * filp, unsigned long arg)
 {
@@ -223,6 +223,11 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 	if (!(arg & O_APPEND) && IS_APPEND(inode))
 		return -EPERM;
 
+	/* O_NOATIME can only be set by the owner or superuser */
+	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
+		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+			return -EPERM;
+
 	/* required for strict SunOS emulation */
 	if (O_NONBLOCK != O_NDELAY)
 	       if (arg & O_NDELAY)
diff --git a/fs/namei.c b/fs/namei.c
index 9828b94fe8bb..efaaf1dd1d7d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1206,6 +1206,11 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 			return -EPERM;
 	}
 
+	/* O_NOATIME can only be set by the owner or superuser */
+	if (flag & O_NOATIME)
+		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+			return -EPERM;
+
 	/*
 	 * Ensure there are no outstanding leases on the file.
 	 */
diff --git a/include/asm-alpha/fcntl.h b/include/asm-alpha/fcntl.h
index ad40d002f006..6b7d6c1649ce 100644
--- a/include/asm-alpha/fcntl.h
+++ b/include/asm-alpha/fcntl.h
@@ -21,6 +21,7 @@
 #define O_NOFOLLOW	0200000 /* don't follow links */
 #define O_LARGEFILE	0400000 /* will be set by the kernel on every open */
 #define O_DIRECT	02000000 /* direct disk access - should check with OSF/1 */
+#define O_NOATIME	04000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-arm/fcntl.h b/include/asm-arm/fcntl.h
index da2861ca37b0..485b6bdf4d7a 100644
--- a/include/asm-arm/fcntl.h
+++ b/include/asm-arm/fcntl.h
@@ -20,6 +20,7 @@
 #define O_NOFOLLOW	0100000	/* don't follow links */
 #define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
 #define O_LARGEFILE	0400000
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-arm26/fcntl.h b/include/asm-arm26/fcntl.h
index da2861ca37b0..485b6bdf4d7a 100644
--- a/include/asm-arm26/fcntl.h
+++ b/include/asm-arm26/fcntl.h
@@ -20,6 +20,7 @@
 #define O_NOFOLLOW	0100000	/* don't follow links */
 #define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
 #define O_LARGEFILE	0400000
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-cris/fcntl.h b/include/asm-cris/fcntl.h
index a68e2886e0c1..61c563242b51 100644
--- a/include/asm-cris/fcntl.h
+++ b/include/asm-cris/fcntl.h
@@ -22,6 +22,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get f_flags */
diff --git a/include/asm-h8300/fcntl.h b/include/asm-h8300/fcntl.h
index a7e7ac01d0d7..355350a57bf9 100644
--- a/include/asm-h8300/fcntl.h
+++ b/include/asm-h8300/fcntl.h
@@ -20,6 +20,7 @@
 #define O_NOFOLLOW	0100000	/* don't follow links */
 #define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
 #define O_LARGEFILE	0400000
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-i386/fcntl.h b/include/asm-i386/fcntl.h
index 41e3c4d9144e..511cde94a3ed 100644
--- a/include/asm-i386/fcntl.h
+++ b/include/asm-i386/fcntl.h
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-ia64/fcntl.h b/include/asm-ia64/fcntl.h
index 697dd0bc0a8e..d193981bb1d8 100644
--- a/include/asm-ia64/fcntl.h
+++ b/include/asm-ia64/fcntl.h
@@ -28,6 +28,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-m68k/fcntl.h b/include/asm-m68k/fcntl.h
index c0b273f68f05..0d4212983a33 100644
--- a/include/asm-m68k/fcntl.h
+++ b/include/asm-m68k/fcntl.h
@@ -20,6 +20,7 @@
 #define O_NOFOLLOW	0100000	/* don't follow links */
 #define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
 #define O_LARGEFILE	0400000
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-mips/fcntl.h b/include/asm-mips/fcntl.h
index e2b9c0a2537b..2436392e7990 100644
--- a/include/asm-mips/fcntl.h
+++ b/include/asm-mips/fcntl.h
@@ -26,6 +26,7 @@
 #define O_DIRECT	0x8000	/* direct disk access hint */
 #define O_DIRECTORY	0x10000	/* must be a directory */
 #define O_NOFOLLOW	0x20000	/* don't follow links */
+#define O_NOATIME	0x40000
 
 #define O_NDELAY	O_NONBLOCK
 
diff --git a/include/asm-parisc/fcntl.h b/include/asm-parisc/fcntl.h
index 01fe48b0ba9d..def35230716a 100644
--- a/include/asm-parisc/fcntl.h
+++ b/include/asm-parisc/fcntl.h
@@ -19,6 +19,7 @@
 #define O_NOCTTY	00400000 /* not fcntl */
 #define O_DSYNC		01000000 /* HPUX only */
 #define O_RSYNC		02000000 /* HPUX only */
+#define O_NOATIME	04000000
 
 #define FASYNC		00020000 /* fcntl, for BSD compatibility */
 #define O_DIRECT	00040000 /* direct disk access hint - currently ignored */
diff --git a/include/asm-ppc/fcntl.h b/include/asm-ppc/fcntl.h
index 27ae7d2f6af2..5e28e41fb29f 100644
--- a/include/asm-ppc/fcntl.h
+++ b/include/asm-ppc/fcntl.h
@@ -20,6 +20,7 @@
 #define O_NOFOLLOW      0100000	/* don't follow links */
 #define O_LARGEFILE     0200000
 #define O_DIRECT	0400000	/* direct disk access hint */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-ppc64/fcntl.h b/include/asm-ppc64/fcntl.h
index 1ef83570b4e4..842560d50656 100644
--- a/include/asm-ppc64/fcntl.h
+++ b/include/asm-ppc64/fcntl.h
@@ -27,6 +27,7 @@
 #define O_NOFOLLOW      0100000	/* don't follow links */
 #define O_LARGEFILE     0200000
 #define O_DIRECT	0400000	/* direct disk access hint */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-s390/fcntl.h b/include/asm-s390/fcntl.h
index 307c01b45213..48f692b45732 100644
--- a/include/asm-s390/fcntl.h
+++ b/include/asm-s390/fcntl.h
@@ -27,6 +27,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-sh/fcntl.h b/include/asm-sh/fcntl.h
index 21e39b7cc05a..0b3ae524e34c 100644
--- a/include/asm-sh/fcntl.h
+++ b/include/asm-sh/fcntl.h
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-sparc/fcntl.h b/include/asm-sparc/fcntl.h
index aa64cfc1df2e..df9c75d41d68 100644
--- a/include/asm-sparc/fcntl.h
+++ b/include/asm-sparc/fcntl.h
@@ -21,6 +21,7 @@
 #define O_NOFOLLOW	0x20000	/* don't follow links */
 #define O_LARGEFILE	0x40000
 #define O_DIRECT        0x100000 /* direct disk access hint */
+#define O_NOATIME	0x200000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-sparc64/fcntl.h b/include/asm-sparc64/fcntl.h
index 0999d6d525c1..e36def0d0d80 100644
--- a/include/asm-sparc64/fcntl.h
+++ b/include/asm-sparc64/fcntl.h
@@ -21,6 +21,7 @@
 #define O_NOFOLLOW	0x20000	/* don't follow links */
 #define O_LARGEFILE	0x40000
 #define O_DIRECT        0x100000 /* direct disk access hint */
+#define O_NOATIME	0x200000
 
 
 #define F_DUPFD		0	/* dup */
diff --git a/include/asm-v850/fcntl.h b/include/asm-v850/fcntl.h
index 42e358ff0752..31d4b5961221 100644
--- a/include/asm-v850/fcntl.h
+++ b/include/asm-v850/fcntl.h
@@ -20,6 +20,7 @@
 #define O_NOFOLLOW     0100000	/* don't follow links */
 #define O_DIRECT       0200000	/* direct disk access hint - currently ignored */
 #define O_LARGEFILE    0400000
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/asm-x86_64/fcntl.h b/include/asm-x86_64/fcntl.h
index aabf1a30a3b9..4411f221c037 100644
--- a/include/asm-x86_64/fcntl.h
+++ b/include/asm-x86_64/fcntl.h
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_NOATIME	01000000
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 504ca447700d..88337ed4f4f2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -979,7 +979,8 @@ static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
 
 static inline void file_accessed(struct file *file)
 {
-	touch_atime(file->f_vfsmnt, file->f_dentry);
+	if (!(file->f_flags & O_NOATIME))	/* O_NOATIME opens leave atime alone */
+		touch_atime(file->f_vfsmnt, file->f_dentry);
 }
 
 int sync_inode(struct inode *inode, struct writeback_control *wbc);
-- 
cgit v1.2.3


From a2816bbfb17101d2faa45c8b557da257a98fe729 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Sun, 20 Jun 2004 04:54:29 -0700
Subject: [PATCH] add wait_event_interruptible_exclusive() macro

This patch defines a macro that does exactly what
wait_event_interruptible() does except that it adds the current task to the
wait queue as an exclusive task (i.e., sets the WQ_FLAG_EXCLUSIVE flag)
rather than as a non-exclusive task as wait_event_interruptible() does.

This allows one to do a wake_up_nr() to wake up a specific number of tasks.
 I'm in the process of submitting a patch to linux-ia64 that requires this
capability.  (Its subject line is "[PATCH 3/4] SGI Altix cross partition
functionality".)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/wait.h | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 52edb1786b14..4a9f996bb6cc 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -200,7 +200,36 @@ do {									\
 		__wait_event_interruptible_timeout(wq, condition, __ret); \
 	__ret;								\
 })
-	
+
+#define __wait_event_interruptible_exclusive(wq, condition, ret)	\
+do {									\
+	wait_queue_t __wait;						\
+	init_waitqueue_entry(&__wait, current);				\
+	/* queue as an exclusive waiter (sets WQ_FLAG_EXCLUSIVE) */	\
+	add_wait_queue_exclusive(&wq, &__wait);				\
+	for (;;) {							\
+		set_current_state(TASK_INTERRUPTIBLE);			\
+		if (condition)						\
+			break;						\
+		if (!signal_pending(current)) {				\
+			schedule();					\
+			continue;					\
+		}							\
+		ret = -ERESTARTSYS;	/* interrupted by a signal */	\
+		break;							\
+	}								\
+	current->state = TASK_RUNNING;					\
+	remove_wait_queue(&wq, &__wait);				\
+} while (0)
+
+#define wait_event_interruptible_exclusive(wq, condition)		\
+({									\
+	int __ret = 0;	/* 0 unless interrupted by a signal */		\
+	if (!(condition))						\
+		__wait_event_interruptible_exclusive(wq, condition, __ret);\
+	__ret;								\
+})
+
 /*
  * Must be called with the spinlock in the wait_queue_head_t held.
  */
-- 
cgit v1.2.3


From 2fed84384a0b084d78252aa14d6bfae03deb268f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Sun, 20 Jun 2004 04:54:40 -0700
Subject: [PATCH] iommu max segment size

This patch is from James, I've changed it slightly only.

The problem is that some IOMMU implementations have an upper limit on the
number of contiguously mappable pages (admittedly, this limit is mostly in
the resource management algorithms rather than the IOMMUs themselves).

This patch adds this concept to the bio layer via the parameter

BIO_VMERGE_MAX_SIZE

which architectures can define in asm/io.h (if undefined, we assume it to
be infinite, which is current behaviour).

While adding this, I noticed several places where bio was making incorrect
assumptions about virtual mergeability (none of which was a bug: bio was
overestimating rather than underestimating).

- The worst offender was bio_add_page(), which seemed never to check for
  virtual mergeability

- I also fixed blk_hw_contig_segments() not to check the QUEUE_CLUSTER
  flag, and not to check the phys segment boundary.

In order to track the hw segment size across bios, I had to introduce two
extra bio parameters: bi_hw_front_size and bi_hw_back_size which store the
sizes of the front and back hw contiguous segments (and which will be equal
if there's only one hw segment).  When the bio is merged into a request,
these fields are updated with the total hw contig size so they can always
be used to assess if the merger would violate the BIO_VMERGE_MAX_SIZE
parameter.

Signed-Off-By: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/ll_rw_blk.c | 104 +++++++++++++++++++++++++++++++++++-----------
 fs/bio.c                  |  14 +++++--
 include/linux/bio.h       |  18 ++++++++
 3 files changed, 109 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 32881024ecfa..713f3ecb7f40 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -817,14 +817,14 @@ EXPORT_SYMBOL(blk_dump_rq_flags);
 void blk_recount_segments(request_queue_t *q, struct bio *bio)
 {
 	struct bio_vec *bv, *bvprv = NULL;
-	int i, nr_phys_segs, nr_hw_segs, seg_size, cluster;
+	int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
 	int high, highprv = 1;
 
 	if (unlikely(!bio->bi_io_vec))
 		return;
 
 	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
-	seg_size = nr_phys_segs = nr_hw_segs = 0;
+	hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
 	bio_for_each_segment(bv, bio, i) {
 		/*
 		 * the trick here is making sure that a high page is never
@@ -841,22 +841,35 @@ void blk_recount_segments(request_queue_t *q, struct bio *bio)
 				goto new_segment;
 			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
 				goto new_segment;
+			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
+				goto new_hw_segment;
 
 			seg_size += bv->bv_len;
+			hw_seg_size += bv->bv_len;
 			bvprv = bv;
 			continue;
 		}
 new_segment:
-		if (!BIOVEC_VIRT_MERGEABLE(bvprv, bv))
+		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
+		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
+			hw_seg_size += bv->bv_len;
+		} else {
 new_hw_segment:
+			if (hw_seg_size > bio->bi_hw_front_size)
+				bio->bi_hw_front_size = hw_seg_size;
+			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
 			nr_hw_segs++;
+		}
 
 		nr_phys_segs++;
 		bvprv = bv;
 		seg_size = bv->bv_len;
 		highprv = high;
 	}
-
+	if (hw_seg_size > bio->bi_hw_back_size)
+		bio->bi_hw_back_size = hw_seg_size;
+	if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
+		bio->bi_hw_front_size = hw_seg_size;
 	bio->bi_phys_segments = nr_phys_segs;
 	bio->bi_hw_segments = nr_hw_segs;
 	bio->bi_flags |= (1 << BIO_SEG_VALID);
@@ -889,22 +902,17 @@ EXPORT_SYMBOL(blk_phys_contig_segment);
 int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
 				 struct bio *nxt)
 {
-	if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
-		return 0;
-
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
+		blk_recount_segments(q, bio);	/* refresh bi_hw_*_size */
+	if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
+		blk_recount_segments(q, nxt);
+	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
+	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
 		return 0;
 	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
 		return 0;
 
-	/*
-	 * bio and nxt are contigous in memory, check if the queue allows
-	 * these two to be merged into one
-	 */
-	if (BIO_SEG_BOUNDARY(q, bio, nxt))
-		return 1;
-
-	return 0;
+	return 1;	/* tail of bio and head of nxt fit in one hw segment */
 }
 
 EXPORT_SYMBOL(blk_hw_contig_segment);
@@ -1012,14 +1020,30 @@ static inline int ll_new_hw_segment(request_queue_t *q,
 static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
 			    struct bio *bio)
 {
+	int len;	/* hw size of the segment a back merge would create */
+
 	if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
 		req->flags |= REQ_NOMERGE;
 		q->last_merge = NULL;
 		return 0;
 	}
-
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)))
-		return ll_new_mergeable(q, req, bio);
+	if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
+		blk_recount_segments(q, req->biotail);
+	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
+		blk_recount_segments(q, bio);
+	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
+	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
+	    !BIOVEC_VIRT_OVERSIZE(len)) {
+		int mergeable =  ll_new_mergeable(q, req, bio);
+
+		if (mergeable) {	/* record the merged hw size at both ends */
+			if (req->nr_hw_segments == 1)
+				req->bio->bi_hw_front_size = len;
+			if (bio->bi_hw_segments == 1)
+				bio->bi_hw_back_size = len;
+		}
+		return mergeable;
+	}
 
 	return ll_new_hw_segment(q, req, bio);
 }
@@ -1027,14 +1051,30 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
 static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
 			     struct bio *bio)
 {
+	int len;	/* merged hw segment size; read after the recounts */
+
 	if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
 		req->flags |= REQ_NOMERGE;
 		q->last_merge = NULL;
 		return 0;
 	}
-
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)))
-		return ll_new_mergeable(q, req, bio);
+	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
+		blk_recount_segments(q, bio);
+	if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
+		blk_recount_segments(q, req->bio);
+	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
+	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
+	    !BIOVEC_VIRT_OVERSIZE(len)) {
+		int mergeable =  ll_new_mergeable(q, req, bio);
+
+		if (mergeable) {	/* record the merged hw size at both ends */
+			if (bio->bi_hw_segments == 1)
+				bio->bi_hw_front_size = len;
+			if (req->nr_hw_segments == 1)
+				req->biotail->bi_hw_back_size = len;
+		}
+		return mergeable;
+	}
 
 	return ll_new_hw_segment(q, req, bio);
 }
@@ -1066,8 +1106,17 @@ static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
 		return 0;
 
 	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
-	if (blk_hw_contig_segment(q, req->biotail, next->bio))
+	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
+		int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
+		/*
+		 * propagate the combined length to the end of the requests
+		 */
+		if (req->nr_hw_segments == 1)
+			req->bio->bi_hw_front_size = len;
+		if (next->nr_hw_segments == 1)
+			next->biotail->bi_hw_back_size = len;
 		total_hw_segments--;
+	}
 
 	if (total_hw_segments > q->max_hw_segments)
 		return 0;
@@ -2532,7 +2581,7 @@ EXPORT_SYMBOL(process_that_request_first);
 
 void blk_recalc_rq_segments(struct request *rq)
 {
-	struct bio *bio;
+	struct bio *bio, *prevbio = NULL;
 	int nr_phys_segs, nr_hw_segs;
 
 	if (!rq->bio)
@@ -2545,6 +2594,13 @@ void blk_recalc_rq_segments(struct request *rq)
 
 		nr_phys_segs += bio_phys_segments(rq->q, bio);
 		nr_hw_segs += bio_hw_segments(rq->q, bio);
+		if (prevbio) {
+			if (blk_phys_contig_segment(rq->q, prevbio, bio))
+				nr_phys_segs--;
+			if (blk_hw_contig_segment(rq->q, prevbio, bio))
+				nr_hw_segs--;
+		}
+		prevbio = bio;
 	}
 
 	rq->nr_phys_segments = nr_phys_segs;
diff --git a/fs/bio.c b/fs/bio.c
index 2d1ec65361a5..ac03005be2b0 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -116,6 +116,8 @@ inline void bio_init(struct bio *bio)
 	bio->bi_idx = 0;
 	bio->bi_phys_segments = 0;
 	bio->bi_hw_segments = 0;
+	bio->bi_hw_front_size = 0;
+	bio->bi_hw_back_size = 0;
 	bio->bi_size = 0;
 	bio->bi_max_vecs = 0;
 	bio->bi_end_io = NULL;
@@ -304,14 +306,15 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
 	 * make this too complex.
 	 */
 
-	while (bio_phys_segments(q, bio) >= q->max_phys_segments
-	    || bio_hw_segments(q, bio) >= q->max_hw_segments) {
+	while (bio->bi_phys_segments >= q->max_phys_segments
+	       || bio->bi_hw_segments >= q->max_hw_segments
+	       || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
 
 		if (retried_segments)
 			return 0;
 
-		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 		retried_segments = 1;
+		blk_recount_segments(q, bio);
 	}
 
 	/*
@@ -341,6 +344,11 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
 		}
 	}
 
+	/* If we may be able to merge these biovecs, force a recount */
+	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
+	    BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
+		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
 	bio->bi_hw_segments++;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c4dd287dd1c8..601531cf4976 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -25,6 +25,15 @@
 
 /* Platforms may set this to teach the BIO layer about IOMMU hardware. */
 #include <asm/io.h>
+
+#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
+#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
+#define BIOVEC_VIRT_OVERSIZE(x)	((x) > BIO_VMERGE_MAX_SIZE)
+#else
+#define BIOVEC_VIRT_START_SIZE(x)	0
+#define BIOVEC_VIRT_OVERSIZE(x)		0
+#endif
+
 #ifndef BIO_VMERGE_BOUNDARY
 #define BIO_VMERGE_BOUNDARY	0
 #endif
@@ -81,6 +90,15 @@ struct bio {
 	unsigned short		bi_hw_segments;
 
 	unsigned int		bi_size;	/* residual I/O count */
+
+	/*
+	 * To keep track of the max hw size, we account for the
+	 * sizes of the first and last virtually mergeable segments
+	 * in this bio
+	 */
+	unsigned int		bi_hw_front_size;
+	unsigned int		bi_hw_back_size;
+
 	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
 
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
-- 
cgit v1.2.3


From cc843d2e8176bfb1f5d1f1065a58b70cd0f78690 Mon Sep 17 00:00:00 2001
From: Gerd Knorr <kraxel@bytesex.org>
Date: Sun, 20 Jun 2004 04:54:51 -0700
Subject: [PATCH] v4l: v4l2 API updates

This patch has some minor updates to v4l2 API:

* A new pixel format (V4L2_PIX_FMT_SBGGR8).

* Adds some #defines for tv norms for convenience.

* Allows specifying the video source to capture from on a per-frame basis.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/videodev2.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index a4ab8e826bbe..ab8727d92739 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -207,6 +207,9 @@ struct v4l2_pix_format
 #define V4L2_PIX_FMT_YYUV    v4l2_fourcc('Y','Y','U','V') /* 16  YUV 4:2:2     */
 #define V4L2_PIX_FMT_HI240   v4l2_fourcc('H','I','2','4') /*  8  8-bit color   */
 
+/* see http://www.siliconimaging.com/RGB%20Bayer.htm */
+#define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B','A','8','1') /*  8  BGBG.. GRGR.. */
+
 /* compressed formats */
 #define V4L2_PIX_FMT_MJPEG    v4l2_fourcc('M','J','P','G') /* Motion-JPEG   */
 #define V4L2_PIX_FMT_JPEG     v4l2_fourcc('J','P','E','G') /* JFIF JPEG     */
@@ -383,8 +386,8 @@ struct v4l2_buffer
 		unsigned long   userptr;
 	} m;
 	__u32			length;
-
-	__u32			reserved[2];
+	__u32			input;
+	__u32			reserved;
 };
 
 /*  Flags for 'flags' field */
@@ -395,6 +398,7 @@ struct v4l2_buffer
 #define V4L2_BUF_FLAG_PFRAME	0x0010	/* Image is a P-frame */
 #define V4L2_BUF_FLAG_BFRAME	0x0020	/* Image is a B-frame */
 #define V4L2_BUF_FLAG_TIMECODE	0x0100	/* timecode field is valid */
+#define V4L2_BUF_FLAG_INPUT     0x0200  /* input field is valid */
 
 /*
  *	O V E R L A Y   P R E V I E W
@@ -526,12 +530,13 @@ typedef __u64 v4l2_std_id;
 				 V4L2_STD_PAL_I)
 #define V4L2_STD_NTSC           (V4L2_STD_NTSC_M	|\
 				 V4L2_STD_NTSC_M_JP)
+#define V4L2_STD_SECAM_DK      	(V4L2_STD_SECAM_D	|\
+				 V4L2_STD_SECAM_K	|\
+				 V4L2_STD_SECAM_K1)
 #define V4L2_STD_SECAM		(V4L2_STD_SECAM_B	|\
-				 V4L2_STD_SECAM_D	|\
 				 V4L2_STD_SECAM_G	|\
 				 V4L2_STD_SECAM_H	|\
-				 V4L2_STD_SECAM_K	|\
-				 V4L2_STD_SECAM_K1	|\
+				 V4L2_STD_SECAM_DK	|\
 				 V4L2_STD_SECAM_L)
 
 #define V4L2_STD_525_60		(V4L2_STD_PAL_M		|\
@@ -541,6 +546,8 @@ typedef __u64 v4l2_std_id;
 				 V4L2_STD_PAL_N		|\
 				 V4L2_STD_PAL_Nc	|\
 				 V4L2_STD_SECAM)
+#define V4L2_STD_ATSC           (V4L2_STD_ATSC_8_VSB    |\
+		                 V4L2_STD_ATSC_16_VSB)
 
 #define V4L2_STD_UNKNOWN        0
 #define V4L2_STD_ALL            (V4L2_STD_525_60	|\
-- 
cgit v1.2.3


From 5d119c3c357cfea3d8ab2c23529f0d1da3369b6a Mon Sep 17 00:00:00 2001
From: Ralf Bächle <ralf@linux-mips.org>
Date: Sun, 20 Jun 2004 06:44:45 -0700
Subject: [PATCH] DS1286 cleanups

Remove the #ifdef'ed hooks for the DS1286 driver throughout the kernel.  While
cleaning up, also make it work as a module and add back the core of the driver,
which got lost when drivers/sgi/ was shredded.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/mips/configs/ip22_defconfig |   2 +-
 arch/mips/defconfig              |   2 +-
 drivers/char/Kconfig             |  11 +
 drivers/char/Makefile            |   1 +
 drivers/char/ds1286.c            | 578 +++++++++++++++++++++++++++++++++++++++
 drivers/char/misc.c              |   4 -
 fs/proc/proc_misc.c              |  15 -
 include/linux/ds1286.h           |  54 ++++
 8 files changed, 646 insertions(+), 21 deletions(-)
 create mode 100644 drivers/char/ds1286.c
 create mode 100644 include/linux/ds1286.h

(limited to 'include/linux')

diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index bbd6215cc611..912beee0c66c 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -583,7 +583,7 @@ CONFIG_WATCHDOG=y
 CONFIG_INDYDOG=m
 # CONFIG_RTC is not set
 # CONFIG_GEN_RTC is not set
-CONFIG_SGI_DS1286=y
+CONFIG_SGI_DS1286=m
 # CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
diff --git a/arch/mips/defconfig b/arch/mips/defconfig
index bbd6215cc611..912beee0c66c 100644
--- a/arch/mips/defconfig
+++ b/arch/mips/defconfig
@@ -583,7 +583,7 @@ CONFIG_WATCHDOG=y
 CONFIG_INDYDOG=m
 # CONFIG_RTC is not set
 # CONFIG_GEN_RTC is not set
-CONFIG_SGI_DS1286=y
+CONFIG_SGI_DS1286=m
 # CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 2fe8bfab039e..c286a5e42159 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -753,6 +753,17 @@ config RTC
 	  To compile this driver as a module, choose M here: the
 	  module will be called rtc.
 
+config SGI_DS1286
+	tristate "SGI DS1286 RTC support"
+	depends on SGI_IP22
+	help
+	  If you say Y here and create a character special file /dev/rtc with
+	  major number 10 and minor number 135 using mknod ("man mknod"), you
+	  will get access to the real time clock built into your computer.
+	  Every SGI has such a clock built in. It reports status information
+	  via the file /proc/rtc and its behaviour is set by various ioctls on
+	  /dev/rtc.
+
 config GEN_RTC
 	tristate "Generic /dev/rtc emulation"
 	depends on RTC!=y && !IA64
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 789890c8c82b..893975d29374 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_RTC) += rtc.o
 obj-$(CONFIG_HPET) += hpet.o
 obj-$(CONFIG_GEN_RTC) += genrtc.o
 obj-$(CONFIG_EFI_RTC) += efirtc.o
+obj-$(CONFIG_SGI_DS1286) += ds1286.o
 ifeq ($(CONFIG_GENERIC_NVRAM),y)
   obj-$(CONFIG_NVRAM) += generic_nvram.o
 else
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
new file mode 100644
index 000000000000..bc042fb2d5cd
--- /dev/null
+++ b/drivers/char/ds1286.c
@@ -0,0 +1,578 @@
+/*
+ * DS1286 Real Time Clock interface for Linux
+ *
+ * Copyright (C) 1998, 1999, 2000 Ralf Baechle
+ *
+ * Based on code written by Paul Gortmaker.
+ *
+ * This driver allows use of the real time clock (built into nearly all
+ * computers) from user space. It exports the /dev/rtc interface supporting
+ * various ioctl() and also the /proc/rtc pseudo-file for status
+ * information.
+ *
+ * The ioctls can be used to set the interrupt behaviour and generation rate
+ * from the RTC via IRQ 8. Then the /dev/rtc interface can be used to make
+ * use of these timer interrupts, be they interval or alarm based.
+ *
+ * The /dev/rtc interface will block on reads until an interrupt has been
+ * received. If a RTC interrupt has already happened, it will output an
+ * unsigned long and then block. The output value contains the interrupt
+ * status in the low byte and the number of interrupts since the last read
+ * in the remaining high bytes. The /dev/rtc interface can also be used with
+ * the select(2) call.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include <linux/ds1286.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/rtc.h>
+#include <linux/spinlock.h>
+#include <linux/bcd.h>
+#include <linux/proc_fs.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#define DS1286_VERSION		"1.0"
+
+/*
+ *	We sponge a minor off of the misc major. No need slurping
+ *	up another valuable major dev number for this. If you add
+ *	an ioctl, make sure you don't conflict with SPARC's RTC
+ *	ioctls.
+ */
+
+static DECLARE_WAIT_QUEUE_HEAD(ds1286_wait);
+
+static ssize_t ds1286_read(struct file *file, char *buf,
+			size_t count, loff_t *ppos);
+
+static int ds1286_ioctl(struct inode *inode, struct file *file,
+                        unsigned int cmd, unsigned long arg);
+
+static unsigned int ds1286_poll(struct file *file, poll_table *wait);
+
+static void ds1286_get_alm_time (struct rtc_time *alm_tm);
+static void ds1286_get_time(struct rtc_time *rtc_tm);
+static int ds1286_set_time(struct rtc_time *rtc_tm);
+
+static inline unsigned char ds1286_is_updating(void);
+
+static spinlock_t ds1286_lock = SPIN_LOCK_UNLOCKED;
+
+static int ds1286_read_proc(char *page, char **start, off_t off,
+                            int count, int *eof, void *data);
+
+/*
+ *	Bits in rtc_status. (7 bits of room for future expansion)
+ */
+
+#define RTC_IS_OPEN		0x01	/* means /dev/rtc is in use	*/
+#define RTC_TIMER_ON		0x02	/* missed irq timer active	*/
+
+static unsigned char ds1286_status;	/* bitmapped status byte.	*/
+
+static unsigned char days_in_mo[] = {
+	0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+/*
+ *	Now all the various file operations that we export.
+ */
+
+static ssize_t ds1286_read(struct file *file, char *buf,
+                           size_t count, loff_t *ppos)
+{
+	return -EIO;	/* read() is not implemented; every call fails with -EIO */
+}
+
+static int ds1286_ioctl(struct inode *inode, struct file *file,
+                        unsigned int cmd, unsigned long arg)
+{
+	struct rtc_time wtime;	/* filled by the read cmds, copied out at the end */
+
+	switch (cmd) {
+	case RTC_AIE_OFF:	/* Mask alarm int. enab. bit	*/
+	{
+		unsigned long flags;	/* type required by spin_lock_irqsave() */
+		unsigned char val;
+
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+
+		spin_lock_irqsave(&ds1286_lock, flags);
+		val = rtc_read(RTC_CMD);
+		val |=  RTC_TDM;
+		rtc_write(val, RTC_CMD);
+		spin_unlock_irqrestore(&ds1286_lock, flags);
+
+		return 0;
+	}
+	case RTC_AIE_ON:	/* Allow alarm interrupts.	*/
+	{
+		unsigned long flags;	/* type required by spin_lock_irqsave() */
+		unsigned char val;
+
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+
+		spin_lock_irqsave(&ds1286_lock, flags);
+		val = rtc_read(RTC_CMD);
+		val &=  ~RTC_TDM;
+		rtc_write(val, RTC_CMD);
+		spin_unlock_irqrestore(&ds1286_lock, flags);
+
+		return 0;
+	}
+	case RTC_WIE_OFF:	/* Mask watchdog int. enab. bit	*/
+	{
+		unsigned long flags;	/* type required by spin_lock_irqsave() */
+		unsigned char val;
+
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+
+		spin_lock_irqsave(&ds1286_lock, flags);
+		val = rtc_read(RTC_CMD);
+		val |= RTC_WAM;
+		rtc_write(val, RTC_CMD);
+		spin_unlock_irqrestore(&ds1286_lock, flags);
+
+		return 0;
+	}
+	case RTC_WIE_ON:	/* Allow watchdog interrupts.	*/
+	{
+		unsigned long flags;	/* type required by spin_lock_irqsave() */
+		unsigned char val;
+
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+
+		spin_lock_irqsave(&ds1286_lock, flags);
+		val = rtc_read(RTC_CMD);
+		val &= ~RTC_WAM;
+		rtc_write(val, RTC_CMD);
+		spin_unlock_irqrestore(&ds1286_lock, flags);
+
+		return 0;
+	}
+	case RTC_ALM_READ:	/* Read the present alarm time */
+	{
+		/*
+		 * This returns a struct rtc_time. Reading >= 0xc0
+		 * means "don't care" or "match all". Only the tm_hour,
+		 * tm_min, and tm_sec values are filled in.
+		 */
+
+		memset(&wtime, 0, sizeof(wtime));
+		ds1286_get_alm_time(&wtime);
+		break;
+	}
+	case RTC_ALM_SET:	/* Store a time into the alarm */
+	{
+		/*
+		 * This expects a struct rtc_time. Writing 0xff means
+		 * "don't care" or "match all". Only tm_hour and tm_min
+		 * are used; tm_sec is ignored.
+		 */
+		unsigned char hrs, min;
+		struct rtc_time alm_tm;
+
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+
+		if (copy_from_user(&alm_tm, (struct rtc_time*)arg,
+				   sizeof(struct rtc_time)))
+			return -EFAULT;
+
+		hrs = alm_tm.tm_hour;
+		min = alm_tm.tm_min;
+
+		if (hrs >= 24)
+			hrs = 0xff;
+
+		if (min >= 60)
+			min = 0xff;
+
+		/* Only hours and minutes are written to the alarm registers. */
+		BIN_TO_BCD(min);
+		BIN_TO_BCD(hrs);
+
+		spin_lock(&ds1286_lock);
+		rtc_write(hrs, RTC_HOURS_ALARM);
+		rtc_write(min, RTC_MINUTES_ALARM);
+		spin_unlock(&ds1286_lock);
+
+		return 0;
+	}
+	case RTC_RD_TIME:	/* Read the time/date from RTC	*/
+	{
+		memset(&wtime, 0, sizeof(wtime));
+		ds1286_get_time(&wtime);
+		break;
+	}
+	case RTC_SET_TIME:	/* Set the RTC */
+	{
+		struct rtc_time rtc_tm;
+
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+
+		if (copy_from_user(&rtc_tm, (struct rtc_time*)arg,
+				   sizeof(struct rtc_time)))
+			return -EFAULT;
+
+		return ds1286_set_time(&rtc_tm);
+	}
+	default:
+		return -EINVAL;
+	}
+	return copy_to_user((void *)arg, &wtime, sizeof wtime) ? -EFAULT : 0;
+}
+
+/*
+ *	We enforce only one user at a time here with the open/close.
+ *	Also clear the previous interrupt data on an open, and clean
+ *	up things on a close.
+ */
+
+static int ds1286_open(struct inode *inode, struct file *file)
+{
+	spin_lock_irq(&ds1286_lock);
+
+	if (ds1286_status & RTC_IS_OPEN)
+		goto out_busy;
+
+	ds1286_status |= RTC_IS_OPEN;
+
+	spin_unlock_irq(&ds1286_lock);
+	return 0;
+
+out_busy:
+	spin_unlock_irq(&ds1286_lock);	/* release the lock taken above */
+	return -EBUSY;
+}
+
+static int ds1286_release(struct inode *inode, struct file *file)
+{
+	ds1286_status &= ~RTC_IS_OPEN;	/* let the next open() succeed */
+	/* NOTE(review): cleared without taking ds1286_lock, unlike ds1286_open(). */
+	return 0;
+}
+
+static unsigned int ds1286_poll(struct file *file, poll_table *wait)
+{
+	poll_wait(file, &ds1286_wait, wait);
+	/* No events are ever reported; the returned mask is always empty. */
+	return 0;
+}
+
+/*
+ *	The various file operations we support.
+ */
+
+static struct file_operations ds1286_fops = {
+	.llseek		= no_llseek,
+	.read		= ds1286_read,	/* always fails with -EIO */
+	.poll		= ds1286_poll,
+	.ioctl		= ds1286_ioctl,
+	.open		= ds1286_open,	/* allows only one opener at a time */
+	.release	= ds1286_release,
+};
+
+static struct miscdevice ds1286_dev=
+{
+	.minor	= RTC_MINOR,
+	.name	= "rtc",
+	.fops	= &ds1286_fops,	/* registered by ds1286_init() via misc_register() */
+};
+
+static int __init ds1286_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "DS1286 Real Time Clock Driver v%s\n", DS1286_VERSION);
+	/* Register the misc character device first. */
+	err = misc_register(&ds1286_dev);
+	if (err)
+		goto out;
+	/* Then create the proc status entry "driver/rtc". */
+	if (!create_proc_read_entry("driver/rtc", 0, 0, ds1286_read_proc, NULL)) {
+		err = -ENOMEM;
+		/* Undo the misc registration before failing. */
+		goto out_deregister;
+	}
+
+	return 0;
+
+out_deregister:
+	misc_deregister(&ds1286_dev);
+
+out:
+	return err;
+}
+
+static void __exit ds1286_exit(void)
+{
+	remove_proc_entry("driver/rtc", NULL);	/* undo ds1286_init() in reverse order */
+	misc_deregister(&ds1286_dev);
+}
+
+static char *days[] = {
+	"***", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+};
+
+/*
+ *	Info exported via "/proc/driver/rtc"; returns the length written to buf.
+ */
+static int ds1286_proc_output(char *buf)
+{
+	char *p, *s;
+	struct rtc_time tm;
+	unsigned char hundredth, month, cmd, amode;
+
+	p = buf;
+
+	ds1286_get_time(&tm);
+	hundredth = rtc_read(RTC_HUNDREDTH_SECOND);
+	BCD_TO_BIN(hundredth);
+
+	p += sprintf(p,
+	             "rtc_time\t: %02d:%02d:%02d.%02d\n"
+	             "rtc_date\t: %04d-%02d-%02d\n",
+		     tm.tm_hour, tm.tm_min, tm.tm_sec, hundredth,
+		     tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
+
+	/*
+	 * We implicitly assume 24hr mode here. Alarm values >= 0xc0 will
+	 * match any value for that particular field. Values that are
+	 * greater than a valid time, but less than 0xc0 shouldn't appear.
+	 */
+	ds1286_get_alm_time(&tm);
+	p += sprintf(p, "alarm\t\t: %s ", days[tm.tm_wday]);
+	if (tm.tm_hour <= 23)	/* valid hours are 0..23 */
+		p += sprintf(p, "%02d:", tm.tm_hour);
+	else
+		p += sprintf(p, "**:");
+
+	if (tm.tm_min <= 59)
+		p += sprintf(p, "%02d\n", tm.tm_min);
+	else
+		p += sprintf(p, "**\n");
+
+	month = rtc_read(RTC_MONTH);
+	p += sprintf(p,
+	             "oscillator\t: %s\n"
+	             "square_wave\t: %s\n",
+	             (month & RTC_EOSC) ? "disabled" : "enabled",
+	             (month & RTC_ESQW) ? "disabled" : "enabled");
+
+	amode = ((rtc_read(RTC_MINUTES_ALARM) & 0x80) >> 5) |
+	        ((rtc_read(RTC_HOURS_ALARM) & 0x80) >> 6) |
+	        ((rtc_read(RTC_DAY_ALARM) & 0x80) >> 7);
+	if (amode == 7)      s = "each minute";
+	else if (amode == 3) s = "minutes match";
+	else if (amode == 1) s = "hours and minutes match";
+	else if (amode == 0) s = "days, hours and minutes match";
+	else                 s = "invalid";
+	p += sprintf(p, "alarm_mode\t: %s\n", s);
+
+	cmd = rtc_read(RTC_CMD);
+	p += sprintf(p,
+	             "alarm_enable\t: %s\n"
+	             "wdog_alarm\t: %s\n"
+	             "alarm_mask\t: %s\n"
+	             "wdog_alarm_mask\t: %s\n"
+	             "interrupt_mode\t: %s\n"
+	             "INTB_mode\t: %s_active\n"
+	             "interrupt_pins\t: %s\n",
+		     (cmd & RTC_TDF) ? "yes" : "no",
+		     (cmd & RTC_WAF) ? "yes" : "no",
+		     (cmd & RTC_TDM) ? "disabled" : "enabled",
+		     (cmd & RTC_WAM) ? "disabled" : "enabled",
+		     (cmd & RTC_PU_LVL) ? "pulse" : "level",
+		     (cmd & RTC_IBH_LO) ? "low" : "high",
+	             (cmd & RTC_IPSW) ? "unswapped" : "swapped");
+
+	return  p - buf;
+}
+
+static int ds1286_read_proc(char *page, char **start, off_t off,
+                         int count, int *eof, void *data)
+{
+	int len = ds1286_proc_output (page);	/* regenerate the whole status text */
+	if (len <= off+count) *eof = 1;	/* this read reaches the end of the text */
+	*start = page + off;
+	len -= off;	/* bytes left past the requested offset */
+	if (len>count)
+		len = count;
+	if (len<0)
+		len = 0;
+	/* len is now clamped to the range 0..count */
+	return len;
+}
+
+/*
+ * Returns true if a clock update is in progress
+ */
+static inline unsigned char ds1286_is_updating(void)
+{
+	return rtc_read(RTC_CMD) & RTC_TE;	/* non-zero while RTC_TE is set in the command register */
+}
+
+
+static void ds1286_get_time(struct rtc_time *rtc_tm)
+{
+	unsigned char save_control;
+	unsigned long flags;	/* type required by spin_lock_irqsave() */
+	unsigned long uip_watchdog = jiffies;
+
+	/*
+	 * read RTC once any update in progress is done. The update
+	 * can take just over 2ms. We wait 10 to 20ms. There is no need to
+	 * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
+	 * If you need to know *exactly* when a second has started, enable
+	 * periodic update complete interrupts, (via ioctl) and then
+	 * immediately read /dev/rtc which will block until you get the IRQ.
+	 * Once the read clears, read the RTC time (again via ioctl). Easy.
+	 */
+
+	if (ds1286_is_updating() != 0)
+		while (jiffies - uip_watchdog < 2*HZ/100)
+			barrier();
+
+	/*
+	 * Only the values that we read from the RTC are set. We leave
+	 * tm_wday, tm_yday and tm_isdst untouched. Even though the
+	 * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
+	 * by the RTC when initially set to a non-zero value.
+	 */
+	spin_lock_irqsave(&ds1286_lock, flags);
+	save_control = rtc_read(RTC_CMD);
+	rtc_write((save_control|RTC_TE), RTC_CMD);
+
+	rtc_tm->tm_sec = rtc_read(RTC_SECONDS);
+	rtc_tm->tm_min = rtc_read(RTC_MINUTES);
+	rtc_tm->tm_hour = rtc_read(RTC_HOURS) & 0x3f;
+	rtc_tm->tm_mday = rtc_read(RTC_DATE);
+	rtc_tm->tm_mon = rtc_read(RTC_MONTH) & 0x1f;
+	rtc_tm->tm_year = rtc_read(RTC_YEAR);
+
+	rtc_write(save_control, RTC_CMD);
+	spin_unlock_irqrestore(&ds1286_lock, flags);
+
+	BCD_TO_BIN(rtc_tm->tm_sec);
+	BCD_TO_BIN(rtc_tm->tm_min);
+	BCD_TO_BIN(rtc_tm->tm_hour);
+	BCD_TO_BIN(rtc_tm->tm_mday);
+	BCD_TO_BIN(rtc_tm->tm_mon);
+	BCD_TO_BIN(rtc_tm->tm_year);
+
+	/*
+	 * Account for differences between how the RTC uses the values
+	 * and how they are defined in a struct rtc_time;
+	 */
+	if (rtc_tm->tm_year < 45)
+		rtc_tm->tm_year += 30;
+	if ((rtc_tm->tm_year += 40) < 70)
+		rtc_tm->tm_year += 100;
+
+	rtc_tm->tm_mon--;
+}
+
+static int ds1286_set_time(struct rtc_time *rtc_tm)
+{
+	unsigned char mon, day, hrs, min, sec, leap_yr;
+	unsigned char save_control;
+	unsigned int yrs;
+	unsigned long flags;	/* type required by spin_lock_irqsave() */
+
+	yrs = rtc_tm->tm_year + 1900;
+	mon = rtc_tm->tm_mon + 1;   /* tm_mon starts at zero */
+	day = rtc_tm->tm_mday;
+	hrs = rtc_tm->tm_hour;
+	min = rtc_tm->tm_min;
+	sec = rtc_tm->tm_sec;
+
+	if (yrs < 1970)
+		return -EINVAL;
+
+	leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400));
+
+	if ((mon > 12) || (day == 0))
+		return -EINVAL;
+
+	if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr)))
+		return -EINVAL;
+
+	if ((hrs >= 24) || (min >= 60) || (sec >= 60))
+		return -EINVAL;
+
+	if ((yrs -= 1940) > 255)    /* They are unsigned */
+		return -EINVAL;
+
+	if (yrs >= 100)
+		yrs -= 100;
+
+	BIN_TO_BCD(sec);
+	BIN_TO_BCD(min);
+	BIN_TO_BCD(hrs);
+	BIN_TO_BCD(day);
+	BIN_TO_BCD(mon);
+	BIN_TO_BCD(yrs);
+
+	spin_lock_irqsave(&ds1286_lock, flags);
+	save_control = rtc_read(RTC_CMD);
+	rtc_write((save_control|RTC_TE), RTC_CMD);
+
+	rtc_write(yrs, RTC_YEAR);
+	rtc_write(mon, RTC_MONTH);
+	rtc_write(day, RTC_DATE);
+	rtc_write(hrs, RTC_HOURS);
+	rtc_write(min, RTC_MINUTES);
+	rtc_write(sec, RTC_SECONDS);
+	rtc_write(0, RTC_HUNDREDTH_SECOND);
+
+	rtc_write(save_control, RTC_CMD);
+	spin_unlock_irqrestore(&ds1286_lock, flags);
+
+	return 0;
+}
+
+static void ds1286_get_alm_time(struct rtc_time *alm_tm)
+{
+	unsigned char cmd;
+	unsigned long flags;	/* type required by spin_lock_irqsave() */
+
+	/*
+	 * Only the values that we read from the RTC are set. That
+	 * means only tm_wday, tm_hour, tm_min; tm_sec is forced to 0.
+	 */
+	spin_lock_irqsave(&ds1286_lock, flags);
+	alm_tm->tm_min = rtc_read(RTC_MINUTES_ALARM) & 0x7f;
+	alm_tm->tm_hour = rtc_read(RTC_HOURS_ALARM)  & 0x1f;
+	alm_tm->tm_wday = rtc_read(RTC_DAY_ALARM)    & 0x07;
+	cmd = rtc_read(RTC_CMD);	/* value unused; NOTE(review): confirm the read is needed */
+	spin_unlock_irqrestore(&ds1286_lock, flags);
+
+	BCD_TO_BIN(alm_tm->tm_min);
+	BCD_TO_BIN(alm_tm->tm_hour);
+	alm_tm->tm_sec = 0;
+}
+
+module_init(ds1286_init);
+module_exit(ds1286_exit);
+
+MODULE_AUTHOR("Ralf Baechle");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(RTC_MINOR);
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 1bccc0d19207..13b11270a9f2 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -65,7 +65,6 @@ static unsigned char misc_minors[DYNAMIC_MINORS / 8];
 
 extern int rtc_DP8570A_init(void);
 extern int rtc_MK48T08_init(void);
-extern int ds1286_init(void);
 extern int pmu_device_init(void);
 extern int tosh_init(void);
 extern int i8k_init(void);
@@ -314,9 +313,6 @@ static int __init misc_init(void)
 #ifdef CONFIG_BVME6000
 	rtc_DP8570A_init();
 #endif
-#ifdef CONFIG_SGI_DS1286
-	ds1286_init();
-#endif
 #ifdef CONFIG_PMAC_PBOOK
 	pmu_device_init();
 #endif
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 2066d2551c9f..a46bd705e93b 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -66,9 +66,6 @@ extern int get_filesystem_list(char *);
 extern int get_exec_domain_list(char *);
 extern int get_dma_list(char *);
 extern int get_locks_status (char *, char **, off_t, int);
-#ifdef CONFIG_SGI_DS1286
-extern int get_ds1286_status(char *);
-#endif
 
 static int proc_calc_metrics(char *page, char **start, off_t off,
 				 int count, int *eof, int len)
@@ -528,15 +525,6 @@ static int cmdline_read_proc(char *page, char **start, off_t off,
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-#ifdef CONFIG_SGI_DS1286
-static int ds1286_read_proc(char *page, char **start, off_t off,
-				 int count, int *eof, void *data)
-{
-	int len = get_ds1286_status(page);
-	return proc_calc_metrics(page, start, off, count, eof, len);
-}
-#endif
-
 static int locks_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -670,9 +658,6 @@ void __init proc_misc_init(void)
 		{"devices",	devices_read_proc},
 		{"filesystems",	filesystems_read_proc},
 		{"cmdline",	cmdline_read_proc},
-#ifdef CONFIG_SGI_DS1286
-		{"rtc",		ds1286_read_proc},
-#endif
 		{"locks",	locks_read_proc},
 		{"execdomains",	execdomains_read_proc},
 		{NULL,}
diff --git a/include/linux/ds1286.h b/include/linux/ds1286.h
new file mode 100644
index 000000000000..d8989860e4ce
--- /dev/null
+++ b/include/linux/ds1286.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 1998, 1999, 2003 Ralf Baechle
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __LINUX_DS1286_H
+#define __LINUX_DS1286_H
+
+#include <asm/ds1286.h>
+
+/**********************************************************************
+ * register summary
+ **********************************************************************/
+#define RTC_HUNDREDTH_SECOND	0
+#define RTC_SECONDS		1
+#define RTC_MINUTES		2
+#define RTC_MINUTES_ALARM	3
+#define RTC_HOURS		4
+#define RTC_HOURS_ALARM		5
+#define RTC_DAY			6
+#define RTC_DAY_ALARM		7
+#define RTC_DATE		8
+#define RTC_MONTH		9
+#define RTC_YEAR		10
+#define RTC_CMD			11
+#define RTC_WHSEC		12
+#define RTC_WSEC		13
+#define RTC_UNUSED		14
+
+/* RTC_*_alarm is always true if 2 MSBs are set */
+# define RTC_ALARM_DONT_CARE 	0xC0
+
+
+/*
+ * Bits in the month register
+ */
+#define RTC_EOSC		0x80
+#define RTC_ESQW		0x40
+
+/*
+ * Bits in the Command register
+ */
+#define RTC_TDF			0x01
+#define RTC_WAF			0x02
+#define RTC_TDM			0x04
+#define RTC_WAM			0x08
+#define RTC_PU_LVL		0x10
+#define RTC_IBH_LO		0x20
+#define RTC_IPSW		0x40
+#define RTC_TE			0x80
+
+#endif /* __LINUX_DS1286_H */
-- 
cgit v1.2.3


From 914d17491cfd3019e00b9f3271815da3815e6b56 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 20 Jun 2004 06:45:52 -0700
Subject: [PATCH] Fix idr.h comment

Fix path in <linux/idr.h> header file.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/idr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 8e2618e3f334..f41128683d65 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -1,5 +1,5 @@
 /*
- * include/linux/id.h
+ * include/linux/idr.h
  * 
  * 2002-10-18  written by Jim Houston jim.houston@ccur.com
  *	Copyright (C) 2002 by Concurrent Computer Corporation
-- 
cgit v1.2.3